From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Thu, 30 Apr 2026 21:18:52 +0000 (+0200) Subject: [PATCH] [3.10] gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name... X-Git-Tag: archive/raspbian/3.9.2-1+rpi1+deb11u7^2~1 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=bee7219b040dd56d529cbedaf9a115a76dfc4b0f;p=python3.9.git [PATCH] [3.10] gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling (#145816) gh-141707: Skip TarInfo DIRTYPE normalization during GNU long name handling (cherry picked from commit 42d754e34c06e57ad6b8e7f92f32af679912d8ab) Co-authored-by: Seth Michael Larson Co-authored-by: Eashwar Ranganathan Origin: backport, https://github.com/python/cpython/commit/72dde1016493c52abe857fc4a7bf6c40138b4114 Gbp-Pq: Name CVE-2025-13462.patch --- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index cb2da23..8839147 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1048,6 +1048,20 @@ class TarInfo(object): @classmethod def frombuf(cls, buf, encoding, errors): """Construct a TarInfo object from a 512 byte bytes object. + + To support the old v7 tar format AREGTYPE headers are + transformed to DIRTYPE headers if their name ends in '/'. + """ + return cls._frombuf(buf, encoding, errors) + + @classmethod + def _frombuf(cls, buf, encoding, errors, *, dircheck=True): + """Construct a TarInfo object from a 512 byte bytes object. + + If ``dircheck`` is set to ``True`` then ``AREGTYPE`` headers will + be normalized to ``DIRTYPE`` if the name ends in a trailing slash. + ``dircheck`` must be set to ``False`` if this function is called + on a follow-up header such as ``GNUTYPE_LONGNAME``. """ if len(buf) == 0: raise EmptyHeaderError("empty header") @@ -1078,7 +1092,7 @@ class TarInfo(object): # Old V7 tar format represents a directory as a regular # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): + if dircheck and obj.type == AREGTYPE and obj.name.endswith("/"): obj.type = DIRTYPE # The old GNU sparse format occupies some of the unused @@ -1113,8 +1127,15 @@ class TarInfo(object): """Return the next TarInfo object from TarFile object tarfile. """ + return cls._fromtarfile(tarfile) + + @classmethod + def _fromtarfile(cls, tarfile, *, dircheck=True): + """ + See dircheck documentation in _frombuf(). + """ buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) + obj = cls._frombuf(buf, tarfile.encoding, tarfile.errors, dircheck=dircheck) obj.offset = tarfile.fileobj.tell() - BLOCKSIZE return obj._proc_member(tarfile) @@ -1167,7 +1188,7 @@ class TarInfo(object): # Fetch the next header and process it. try: - next = self.fromtarfile(tarfile) + next = self._fromtarfile(tarfile, dircheck=False) except HeaderError: raise SubsequentHeaderError("missing or bad subsequent header") @@ -1297,7 +1318,7 @@ class TarInfo(object): # Fetch the next header. try: - next = self.fromtarfile(tarfile) + next = self._fromtarfile(tarfile, dircheck=False) except HeaderError: raise SubsequentHeaderError("missing or bad subsequent header") diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 0c19258..0ddb2cc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -974,10 +974,30 @@ class LongnameTest: self.assertEqual(tarinfo.type, self.longnametype) + def test_longname_file_not_directory(self): + # Test reading a longname file and ensure it is not handled as a directory + # Issue #141707 + buf = io.BytesIO() + with tarfile.open(mode='w', fileobj=buf, format=self.format) as tar: + ti = tarfile.TarInfo() + ti.type = tarfile.AREGTYPE + ti.name = ('a' * 99) + '/' + ('b' * 3) + tar.addfile(ti) + + expected = {t.name: t.type for t in tar.getmembers()} + + buf.seek(0) + with tarfile.open(mode='r', fileobj=buf) as tar: + actual = {t.name: t.type for t in tar.getmembers()} + + self.assertEqual(expected, actual) + + class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase): subdir = "gnu" longnametype = tarfile.GNUTYPE_LONGNAME + format = tarfile.GNU_FORMAT # Since 3.2 tarfile is supposed to accurately restore sparse members and # produce files with holes. This is what we actually want to test here. @@ -1037,6 +1057,7 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase): subdir = "pax" longnametype = tarfile.XHDTYPE + format = tarfile.PAX_FORMAT def test_pax_global_headers(self): tar = tarfile.open(tarname, encoding="iso8859-1") diff --git a/Misc/ACKS b/Misc/ACKS index 73d35c2..5073981 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1389,6 +1389,7 @@ Dhushyanth Ramasamy Ashwin Ramaswami Jeff Ramnani Bayard Randel +Eashwar Ranganathan Varpu Rantala Brodie Rao Rémi Rampin diff --git a/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst new file mode 100644 index 0000000..1f5b8ed --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-18-06-35-53.gh-issue-141707.DBmQIy.rst @@ -0,0 +1,2 @@ +Don't change :class:`tarfile.TarInfo` type from ``AREGTYPE`` to ``DIRTYPE`` when parsing +GNU long name or link headers.