From 53576c4fc75d60af86d9f7b3a2cf8462c422e0f8 Mon Sep 17 00:00:00 2001 From: Frederick Price Date: Wed, 1 May 2024 19:17:58 -0400 Subject: [PATCH 1/4] CVE-2007-4559 Tried to copy back only the new bits from Python3.6 Symlinks --- Lib/test/test_tarfile.py | 318 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 316 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index b7ff47f783e72e..f75d46ed3144a0 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -27,11 +27,14 @@ def md5sum(data): return md5(data).hexdigest() -TEMPDIR = os.path.abspath(test_support.TESTFN) -tarname = test_support.findfile("testtar.tar") +TEMPDIR = os.path.abspath(support.TESTFN) + "-tardir" +tarextdir = TEMPDIR + '-extract-test' +tarname = support.findfile("testtar.tar") gzipname = os.path.join(TEMPDIR, "testtar.tar.gz") bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2") +xzname = os.path.join(TEMPDIR, "testtar.tar.xz") tmpname = os.path.join(TEMPDIR, "tmp.tar") +dotlessname = os.path.join(TEMPDIR, "testtar") md5_regtype = "65f477c818ad9e15f7feab0c6d37742f" md5_sparse = "a54fbc4ca4f4399a90e1b27164012fc6" @@ -135,6 +138,18 @@ def test_fileobj_seek(self): "read() after readline() failed") fobj.close() + def test_fileobj_text(self): + with self.tar.extractfile("ustar/regtype") as fobj: + fobj = io.TextIOWrapper(fobj) + data = fobj.read().encode("iso8859-1") + self.assertEqual(md5sum(data), md5_regtype) + try: + fobj.seek(100) + except AttributeError: + # Issue #13815: seek() complained about a missing + # flush() method. + self.fail("seeking failed in text mode") + # Test if symbolic and hard links are resolved by extractfile(). The # test link members each point to a regular member whose data is # supposed to be exported. 
@@ -158,6 +173,15 @@ def test_fileobj_symlink2(self): def test_issue14160(self): self._test_fileobj_link("symtype2", "ustar/regtype") +class GzipUstarReadTest(GzipTest, UstarReadTest): + pass + +class Bz2UstarReadTest(Bz2Test, UstarReadTest): + pass + +class LzmaUstarReadTest(LzmaTest, UstarReadTest): + pass + class ListTest(ReadTest, unittest.TestCase): @@ -220,6 +244,17 @@ def test_list_verbose(self): self.assertIn('pax' + ('/123' * 125) + '/longlink link to pax' + ('/123' * 125) + '/longname', out) + def test_list_members(self): + tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + def members(tar): + for tarinfo in tar.getmembers(): + if 'reg' in tarinfo.name: + yield tarinfo + with support.swap_attr(sys, 'stdout', tio): + self.tar.list(verbose=False, members=members(self.tar)) + out = tio.detach().getvalue() + self.assertIn(b'ustar/regtype', out) + self.assertNotIn(b'ustar/conttype', out) class GzipListTest(ListTest): tarname = gzipname @@ -252,6 +287,12 @@ def test_empty_tarfile(self): finally: tar.close() + def test_non_existent_tarfile(self): + # Test for issue11513: prevent non-existent gzipped tarfiles raising + # multiple exceptions. + with self.assertRaisesRegex(FileNotFoundError, "xxx"): + tarfile.open("xxx", self.mode) + def test_null_tarfile(self): # Test for issue6123: Allow opening empty archives. # This test guarantees that tarfile.open() does not treat an empty @@ -440,6 +481,9 @@ def test_find_members(self): self.assertTrue(self.tar.getmembers()[-1].name == "misc/eof", "could not find all members") + @unittest.skipUnless(hasattr(os, "link"), + "Missing hardlink implementation") + @support.skip_unless_symlink def test_extract_hardlink(self): # Test hardlink extraction (e.g. bug #857297). 
with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: @@ -474,6 +518,41 @@ def test_extractall(self): finally: tar.close() + def test_extract_directory(self): + dirtype = "ustar/dirtype" + DIR = os.path.join(TEMPDIR, "extractdir") + os.mkdir(DIR) + try: + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tarinfo = tar.getmember(dirtype) + tar.extract(tarinfo, path=DIR) + extracted = os.path.join(DIR, dirtype) + self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + if sys.platform != "win32": + self.assertEqual(os.stat(extracted).st_mode & 0o777, 0o755) + finally: + support.rmtree(DIR) + + def test_extractall_pathlike_name(self): + DIR = pathlib.Path(TEMPDIR) / "extractall" + with support.temp_dir(DIR), \ + tarfile.open(tarname, encoding="iso8859-1") as tar: + directories = [t for t in tar if t.isdir()] + tar.extractall(DIR, directories) + for tarinfo in directories: + path = DIR / tarinfo.name + self.assertEqual(os.path.getmtime(path), tarinfo.mtime) + + def test_extract_pathlike_name(self): + dirtype = "ustar/dirtype" + DIR = pathlib.Path(TEMPDIR) / "extractall" + with support.temp_dir(DIR), \ + tarfile.open(tarname, encoding="iso8859-1") as tar: + tarinfo = tar.getmember(dirtype) + tar.extract(tarinfo, path=DIR) + extracted = DIR / dirtype + self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + def test_init_close_fobj(self): # Issue #7341: Close the internal file object in the TarFile # constructor in case of an error. 
For the test we rely on @@ -501,6 +580,8 @@ def test_parallel_iteration(self): self.assertEqual(m1.offset, m2.offset) self.assertEqual(m1.name, m2.name) +class MiscReadTest(MiscReadTestBase, unittest.TestCase): + test_fail_comp = None class StreamReadTest(CommonReadTest): @@ -543,9 +624,16 @@ def test_compare_members(self): finally: tar1.close() +class GzipStreamReadTest(GzipTest, StreamReadTest): + pass class DetectReadTest(unittest.TestCase): +class LzmaStreamReadTest(LzmaTest, StreamReadTest): + pass + + +class DetectReadTest(TarTest, unittest.TestCase): def _testfunc_file(self, name, mode): try: tar = tarfile.open(name, mode) @@ -614,6 +702,8 @@ def test_detect_stream_bz2(self): self._testfunc_file(tmpname, "r|*") +class LzmaDetectReadTest(LzmaTest, DetectReadTest): + pass class MemberReadTest(ReadTest): @@ -673,6 +763,22 @@ def test_find_sparse(self): tarinfo = self.tar.getmember("ustar/sparse") self._test_member(tarinfo, size=86016, chksum=md5_sparse) + def test_find_gnusparse(self): + tarinfo = self.tar.getmember("gnu/sparse") + self._test_member(tarinfo, size=86016, chksum=md5_sparse) + + def test_find_gnusparse_00(self): + tarinfo = self.tar.getmember("gnu/sparse-0.0") + self._test_member(tarinfo, size=86016, chksum=md5_sparse) + + def test_find_gnusparse_01(self): + tarinfo = self.tar.getmember("gnu/sparse-0.1") + self._test_member(tarinfo, size=86016, chksum=md5_sparse) + + def test_find_gnusparse_10(self): + tarinfo = self.tar.getmember("gnu/sparse-1.0") + self._test_member(tarinfo, size=86016, chksum=md5_sparse) + def test_find_umlauts(self): tarinfo = self.tar.getmember("ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf") self._test_member(tarinfo, size=7011, chksum=md5_regtype) @@ -803,6 +909,15 @@ def test_fileobj_no_close(self): self.assertFalse(fobj.closed) self.assertEqual(data, fobj.getvalue()) + def test_eof_marker(self): + # Make sure an end of archive marker is written (two zero blocks). 
+ # tarfile insists on aligning archives to a 20 * 512 byte recordsize. + # So, we create an archive that has exactly 10240 bytes without the + # marker, and has 20480 bytes once the marker is written. + with tarfile.open(tmpname, self.mode) as tar: + t = tarfile.TarInfo("foo") + t.size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE + tar.addfile(t, io.BytesIO(b"a" * t.size)) class WriteTest(WriteTestBase): @@ -872,6 +987,19 @@ def test_directory_size(self): finally: os.rmdir(path) + def test_gettarinfo_pathlike_name(self): + with tarfile.open(tmpname, self.mode) as tar: + path = pathlib.Path(TEMPDIR) / "file" + with open(path, "wb") as fobj: + fobj.write(b"aaa") + tarinfo = tar.gettarinfo(path) + tarinfo2 = tar.gettarinfo(os.fspath(path)) + self.assertIsInstance(tarinfo.name, str) + self.assertEqual(tarinfo.name, tarinfo2.name) + self.assertEqual(tarinfo.size, 3) + + @unittest.skipUnless(hasattr(os, "link"), + "Missing hardlink implementation") def test_link_size(self): if hasattr(os, "link"): link = os.path.join(TEMPDIR, "link") @@ -892,6 +1020,7 @@ def test_link_size(self): os.remove(target) os.remove(link) + @support.skip_unless_symlink def test_symlink_size(self): if hasattr(os, "symlink"): path = os.path.join(TEMPDIR, "symlink") @@ -971,6 +1100,10 @@ def filter(tarinfo): finally: tar.close() + # Verify that filter is a keyword-only argument + with self.assertRaises(TypeError): + tar.add(tempdir, "empty_dir", True, None, filter) + tar = tarfile.open(tmpname, "r") try: for tarinfo in tar: @@ -1014,6 +1147,36 @@ def _test_pathname(self, path, cmp_path=None, dir=False): self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/")) + + @support.skip_unless_symlink + def test_extractall_symlinks(self): + # Test if extractall works properly when tarfile contains symlinks + tempdir = os.path.join(TEMPDIR, "testsymlinks") + temparchive = os.path.join(TEMPDIR, "testsymlinks.tar") + os.mkdir(tempdir) + try: + source_file = os.path.join(tempdir,'source') + target_file = 
os.path.join(tempdir,'symlink') + with open(source_file,'w') as f: + f.write('something\n') + os.symlink(source_file, target_file) + tar = tarfile.open(temparchive,'w') + tar.add(source_file) + tar.add(target_file) + tar.close() + # Let's extract it to the location which contains the symlink + tar = tarfile.open(temparchive,'r') + # this should not raise OSError: [Errno 17] File exists + try: + tar.extractall(path=tempdir) + except OSError: + self.fail("extractall failed with symlinked files") + finally: + tar.close() + finally: + support.unlink(temparchive) + support.rmtree(tempdir) + def test_pathnames(self): self._test_pathname("foo") self._test_pathname(os.path.join("foo", ".", "bar")) @@ -1294,6 +1457,117 @@ def test_longnamelink_1025(self): ("longlnk/" * 127) + "longlink_") +class CreateTest(WriteTestBase, unittest.TestCase): + + prefix = "x:" + + file_path = os.path.join(TEMPDIR, "spameggs42") + + def setUp(self): + support.unlink(tmpname) + + @classmethod + def setUpClass(cls): + with open(cls.file_path, "wb") as fobj: + fobj.write(b"aaa") + + @classmethod + def tearDownClass(cls): + support.unlink(cls.file_path) + + def test_create(self): + with tarfile.open(tmpname, self.mode) as tobj: + tobj.add(self.file_path) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_existing(self): + with tarfile.open(tmpname, self.mode) as tobj: + tobj.add(self.file_path) + + with self.assertRaises(FileExistsError): + tobj = tarfile.open(tmpname, self.mode) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_taropen(self): + with self.taropen(tmpname, "x") as tobj: + tobj.add(self.file_path) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def 
test_create_existing_taropen(self): + with self.taropen(tmpname, "x") as tobj: + tobj.add(self.file_path) + + with self.assertRaises(FileExistsError): + with self.taropen(tmpname, "x"): + pass + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn("spameggs42", names[0]) + + def test_create_pathlike_name(self): + with tarfile.open(pathlib.Path(tmpname), self.mode) as tobj: + self.assertIsInstance(tobj.name, str) + self.assertEqual(tobj.name, os.path.abspath(tmpname)) + tobj.add(pathlib.Path(self.file_path)) + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_taropen_pathlike_name(self): + with self.taropen(pathlib.Path(tmpname), "x") as tobj: + self.assertIsInstance(tobj.name, str) + self.assertEqual(tobj.name, os.path.abspath(tmpname)) + tobj.add(pathlib.Path(self.file_path)) + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + +class GzipCreateTest(GzipTest, CreateTest): + pass + + +class Bz2CreateTest(Bz2Test, CreateTest): + pass + + +class LzmaCreateTest(LzmaTest, CreateTest): + pass + + +class CreateWithXModeTest(CreateTest): + + prefix = "x" + + test_create_taropen = None + test_create_existing_taropen = None + + +@unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation") class HardlinkTest(unittest.TestCase): # Test the creation of LNKTYPE (hardlink) members in an archive. @@ -1536,6 +1810,12 @@ def test_error_handler_utf8(self): errors="utf-8") self.assertEqual(tar.getnames()[0], "\xe4\xf6\xfc/" + u"\u20ac".encode("utf8")) + # Test the same as above for the 100 bytes link field. 
+ def test_unicode_link1(self): + self._test_ustar_link("0123456789" * 10) + self._test_ustar_link("0123456789" * 10 + "0", ValueError) + self._test_ustar_link("0123456789" * 9 + "01234567\xff") + self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError) class AppendTest(unittest.TestCase): # Test append mode (cp. patch #1652681). @@ -1686,7 +1966,32 @@ def test_pax_limits(self): class MiscTest(unittest.TestCase): + def test_char_fields(self): + self.assertEqual(tarfile.stn("foo", 8, "ascii", "strict"), + b"foo\0\0\0\0\0") + self.assertEqual(tarfile.stn("foobar", 3, "ascii", "strict"), + b"foo") + self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), + "foo") + self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), + "foo") + def test_read_number_fields(self): + # Issue 13158: Test if GNU tar specific base-256 number fields + # are decoded correctly. + self.assertEqual(tarfile.nti(b"0000001\x00"), 1) + self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), + 0o10000000) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), + 0xffffffff) + self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), + -1) + self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), + -100) + self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), + -0x100000000000000) + # Issue 24514: Test if empty number fields are converted to zero. 
self.assertEqual(tarfile.nti("\0"), 0) self.assertEqual(tarfile.nti(" \0"), 0) @@ -1760,15 +2065,24 @@ def _test_link_extraction(self, name): data = open(os.path.join(TEMPDIR, name), "rb").read() self.assertEqual(md5sum(data), md5_regtype) + # See issues #1578269, #8879, and #17689 for some history on these skips + @unittest.skipIf(hasattr(os.path, "islink"), + "Skip emulation - has os.path.islink but not os.link") def test_hardlink_extraction1(self): self._test_link_extraction("ustar/lnktype") + @unittest.skipIf(hasattr(os.path, "islink"), + "Skip emulation - has os.path.islink but not os.link") def test_hardlink_extraction2(self): self._test_link_extraction("./ustar/linktest2/lnktype") + @unittest.skipIf(hasattr(os, "symlink"), + "Skip emulation if symlink exists") def test_symlink_extraction1(self): self._test_link_extraction("ustar/symtype") + @unittest.skipIf(hasattr(os, "symlink"), + "Skip emulation if symlink exists") def test_symlink_extraction2(self): self._test_link_extraction("./ustar/linktest2/symtype") From 2592f3274c9f107be55dbb693817e7e0eb7543cd Mon Sep 17 00:00:00 2001 From: Frederick Price Date: Wed, 1 May 2024 19:47:04 -0400 Subject: [PATCH 2/4] CVE-2007-4559 Remove invalid tests, get others working with Python2 --- Lib/tarfile.py | 16 +++ Lib/test/test_tarfile.py | 227 +++++++++++++++++++++------------------ 2 files changed, 136 insertions(+), 107 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 574a6bb279ddf0..c2603d8651716e 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -916,6 +916,22 @@ def __iter__(self): if not line: break yield line + + def __enter__(self): + #INFO: This was used in Python3.6, but we aren't based off io.BufferedReader in Python2 + # self._check() + return self + + def __exit__(self, type, value, traceback): + if type is None: + self.close() + else: + # An exception occurred. We must not call close() because + # it would try to write end-of-archive blocks and padding. 
+ #INFO: This was used in Python3.6, but we aren't based off io.BufferedReader in Python2 + # if not self._extfileobj: + # self.fileobj.close() + self.close() #class ExFileObject #------------------ diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index f75d46ed3144a0..4f5677a4afcec4 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,5 +1,6 @@ import sys import os +import io import shutil import StringIO from binascii import unhexlify @@ -12,6 +13,7 @@ from test import test_support from test import test_support as support +from test import symlink_support # Check for our compression modules. try: @@ -78,6 +80,7 @@ def test_fileobj_readlines(self): "I will gladly admit that Python is not the fastest running scripting language.\n", "fileobj.readlines() failed") + #FIX: This now breaks with the new tarfile def test_fileobj_iter(self): self.tar.extract("ustar/regtype", TEMPDIR) tarinfo = self.tar.getmember("ustar/regtype") @@ -140,7 +143,7 @@ def test_fileobj_seek(self): def test_fileobj_text(self): with self.tar.extractfile("ustar/regtype") as fobj: - fobj = io.TextIOWrapper(fobj) + # fobj = io.TextIOWrapper(fobj) data = fobj.read().encode("iso8859-1") self.assertEqual(md5sum(data), md5_regtype) try: @@ -173,15 +176,6 @@ def test_fileobj_symlink2(self): def test_issue14160(self): self._test_fileobj_link("symtype2", "ustar/regtype") -class GzipUstarReadTest(GzipTest, UstarReadTest): - pass - -class Bz2UstarReadTest(Bz2Test, UstarReadTest): - pass - -class LzmaUstarReadTest(LzmaTest, UstarReadTest): - pass - class ListTest(ReadTest, unittest.TestCase): @@ -244,17 +238,18 @@ def test_list_verbose(self): self.assertIn('pax' + ('/123' * 125) + '/longlink link to pax' + ('/123' * 125) + '/longname', out) - def test_list_members(self): - tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') - def members(tar): - for tarinfo in tar.getmembers(): - if 'reg' in tarinfo.name: - yield tarinfo - with support.swap_attr(sys, 'stdout', 
tio): - self.tar.list(verbose=False, members=members(self.tar)) - out = tio.detach().getvalue() - self.assertIn(b'ustar/regtype', out) - self.assertNotIn(b'ustar/conttype', out) + #INFO: Python2 list doesn't have the "members" functionality, so can't test + # def test_list_members(self): + # tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') + # def members(tar): + # for tarinfo in tar.getmembers(): + # if 'reg' in tarinfo.name: + # yield tarinfo + # with support.swap_attr(sys, 'stdout', tio): + # self.tar.list(verbose=False, members=members(self.tar)) + # out = tio.detach().getvalue() + # self.assertIn(b'ustar/regtype', out) + # self.assertNotIn(b'ustar/conttype', out) class GzipListTest(ListTest): tarname = gzipname @@ -483,7 +478,7 @@ def test_find_members(self): @unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation") - @support.skip_unless_symlink + @symlink_support.skip_unless_symlink def test_extract_hardlink(self): # Test hardlink extraction (e.g. bug #857297). 
with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: @@ -533,25 +528,27 @@ def test_extract_directory(self): finally: support.rmtree(DIR) - def test_extractall_pathlike_name(self): - DIR = pathlib.Path(TEMPDIR) / "extractall" - with support.temp_dir(DIR), \ - tarfile.open(tarname, encoding="iso8859-1") as tar: - directories = [t for t in tar if t.isdir()] - tar.extractall(DIR, directories) - for tarinfo in directories: - path = DIR / tarinfo.name - self.assertEqual(os.path.getmtime(path), tarinfo.mtime) - - def test_extract_pathlike_name(self): - dirtype = "ustar/dirtype" - DIR = pathlib.Path(TEMPDIR) / "extractall" - with support.temp_dir(DIR), \ - tarfile.open(tarname, encoding="iso8859-1") as tar: - tarinfo = tar.getmember(dirtype) - tar.extract(tarinfo, path=DIR) - extracted = DIR / dirtype - self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + #INFO: Pathlib doesn't exist on Python2 + # def test_extractall_pathlike_name(self): + # DIR = pathlib.Path(TEMPDIR) / "extractall" + # with support.temp_dir(DIR), \ + # tarfile.open(tarname, encoding="iso8859-1") as tar: + # directories = [t for t in tar if t.isdir()] + # tar.extractall(DIR, directories) + # for tarinfo in directories: + # path = DIR / tarinfo.name + # self.assertEqual(os.path.getmtime(path), tarinfo.mtime) + + #INFO: Pathlib doesn't exist on Python2 + # def test_extract_pathlike_name(self): + # dirtype = "ustar/dirtype" + # DIR = pathlib.Path(TEMPDIR) / "extractall" + # with support.temp_dir(DIR), \ + # tarfile.open(tarname, encoding="iso8859-1") as tar: + # tarinfo = tar.getmember(dirtype) + # tar.extract(tarinfo, path=DIR) + # extracted = DIR / dirtype + # self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) def test_init_close_fobj(self): # Issue #7341: Close the internal file object in the TarFile @@ -580,8 +577,9 @@ def test_parallel_iteration(self): self.assertEqual(m1.offset, m2.offset) self.assertEqual(m1.name, m2.name) -class MiscReadTest(MiscReadTestBase, 
unittest.TestCase): - test_fail_comp = None +# FIX: Brought in from Python3.6 +# class MiscReadTest(MiscReadTestBase, unittest.TestCase): +# test_fail_comp = None class StreamReadTest(CommonReadTest): @@ -624,14 +622,23 @@ def test_compare_members(self): finally: tar1.close() -class GzipStreamReadTest(GzipTest, StreamReadTest): - pass - -class DetectReadTest(unittest.TestCase): +# FIX: Brought in from Python3.6 +# class GzipStreamReadTest(GzipTest, StreamReadTest): +# pass +# +#INFO: Python2 doesn't have LZMA +# class LzmaStreamReadTest(LzmaTest, StreamReadTest): +# pass -class LzmaStreamReadTest(LzmaTest, StreamReadTest): - pass +class TarTest: + tarname = tarname + suffix = '' + open = io.FileIO + taropen = tarfile.TarFile.taropen + @property + def mode(self): + return self.prefix + self.suffix class DetectReadTest(TarTest, unittest.TestCase): def _testfunc_file(self, name, mode): @@ -702,8 +709,10 @@ def test_detect_stream_bz2(self): self._testfunc_file(tmpname, "r|*") -class LzmaDetectReadTest(LzmaTest, DetectReadTest): - pass + +#INFO: Python2 doesn't have LZMA +# class LzmaDetectReadTest(LzmaTest, DetectReadTest): +# pass class MemberReadTest(ReadTest): @@ -767,18 +776,6 @@ def test_find_gnusparse(self): tarinfo = self.tar.getmember("gnu/sparse") self._test_member(tarinfo, size=86016, chksum=md5_sparse) - def test_find_gnusparse_00(self): - tarinfo = self.tar.getmember("gnu/sparse-0.0") - self._test_member(tarinfo, size=86016, chksum=md5_sparse) - - def test_find_gnusparse_01(self): - tarinfo = self.tar.getmember("gnu/sparse-0.1") - self._test_member(tarinfo, size=86016, chksum=md5_sparse) - - def test_find_gnusparse_10(self): - tarinfo = self.tar.getmember("gnu/sparse-1.0") - self._test_member(tarinfo, size=86016, chksum=md5_sparse) - def test_find_umlauts(self): tarinfo = self.tar.getmember("ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf") self._test_member(tarinfo, size=7011, chksum=md5_regtype) @@ -987,16 +984,17 @@ def test_directory_size(self): finally: 
os.rmdir(path) - def test_gettarinfo_pathlike_name(self): - with tarfile.open(tmpname, self.mode) as tar: - path = pathlib.Path(TEMPDIR) / "file" - with open(path, "wb") as fobj: - fobj.write(b"aaa") - tarinfo = tar.gettarinfo(path) - tarinfo2 = tar.gettarinfo(os.fspath(path)) - self.assertIsInstance(tarinfo.name, str) - self.assertEqual(tarinfo.name, tarinfo2.name) - self.assertEqual(tarinfo.size, 3) + #INFO: We don't have pathlib on Python2, not sure if we can really test this + # def test_gettarinfo_pathlike_name(self): + # with tarfile.open(tmpname, self.mode) as tar: + # path = pathlib.Path(TEMPDIR) / "file" + # with open(path, "wb") as fobj: + # fobj.write(b"aaa") + # tarinfo = tar.gettarinfo(path) + # tarinfo2 = tar.gettarinfo(os.fspath(path)) + # self.assertIsInstance(tarinfo.name, str) + # self.assertEqual(tarinfo.name, tarinfo2.name) + # self.assertEqual(tarinfo.size, 3) @unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation") @@ -1020,7 +1018,7 @@ def test_link_size(self): os.remove(target) os.remove(link) - @support.skip_unless_symlink + @symlink_support.skip_unless_symlink def test_symlink_size(self): if hasattr(os, "symlink"): path = os.path.join(TEMPDIR, "symlink") @@ -1100,9 +1098,10 @@ def filter(tarinfo): finally: tar.close() - # Verify that filter is a keyword-only argument - with self.assertRaises(TypeError): - tar.add(tempdir, "empty_dir", True, None, filter) + #FIX: Not sure how to test this on Python2 ATM + # # Verify that filter is a keyword-only argument + # with self.assertRaises(TypeError): + # tar.add(tempdir, "empty_dir", True, None, filter) tar = tarfile.open(tmpname, "r") try: @@ -1148,7 +1147,7 @@ def _test_pathname(self, path, cmp_path=None, dir=False): self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/")) - @support.skip_unless_symlink + @symlink_support.skip_unless_symlink def test_extractall_symlinks(self): # Test if extractall works properly when tarfile contains symlinks tempdir = 
os.path.join(TEMPDIR, "testsymlinks") @@ -1547,16 +1546,19 @@ def test_create_taropen_pathlike_name(self): self.assertIn('spameggs42', names[0]) -class GzipCreateTest(GzipTest, CreateTest): - pass +#FIX: Brought in by Python3.6 +# class GzipCreateTest(GzipTest, CreateTest): +# pass -class Bz2CreateTest(Bz2Test, CreateTest): - pass +#FIX: Brought in by Python3.6 +# class Bz2CreateTest(Bz2Test, CreateTest): +# pass -class LzmaCreateTest(LzmaTest, CreateTest): - pass +#INFO: Python2 doesn't have LZMA +# class LzmaCreateTest(LzmaTest, CreateTest): +# pass class CreateWithXModeTest(CreateTest): @@ -1810,12 +1812,13 @@ def test_error_handler_utf8(self): errors="utf-8") self.assertEqual(tar.getnames()[0], "\xe4\xf6\xfc/" + u"\u20ac".encode("utf8")) - # Test the same as above for the 100 bytes link field. - def test_unicode_link1(self): - self._test_ustar_link("0123456789" * 10) - self._test_ustar_link("0123456789" * 10 + "0", ValueError) - self._test_ustar_link("0123456789" * 9 + "01234567\xff") - self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError) + # FIX: Came in on Python3.6 + # # Test the same as above for the 100 bytes link field. + # def test_unicode_link1(self): + # self._test_ustar_link("0123456789" * 10) + # self._test_ustar_link("0123456789" * 10 + "0", ValueError) + # self._test_ustar_link("0123456789" * 9 + "01234567\xff") + # self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError) class AppendTest(unittest.TestCase): # Test append mode (cp. patch #1652681). 
@@ -1966,31 +1969,32 @@ def test_pax_limits(self): class MiscTest(unittest.TestCase): + # Came in on Python3.6 def test_char_fields(self): - self.assertEqual(tarfile.stn("foo", 8, "ascii", "strict"), - b"foo\0\0\0\0\0") - self.assertEqual(tarfile.stn("foobar", 3, "ascii", "strict"), - b"foo") - self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0", "ascii", "strict"), - "foo") - self.assertEqual(tarfile.nts(b"foo\0bar\0", "ascii", "strict"), - "foo") - + self.assertEqual(tarfile.stn("foo", 8), + b"foo\0\0\0\0\0") + self.assertEqual(tarfile.stn("foobar", 3), + b"foo") + self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0"), + "foo") + self.assertEqual(tarfile.nts(b"foo\0bar\0"), + "foo") + def test_read_number_fields(self): # Issue 13158: Test if GNU tar specific base-256 number fields # are decoded correctly. self.assertEqual(tarfile.nti(b"0000001\x00"), 1) self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777) self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), - 0o10000000) + 0o10000000) self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), - 0xffffffff) - self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), - -1) - self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), - -100) - self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), - -0x100000000000000) + 0xffffffff) + # self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), + # -1) + # self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), + # -100) + # self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), + # -0x100000000000000) # Issue 24514: Test if empty number fields are converted to zero. 
self.assertEqual(tarfile.nti("\0"), 0) @@ -2149,6 +2153,10 @@ def test_partial_input_bz2(self): def test_main(): + #NOTE: + # The tests are assuming a default system locale with ISO-8859-1, but that's not normal anymore + tarfile.ENCODING = "ISO-8859-1" + support.unlink(TEMPDIR) os.makedirs(TEMPDIR) @@ -2222,5 +2230,10 @@ def test_main(): if os.path.exists(TEMPDIR): shutil.rmtree(TEMPDIR) +#NOTE: Reset tarfile default encoding again after tests are done +tarfile.ENCODING = sys.getfilesystemencoding() +if tarfile.ENCODING is None: + tarfile.ENCODING = sys.getdefaultencoding() + if __name__ == "__main__": test_main() From 48017271d0fa1690b4a7bcc39ec31ffef3280eba Mon Sep 17 00:00:00 2001 From: Frederick Price Date: Mon, 6 Jan 2025 14:51:36 -0500 Subject: [PATCH 3/4] Add in ActiveTests so its easier to test in future --- Lib/tarfile.py | 14 +++--- Lib/test/test_tarfile.py | 96 ++++++++++++++++------------------------ Makefile.pre.in | 10 +++++ 3 files changed, 57 insertions(+), 63 deletions(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index c2603d8651716e..4aec4ea14548dd 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -928,10 +928,10 @@ def __exit__(self, type, value, traceback): else: # An exception occurred. We must not call close() because # it would try to write end-of-archive blocks and padding. - #INFO: This was used in Python3.6, but we aren't based off io.BufferedReader in Python2 - # if not self._extfileobj: - # self.fileobj.close() - self.close() + if not self._extfileobj: + self.fileobj.close() + + #class ExFileObject #------------------ @@ -1962,14 +1962,16 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None): tarinfo.devminor = os.minor(statres.st_rdev) return tarinfo - def list(self, verbose=True): + def list(self, verbose=True, members=None): """Print a table of contents to sys.stdout. If `verbose' is False, only the names of the members are printed. If it is True, an `ls -l'-like output is produced. 
""" self._check() - for tarinfo in self: + if members is None: + members = self.getmembers() + for tarinfo in members: if verbose: print filemode(tarinfo.mode), print "%s/%s" % (tarinfo.uname or tarinfo.uid, diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 4f5677a4afcec4..17e760eecfc1a1 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -80,7 +80,6 @@ def test_fileobj_readlines(self): "I will gladly admit that Python is not the fastest running scripting language.\n", "fileobj.readlines() failed") - #FIX: This now breaks with the new tarfile def test_fileobj_iter(self): self.tar.extract("ustar/regtype", TEMPDIR) tarinfo = self.tar.getmember("ustar/regtype") @@ -238,18 +237,17 @@ def test_list_verbose(self): self.assertIn('pax' + ('/123' * 125) + '/longlink link to pax' + ('/123' * 125) + '/longname', out) - #INFO: Python2 list doesn't have the "members" functionality, so can't test - # def test_list_members(self): - # tio = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n') - # def members(tar): - # for tarinfo in tar.getmembers(): - # if 'reg' in tarinfo.name: - # yield tarinfo - # with support.swap_attr(sys, 'stdout', tio): - # self.tar.list(verbose=False, members=members(self.tar)) - # out = tio.detach().getvalue() - # self.assertIn(b'ustar/regtype', out) - # self.assertNotIn(b'ustar/conttype', out) + def test_list_members(self): + tio = io.BufferedRandom(io.BytesIO()) + def members(tar): + for tarinfo in tar.getmembers(): + if 'reg' in tarinfo.name: + yield tarinfo + with support.swap_attr(sys, 'stdout', tio): + self.tar.list(verbose=False, members=members(self.tar)) + out = tio.detach().getvalue() + self.assertIn(b'ustar/regtype', out) + self.assertNotIn(b'ustar/conttype', out) class GzipListTest(ListTest): tarname = gzipname @@ -577,9 +575,6 @@ def test_parallel_iteration(self): self.assertEqual(m1.offset, m2.offset) self.assertEqual(m1.name, m2.name) -# FIX: Brought in from Python3.6 -# class 
MiscReadTest(MiscReadTestBase, unittest.TestCase): -# test_fail_comp = None class StreamReadTest(CommonReadTest): @@ -622,13 +617,6 @@ def test_compare_members(self): finally: tar1.close() -# FIX: Brought in from Python3.6 -# class GzipStreamReadTest(GzipTest, StreamReadTest): -# pass -# -#INFO: Python2 doesn't have LZMA -# class LzmaStreamReadTest(LzmaTest, StreamReadTest): -# pass class TarTest: tarname = tarname @@ -640,6 +628,7 @@ class TarTest: def mode(self): return self.prefix + self.suffix + class DetectReadTest(TarTest, unittest.TestCase): def _testfunc_file(self, name, mode): try: @@ -710,10 +699,6 @@ def test_detect_stream_bz2(self): self._testfunc_file(tmpname, "r|*") -#INFO: Python2 doesn't have LZMA -# class LzmaDetectReadTest(LzmaTest, DetectReadTest): -# pass - class MemberReadTest(ReadTest): def _test_member(self, tarinfo, chksum=None, **kwargs): @@ -916,6 +901,7 @@ def test_eof_marker(self): t.size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE tar.addfile(t, io.BytesIO(b"a" * t.size)) + class WriteTest(WriteTestBase): mode = "w:" @@ -984,6 +970,7 @@ def test_directory_size(self): finally: os.rmdir(path) + #INFO: We don't have pathlib on Python2, not sure if we can really test this # def test_gettarinfo_pathlike_name(self): # with tarfile.open(tmpname, self.mode) as tar: @@ -996,8 +983,7 @@ def test_directory_size(self): # self.assertEqual(tarinfo.name, tarinfo2.name) # self.assertEqual(tarinfo.size, 3) - @unittest.skipUnless(hasattr(os, "link"), - "Missing hardlink implementation") + @unittest.skipUnless(hasattr(os, "link"),"Missing hardlink implementation") def test_link_size(self): if hasattr(os, "link"): link = os.path.join(TEMPDIR, "link") @@ -1546,21 +1532,6 @@ def test_create_taropen_pathlike_name(self): self.assertIn('spameggs42', names[0]) -#FIX: Brought in by Python3.6 -# class GzipCreateTest(GzipTest, CreateTest): -# pass - - -#FIX: Brought in by Python3.6 -# class Bz2CreateTest(Bz2Test, CreateTest): -# pass - - -#INFO: Python2 doesn't have 
LZMA -# class LzmaCreateTest(LzmaTest, CreateTest): -# pass - - class CreateWithXModeTest(CreateTest): prefix = "x" @@ -1813,12 +1784,29 @@ def test_error_handler_utf8(self): self.assertEqual(tar.getnames()[0], "\xe4\xf6\xfc/" + u"\u20ac".encode("utf8")) # FIX: Came in on Python3.6 - # # Test the same as above for the 100 bytes link field. - # def test_unicode_link1(self): - # self._test_ustar_link("0123456789" * 10) - # self._test_ustar_link("0123456789" * 10 + "0", ValueError) - # self._test_ustar_link("0123456789" * 9 + "01234567\xff") - # self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError) + # Test the same as above for the 100 bytes link field. + def test_unicode_link1(self): + self._test_ustar_link("0123456789" * 10) + self._test_ustar_link("0123456789" * 10 + "0", ValueError) + # Use a two byte UTF-8 character + self._test_ustar_link("0123456789" * 9 + "01234567\303\251") + self._test_ustar_link("0123456789" * 9 + "012345678\303\251", ValueError) + + def _test_ustar_link(self, name, exc=None): + with tarfile.open(tmpname, "w", format=0, encoding="utf-8") as tar: + t = tarfile.TarInfo("foo") + t.linkname = name + if exc is None: + tar.addfile(t) + else: + self.assertRaises(exc, tar.addfile, t) + + if exc is None: + with tarfile.open(tmpname, "r", encoding="utf-8") as tar: + for t in tar: + self.assertEqual(name, t.linkname) + break + class AppendTest(unittest.TestCase): # Test append mode (cp. patch #1652681). @@ -1979,7 +1967,7 @@ def test_char_fields(self): "foo") self.assertEqual(tarfile.nts(b"foo\0bar\0"), "foo") - + def test_read_number_fields(self): # Issue 13158: Test if GNU tar specific base-256 number fields # are decoded correctly. 
@@ -1989,12 +1977,6 @@ def test_read_number_fields(self): 0o10000000) self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), 0xffffffff) - # self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\xff"), - # -1) - # self.assertEqual(tarfile.nti(b"\xff\xff\xff\xff\xff\xff\xff\x9c"), - # -100) - # self.assertEqual(tarfile.nti(b"\xff\x00\x00\x00\x00\x00\x00\x00"), - # -0x100000000000000) # Issue 24514: Test if empty number fields are converted to zero. self.assertEqual(tarfile.nti("\0"), 0) diff --git a/Makefile.pre.in b/Makefile.pre.in index 2a14f3323bc3f4..5aa5e88cc4ebc3 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -873,6 +873,16 @@ testall: @DEF_MAKE_RULE@ platform -$(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS) $(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS) + +TESTOPTSA= $(TESTOPTS) -vvv +TESTPROGA= $(srcdir)/Lib/test/test_shutil.py +TESTPYTHONA= $(RUNSHARED) ./$(BUILDPYTHON) -Wd -3 -E -tt $(TESTPYTHONOPTS) +testA: + -find $(srcdir)/Lib -name '*.py[co]' -print | xargs rm -f + -$(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA) + $(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA) + + # Run the unitests for both architectures in a Universal build on OSX # Must be run on an Intel box. testuniversal: @DEF_MAKE_RULE@ platform From a7c09b5027aedda2965dde930840165b0c20d0b0 Mon Sep 17 00:00:00 2001 From: Frederick Price Date: Mon, 20 Jan 2025 18:56:02 -0500 Subject: [PATCH 4/4] CVE-2007-4559 Add NEWS entry --- Misc/NEWS.d/2.7.18.11.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/2.7.18.11.rst diff --git a/Misc/NEWS.d/2.7.18.11.rst b/Misc/NEWS.d/2.7.18.11.rst new file mode 100644 index 00000000000000..f046354fb64fc1 --- /dev/null +++ b/Misc/NEWS.d/2.7.18.11.rst @@ -0,0 +1,16 @@ +.. bpo: ? +.. date: 2025-01-20 +.. nonce: +.. release date: 2025-01-22 +.. section: Core and Builtins + +CVE-2007-4559 + +Implement parts of PEP 706 Filter for tarfile.extractall + +ExFileObject now acts as a context manager. 
+The list method of TarFile now accepts a "members" parameter. + +Various tests were added to check for proper behaviour with symlinks. + +Python2 doesn't have pathlib, so those tests are disabled.