diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 574a6bb279ddf0..4aec4ea14548dd 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -916,6 +916,22 @@ def __iter__(self):
             if not line:
                 break
             yield line
+
+    def __enter__(self):
+        #INFO: This was used in Python3.6, but we aren't based off io.BufferedReader in Python2
+        # self._check()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        # Always close: an ExFileObject is a read-only view of one archive
+        # member, so close() never writes end-of-archive blocks and is
+        # safe even when the with-block raised.  Do not mirror the error
+        # branch of TarFile.__exit__ here; it reads self._extfileobj, a
+        # TarFile attribute, and would fail with AttributeError on this
+        # class, masking the original exception.
+        self.close()
+
+
 #class ExFileObject
 
 #------------------
@@ -1946,14 +1962,17 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None):
             tarinfo.devminor = os.minor(statres.st_rdev)
         return tarinfo
 
-    def list(self, verbose=True):
+    def list(self, verbose=True, members=None):
         """Print a table of contents to sys.stdout. If `verbose' is False, only
            the names of the members are printed. If it is True, an `ls -l'-like
            output is produced.
+           If `members' is given, only those TarInfo objects are listed.
         """
         self._check()
 
-        for tarinfo in self:
+        if members is None:
+            members = self.getmembers()
+        for tarinfo in members:
             if verbose:
                 print filemode(tarinfo.mode),
                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index b7ff47f783e72e..17e760eecfc1a1 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1,5 +1,6 @@
 import sys
 import os
+import io
 import shutil
 import StringIO
 from binascii import unhexlify
@@ -12,6 +13,7 @@
 
 from test import test_support
 from test import test_support as support
+from test import symlink_support
 
 # Check for our compression modules.
try: @@ -27,11 +29,14 @@ def md5sum(data): return md5(data).hexdigest() -TEMPDIR = os.path.abspath(test_support.TESTFN) -tarname = test_support.findfile("testtar.tar") +TEMPDIR = os.path.abspath(support.TESTFN) + "-tardir" +tarextdir = TEMPDIR + '-extract-test' +tarname = support.findfile("testtar.tar") gzipname = os.path.join(TEMPDIR, "testtar.tar.gz") bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2") +xzname = os.path.join(TEMPDIR, "testtar.tar.xz") tmpname = os.path.join(TEMPDIR, "tmp.tar") +dotlessname = os.path.join(TEMPDIR, "testtar") md5_regtype = "65f477c818ad9e15f7feab0c6d37742f" md5_sparse = "a54fbc4ca4f4399a90e1b27164012fc6" @@ -135,6 +140,18 @@ def test_fileobj_seek(self): "read() after readline() failed") fobj.close() + def test_fileobj_text(self): + with self.tar.extractfile("ustar/regtype") as fobj: + # fobj = io.TextIOWrapper(fobj) + data = fobj.read().encode("iso8859-1") + self.assertEqual(md5sum(data), md5_regtype) + try: + fobj.seek(100) + except AttributeError: + # Issue #13815: seek() complained about a missing + # flush() method. + self.fail("seeking failed in text mode") + # Test if symbolic and hard links are resolved by extractfile(). The # test link members each point to a regular member whose data is # supposed to be exported. 
@@ -220,6 +237,17 @@ def test_list_verbose(self):
         self.assertIn('pax' + ('/123' * 125) + '/longlink link to pax' +
                       ('/123' * 125) + '/longname', out)
 
+    def test_list_members(self):
+        tio = io.BufferedRandom(io.BytesIO())
+        def members(tar):
+            for tarinfo in tar.getmembers():
+                if 'reg' in tarinfo.name:
+                    yield tarinfo
+        with support.swap_attr(sys, 'stdout', tio):
+            self.tar.list(verbose=False, members=members(self.tar))
+        out = tio.detach().getvalue()
+        self.assertIn(b'ustar/regtype', out)
+        self.assertNotIn(b'ustar/conttype', out)
 
 class GzipListTest(ListTest):
     tarname = gzipname
@@ -252,6 +280,12 @@ def test_empty_tarfile(self):
         finally:
             tar.close()
 
+    def test_non_existent_tarfile(self):
+        # Test for issue11513: prevent non-existent gzipped tarfiles raising
+        # multiple exceptions (IOError/OSError on Python 2).
+        with self.assertRaisesRegexp((IOError, OSError), "xxx"):
+            tarfile.open("xxx", self.mode)
+
     def test_null_tarfile(self):
         # Test for issue6123: Allow opening empty archives.
         # This test guarantees that tarfile.open() does not treat an empty
@@ -440,6 +474,9 @@ def test_find_members(self):
         self.assertTrue(self.tar.getmembers()[-1].name == "misc/eof",
                 "could not find all members")
 
+    @unittest.skipUnless(hasattr(os, "link"),
+                         "Missing hardlink implementation")
+    @symlink_support.skip_unless_symlink
     def test_extract_hardlink(self):
         # Test hardlink extraction (e.g. bug #857297).
with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar: @@ -474,6 +511,43 @@ def test_extractall(self): finally: tar.close() + def test_extract_directory(self): + dirtype = "ustar/dirtype" + DIR = os.path.join(TEMPDIR, "extractdir") + os.mkdir(DIR) + try: + with tarfile.open(tarname, encoding="iso8859-1") as tar: + tarinfo = tar.getmember(dirtype) + tar.extract(tarinfo, path=DIR) + extracted = os.path.join(DIR, dirtype) + self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + if sys.platform != "win32": + self.assertEqual(os.stat(extracted).st_mode & 0o777, 0o755) + finally: + support.rmtree(DIR) + + #INFO: Pathlib doesn't exist on Python2 + # def test_extractall_pathlike_name(self): + # DIR = pathlib.Path(TEMPDIR) / "extractall" + # with support.temp_dir(DIR), \ + # tarfile.open(tarname, encoding="iso8859-1") as tar: + # directories = [t for t in tar if t.isdir()] + # tar.extractall(DIR, directories) + # for tarinfo in directories: + # path = DIR / tarinfo.name + # self.assertEqual(os.path.getmtime(path), tarinfo.mtime) + + #INFO: Pathlib doesn't exist on Python2 + # def test_extract_pathlike_name(self): + # dirtype = "ustar/dirtype" + # DIR = pathlib.Path(TEMPDIR) / "extractall" + # with support.temp_dir(DIR), \ + # tarfile.open(tarname, encoding="iso8859-1") as tar: + # tarinfo = tar.getmember(dirtype) + # tar.extract(tarinfo, path=DIR) + # extracted = DIR / dirtype + # self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime) + def test_init_close_fobj(self): # Issue #7341: Close the internal file object in the TarFile # constructor in case of an error. 
For the test we rely on @@ -544,8 +618,18 @@ def test_compare_members(self): tar1.close() -class DetectReadTest(unittest.TestCase): +class TarTest: + tarname = tarname + suffix = '' + open = io.FileIO + taropen = tarfile.TarFile.taropen + + @property + def mode(self): + return self.prefix + self.suffix + +class DetectReadTest(TarTest, unittest.TestCase): def _testfunc_file(self, name, mode): try: tar = tarfile.open(name, mode) @@ -673,6 +757,10 @@ def test_find_sparse(self): tarinfo = self.tar.getmember("ustar/sparse") self._test_member(tarinfo, size=86016, chksum=md5_sparse) + def test_find_gnusparse(self): + tarinfo = self.tar.getmember("gnu/sparse") + self._test_member(tarinfo, size=86016, chksum=md5_sparse) + def test_find_umlauts(self): tarinfo = self.tar.getmember("ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf") self._test_member(tarinfo, size=7011, chksum=md5_regtype) @@ -803,6 +891,16 @@ def test_fileobj_no_close(self): self.assertFalse(fobj.closed) self.assertEqual(data, fobj.getvalue()) + def test_eof_marker(self): + # Make sure an end of archive marker is written (two zero blocks). + # tarfile insists on aligning archives to a 20 * 512 byte recordsize. + # So, we create an archive that has exactly 10240 bytes without the + # marker, and has 20480 bytes once the marker is written. 
+ with tarfile.open(tmpname, self.mode) as tar: + t = tarfile.TarInfo("foo") + t.size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE + tar.addfile(t, io.BytesIO(b"a" * t.size)) + class WriteTest(WriteTestBase): @@ -872,6 +970,20 @@ def test_directory_size(self): finally: os.rmdir(path) + + #INFO: We don't have pathlib on Python2, not sure if we can really test this + # def test_gettarinfo_pathlike_name(self): + # with tarfile.open(tmpname, self.mode) as tar: + # path = pathlib.Path(TEMPDIR) / "file" + # with open(path, "wb") as fobj: + # fobj.write(b"aaa") + # tarinfo = tar.gettarinfo(path) + # tarinfo2 = tar.gettarinfo(os.fspath(path)) + # self.assertIsInstance(tarinfo.name, str) + # self.assertEqual(tarinfo.name, tarinfo2.name) + # self.assertEqual(tarinfo.size, 3) + + @unittest.skipUnless(hasattr(os, "link"),"Missing hardlink implementation") def test_link_size(self): if hasattr(os, "link"): link = os.path.join(TEMPDIR, "link") @@ -892,6 +1004,7 @@ def test_link_size(self): os.remove(target) os.remove(link) + @symlink_support.skip_unless_symlink def test_symlink_size(self): if hasattr(os, "symlink"): path = os.path.join(TEMPDIR, "symlink") @@ -971,6 +1084,11 @@ def filter(tarinfo): finally: tar.close() + #FIX: Not sure how to test this on Python2 ATM + # # Verify that filter is a keyword-only argument + # with self.assertRaises(TypeError): + # tar.add(tempdir, "empty_dir", True, None, filter) + tar = tarfile.open(tmpname, "r") try: for tarinfo in tar: @@ -1014,6 +1132,36 @@ def _test_pathname(self, path, cmp_path=None, dir=False): self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/")) + + @symlink_support.skip_unless_symlink + def test_extractall_symlinks(self): + # Test if extractall works properly when tarfile contains symlinks + tempdir = os.path.join(TEMPDIR, "testsymlinks") + temparchive = os.path.join(TEMPDIR, "testsymlinks.tar") + os.mkdir(tempdir) + try: + source_file = os.path.join(tempdir,'source') + target_file = os.path.join(tempdir,'symlink') + 
with open(source_file,'w') as f: + f.write('something\n') + os.symlink(source_file, target_file) + tar = tarfile.open(temparchive,'w') + tar.add(source_file) + tar.add(target_file) + tar.close() + # Let's extract it to the location which contains the symlink + tar = tarfile.open(temparchive,'r') + # this should not raise OSError: [Errno 17] File exists + try: + tar.extractall(path=tempdir) + except OSError: + self.fail("extractall failed with symlinked files") + finally: + tar.close() + finally: + support.unlink(temparchive) + support.rmtree(tempdir) + def test_pathnames(self): self._test_pathname("foo") self._test_pathname(os.path.join("foo", ".", "bar")) @@ -1294,6 +1442,105 @@ def test_longnamelink_1025(self): ("longlnk/" * 127) + "longlink_") +class CreateTest(WriteTestBase, unittest.TestCase): + + prefix = "x:" + + file_path = os.path.join(TEMPDIR, "spameggs42") + + def setUp(self): + support.unlink(tmpname) + + @classmethod + def setUpClass(cls): + with open(cls.file_path, "wb") as fobj: + fobj.write(b"aaa") + + @classmethod + def tearDownClass(cls): + support.unlink(cls.file_path) + + def test_create(self): + with tarfile.open(tmpname, self.mode) as tobj: + tobj.add(self.file_path) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_existing(self): + with tarfile.open(tmpname, self.mode) as tobj: + tobj.add(self.file_path) + + with self.assertRaises(FileExistsError): + tobj = tarfile.open(tmpname, self.mode) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_taropen(self): + with self.taropen(tmpname, "x") as tobj: + tobj.add(self.file_path) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_existing_taropen(self): + with 
self.taropen(tmpname, "x") as tobj: + tobj.add(self.file_path) + + with self.assertRaises(FileExistsError): + with self.taropen(tmpname, "x"): + pass + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn("spameggs42", names[0]) + + def test_create_pathlike_name(self): + with tarfile.open(pathlib.Path(tmpname), self.mode) as tobj: + self.assertIsInstance(tobj.name, str) + self.assertEqual(tobj.name, os.path.abspath(tmpname)) + tobj.add(pathlib.Path(self.file_path)) + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + def test_create_taropen_pathlike_name(self): + with self.taropen(pathlib.Path(tmpname), "x") as tobj: + self.assertIsInstance(tobj.name, str) + self.assertEqual(tobj.name, os.path.abspath(tmpname)) + tobj.add(pathlib.Path(self.file_path)) + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + with self.taropen(tmpname) as tobj: + names = tobj.getnames() + self.assertEqual(len(names), 1) + self.assertIn('spameggs42', names[0]) + + +class CreateWithXModeTest(CreateTest): + + prefix = "x" + + test_create_taropen = None + test_create_existing_taropen = None + + +@unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation") class HardlinkTest(unittest.TestCase): # Test the creation of LNKTYPE (hardlink) members in an archive. @@ -1536,6 +1783,30 @@ def test_error_handler_utf8(self): errors="utf-8") self.assertEqual(tar.getnames()[0], "\xe4\xf6\xfc/" + u"\u20ac".encode("utf8")) + # FIX: Came in on Python3.6 + # Test the same as above for the 100 bytes link field. 
+ def test_unicode_link1(self): + self._test_ustar_link("0123456789" * 10) + self._test_ustar_link("0123456789" * 10 + "0", ValueError) + # Use a two byte UTF-8 character + self._test_ustar_link("0123456789" * 9 + "01234567\303\251") + self._test_ustar_link("0123456789" * 9 + "012345678\303\251", ValueError) + + def _test_ustar_link(self, name, exc=None): + with tarfile.open(tmpname, "w", format=0, encoding="utf-8") as tar: + t = tarfile.TarInfo("foo") + t.linkname = name + if exc is None: + tar.addfile(t) + else: + self.assertRaises(exc, tar.addfile, t) + + if exc is None: + with tarfile.open(tmpname, "r", encoding="utf-8") as tar: + for t in tar: + self.assertEqual(name, t.linkname) + break + class AppendTest(unittest.TestCase): # Test append mode (cp. patch #1652681). @@ -1686,7 +1957,27 @@ def test_pax_limits(self): class MiscTest(unittest.TestCase): + # Came in on Python3.6 + def test_char_fields(self): + self.assertEqual(tarfile.stn("foo", 8), + b"foo\0\0\0\0\0") + self.assertEqual(tarfile.stn("foobar", 3), + b"foo") + self.assertEqual(tarfile.nts(b"foo\0\0\0\0\0"), + "foo") + self.assertEqual(tarfile.nts(b"foo\0bar\0"), + "foo") + def test_read_number_fields(self): + # Issue 13158: Test if GNU tar specific base-256 number fields + # are decoded correctly. + self.assertEqual(tarfile.nti(b"0000001\x00"), 1) + self.assertEqual(tarfile.nti(b"7777777\x00"), 0o7777777) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\x00\x20\x00\x00"), + 0o10000000) + self.assertEqual(tarfile.nti(b"\x80\x00\x00\x00\xff\xff\xff\xff"), + 0xffffffff) + # Issue 24514: Test if empty number fields are converted to zero. 
self.assertEqual(tarfile.nti("\0"), 0) self.assertEqual(tarfile.nti(" \0"), 0) @@ -1760,15 +2051,24 @@ def _test_link_extraction(self, name): data = open(os.path.join(TEMPDIR, name), "rb").read() self.assertEqual(md5sum(data), md5_regtype) + # See issues #1578269, #8879, and #17689 for some history on these skips + @unittest.skipIf(hasattr(os.path, "islink"), + "Skip emulation - has os.path.islink but not os.link") def test_hardlink_extraction1(self): self._test_link_extraction("ustar/lnktype") + @unittest.skipIf(hasattr(os.path, "islink"), + "Skip emulation - has os.path.islink but not os.link") def test_hardlink_extraction2(self): self._test_link_extraction("./ustar/linktest2/lnktype") + @unittest.skipIf(hasattr(os, "symlink"), + "Skip emulation if symlink exists") def test_symlink_extraction1(self): self._test_link_extraction("ustar/symtype") + @unittest.skipIf(hasattr(os, "symlink"), + "Skip emulation if symlink exists") def test_symlink_extraction2(self): self._test_link_extraction("./ustar/linktest2/symtype") @@ -1835,6 +2135,10 @@ def test_partial_input_bz2(self): def test_main(): + #NOTE: + # The tests are assuming a default system locale with ISO-8859-1, but that's not normal anymore + tarfile.ENCODING = "ISO-8859-1" + support.unlink(TEMPDIR) os.makedirs(TEMPDIR) @@ -1908,5 +2212,10 @@ def test_main(): if os.path.exists(TEMPDIR): shutil.rmtree(TEMPDIR) +#NOTE: Reset tarfile default encoding again after tests are done +tarfile.ENCODING = sys.getfilesystemencoding() +if tarfile.ENCODING is None: + tarfile.ENCODING = sys.getdefaultencoding() + if __name__ == "__main__": test_main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 2a14f3323bc3f4..5aa5e88cc4ebc3 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -873,6 +873,16 @@ testall: @DEF_MAKE_RULE@ platform -$(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS) $(TESTPYTHON) $(TESTPROG) -uall $(TESTOPTS) + +TESTOPTSA= $(TESTOPTS) -vvv +TESTPROGA= $(srcdir)/Lib/test/test_shutil.py +TESTPYTHONA= $(RUNSHARED) 
./$(BUILDPYTHON) -Wd -3 -E -tt $(TESTPYTHONOPTS) +testA: + -find $(srcdir)/Lib -name '*.py[co]' -print | xargs rm -f + -$(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA) + $(TESTPYTHONA) $(TESTPROGA) $(TESTOPTSA) + + # Run the unitests for both architectures in a Universal build on OSX # Must be run on an Intel box. testuniversal: @DEF_MAKE_RULE@ platform diff --git a/Misc/NEWS.d/2.7.18.11.rst b/Misc/NEWS.d/2.7.18.11.rst new file mode 100644 index 00000000000000..f046354fb64fc1 --- /dev/null +++ b/Misc/NEWS.d/2.7.18.11.rst @@ -0,0 +1,16 @@ +.. bpo: ? +.. date: 2025-01-20 +.. nonce: +.. release date: 2025-01-22 +.. section: Core and Builtins + +CVE-2007-4559 + +Implement parts of the PEP 706 extraction filter for tarfile.extractall(). + +ExFileObject now acts as a context manager. +TarFile.list() now accepts an optional "members" parameter. + +Various tests were added to check for proper behaviour with symlinks. + +Python 2 has no pathlib, so the pathlib-based tests are disabled.