Skip to content

Commit 0478bd8

Browse files
miss-islingtonencukougpshead
authored
[3.13] gh-149486: tarfile.data_filter: validate written link target (GH-149487) (GH-149555)
* gh-149486: tarfile.data_filter: validate written link target (GH-149487) The data filter rewrote linknames with normpath() but ran the containment check against the un-normalised value, and computed a symlink's directory before stripping trailing slashes. Both let a crafted archive create links pointing outside the destination. Also reject link members that resolve to the destination directory itself, which could otherwise replace it with a symlink and redirect all subsequent members. (Patch by Greg; Petr's just reviewing & merging.) (cherry picked from commit 5784119) Co-authored-by: Petr Viktorin <encukou@gmail.com> Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 6ab65b3 commit 0478bd8

3 files changed

Lines changed: 133 additions & 39 deletions

File tree

Lib/tarfile.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -819,16 +819,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
819819
if member.islnk() or member.issym():
820820
if os.path.isabs(member.linkname):
821821
raise AbsoluteLinkError(member)
822+
# A link member that resolves to the destination directory itself
823+
# would replace it with a (sym)link, redirecting the destination
824+
# for all subsequent members.
825+
if target_path == dest_path:
826+
raise OutsideDestinationError(member, target_path)
822827
normalized = os.path.normpath(member.linkname)
823828
if normalized != member.linkname:
824829
new_attrs['linkname'] = normalized
825830
if member.issym():
826-
target_path = os.path.join(dest_path,
827-
os.path.dirname(name),
828-
member.linkname)
831+
# The symlink is created at `name` with trailing separators
832+
# stripped, so its target is relative to the directory
833+
# containing that path.
834+
link_dir = os.path.dirname(name.rstrip('/' + os.sep))
835+
target_path = os.path.join(dest_path, link_dir, normalized)
829836
else:
830-
target_path = os.path.join(dest_path,
831-
member.linkname)
837+
target_path = os.path.join(dest_path, normalized)
832838
target_path = os.path.realpath(target_path,
833839
strict=os.path.ALLOW_MISSING)
834840
if os.path.commonpath([target_path, dest_path]) != dest_path:

Lib/test/test_tarfile.py

Lines changed: 117 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3649,6 +3649,39 @@ class TestExtractionFilters(unittest.TestCase):
36493649
# The destination for the extraction, within `outerdir`
36503650
destdir = outerdir / 'dest'
36513651

3652+
@classmethod
3653+
def setUpClass(cls):
3654+
# Posix and Windows have different pathname resolution:
3655+
# either symlink or a '..' component resolve first.
3656+
# Let's see which we are on.
3657+
if os_helper.can_symlink():
3658+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3659+
os.mkdir(testpath)
3660+
3661+
# testpath/current links to `.` which is all of:
3662+
# - `testpath`
3663+
# - `testpath/current`
3664+
# - `testpath/current/current`
3665+
# - etc.
3666+
os.symlink('.', os.path.join(testpath, 'current'))
3667+
3668+
# we'll test where `testpath/current/../file` ends up
3669+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3670+
pass
3671+
3672+
if os.path.exists(os.path.join(testpath, 'file')):
3673+
# Windows collapses 'current\..' to '.' first, leaving
3674+
# 'testpath\file'
3675+
cls.dotdot_resolves_early = True
3676+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3677+
# Posix resolves 'current' to '.' first, leaving
3678+
# 'testpath/../file'
3679+
cls.dotdot_resolves_early = False
3680+
else:
3681+
raise AssertionError('Could not determine link resolution')
3682+
else:
3683+
cls.dotdot_resolves_early = False
3684+
36523685
@contextmanager
36533686
def check_context(self, tar, filter, *, check_flag=True):
36543687
"""Extracts `tar` to `self.destdir` and allows checking the result
@@ -3820,10 +3853,19 @@ def test_parent_symlink(self):
38203853
+ "which is outside the destination")
38213854

38223855
with self.check_context(arc.open(), 'data'):
3823-
self.expect_exception(
3824-
tarfile.LinkOutsideDestinationError,
3825-
"""'parent' would link to ['"].*outerdir['"], """
3826-
+ "which is outside the destination")
3856+
if self.dotdot_resolves_early:
3857+
# 'current/../..' normalises to '..', which is rejected.
3858+
self.expect_exception(
3859+
tarfile.LinkOutsideDestinationError,
3860+
"""'parent' would link to ['"].*outerdir['"], """
3861+
+ "which is outside the destination")
3862+
else:
3863+
# 'current/..' normalises to '.'; the rewritten link is
3864+
# created and 'parent/evil' lands harmlessly inside the
3865+
# destination.
3866+
self.expect_file('current', symlink_to='.')
3867+
self.expect_file('parent', symlink_to='.')
3868+
self.expect_file('evil')
38273869

38283870
else:
38293871
# No symlink support. The symlinks are ignored.
@@ -3913,35 +3955,6 @@ def test_parent_symlink2(self):
39133955
# Test interplaying symlinks
39143956
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
39153957

3916-
# Posix and Windows have different pathname resolution:
3917-
# either symlink or a '..' component resolve first.
3918-
# Let's see which we are on.
3919-
if os_helper.can_symlink():
3920-
testpath = os.path.join(TEMPDIR, 'resolution_test')
3921-
os.mkdir(testpath)
3922-
3923-
# testpath/current links to `.` which is all of:
3924-
# - `testpath`
3925-
# - `testpath/current`
3926-
# - `testpath/current/current`
3927-
# - etc.
3928-
os.symlink('.', os.path.join(testpath, 'current'))
3929-
3930-
# we'll test where `testpath/current/../file` ends up
3931-
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3932-
pass
3933-
3934-
if os.path.exists(os.path.join(testpath, 'file')):
3935-
# Windows collapses 'current\..' to '.' first, leaving
3936-
# 'testpath\file'
3937-
dotdot_resolves_early = True
3938-
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3939-
# Posix resolves 'current' to '.' first, leaving
3940-
# 'testpath/../file'
3941-
dotdot_resolves_early = False
3942-
else:
3943-
raise AssertionError('Could not determine link resolution')
3944-
39453958
with ArchiveMaker() as arc:
39463959

39473960
# `current` links to `.` which is both the destination directory
@@ -3977,7 +3990,7 @@ def test_parent_symlink2(self):
39773990

39783991
with self.check_context(arc.open(), 'data'):
39793992
if os_helper.can_symlink():
3980-
if dotdot_resolves_early:
3993+
if self.dotdot_resolves_early:
39813994
# Fail when extracting a file outside destination
39823995
self.expect_exception(
39833996
tarfile.OutsideDestinationError,
@@ -4097,6 +4110,76 @@ def test_sly_relative2(self):
40974110
+ """['"].*moo['"], which is outside the """
40984111
+ "destination")
40994112

4113+
@symlink_test
4114+
@os_helper.skip_unless_symlink
4115+
def test_normpath_realpath_mismatch(self):
4116+
# The link-target check must validate the value that will actually
4117+
# be written to disk (the normalised linkname), not the original.
4118+
# Here 'a' is a symlink to a deep nonexistent path, so realpath()
4119+
# of 'a/../../...' stays inside the destination while normpath()
4120+
# collapses 'a/..' lexically and escapes.
4121+
depth = len(self.destdir.parts) + 5
4122+
deep = '/'.join(f'p{i}' for i in range(depth))
4123+
sneaky = 'a/' + '../' * depth + 'flag'
4124+
for kind in 'symlink_to', 'hardlink_to':
4125+
with self.subTest(kind):
4126+
with ArchiveMaker() as arc:
4127+
arc.add('a', symlink_to=deep)
4128+
arc.add('escape', **{kind: sneaky})
4129+
with self.check_context(arc.open(), 'data'):
4130+
self.expect_exception(
4131+
tarfile.LinkOutsideDestinationError)
4132+
4133+
@symlink_test
4134+
@os_helper.skip_unless_symlink
4135+
def test_symlink_trailing_slash(self):
4136+
# A trailing slash on a symlink member's name must not cause the
4137+
# link target to be resolved relative to the wrong directory.
4138+
with ArchiveMaker() as arc:
4139+
t = tarfile.TarInfo('x/')
4140+
t.type = tarfile.SYMTYPE
4141+
t.linkname = '..'
4142+
arc.tar_w.addfile(t)
4143+
arc.add('x/escaped', content='hi')
4144+
4145+
with self.check_context(arc.open(), 'data'):
4146+
self.expect_exception(tarfile.LinkOutsideDestinationError)
4147+
4148+
@symlink_test
4149+
@os_helper.skip_unless_symlink
4150+
def test_link_at_destination(self):
4151+
# A link member whose name resolves to the destination directory
4152+
# itself must be rejected: otherwise the destination is replaced
4153+
# by a symlink and later members can be redirected through it.
4154+
for name in '', '.', './':
4155+
with ArchiveMaker() as arc:
4156+
t = tarfile.TarInfo(name)
4157+
t.type = tarfile.SYMTYPE
4158+
t.linkname = '.'
4159+
arc.tar_w.addfile(t)
4160+
4161+
with self.check_context(arc.open(), 'data'):
4162+
self.expect_exception(tarfile.OutsideDestinationError)
4163+
4164+
@symlink_test
4165+
@os_helper.skip_unless_symlink
4166+
def test_empty_name_symlink_chain(self):
4167+
# Regression test for a chain of empty-named symlinks that
4168+
# incrementally redirects the destination outwards.
4169+
with ArchiveMaker() as arc:
4170+
for name, target in [('', ''), ('a/', '..'),
4171+
('', 'dummy'), ('', 'a'),
4172+
('b/', '..'),
4173+
('', 'dummy'), ('', 'a/b')]:
4174+
t = tarfile.TarInfo(name)
4175+
t.type = tarfile.SYMTYPE
4176+
t.linkname = target
4177+
arc.tar_w.addfile(t)
4178+
arc.add('escaped', content='hi')
4179+
4180+
with self.check_context(arc.open(), 'data'):
4181+
self.expect_exception(tarfile.FilterError)
4182+
41004183
@symlink_test
41014184
def test_deep_symlink(self):
41024185
# Test that symlinks and hardlinks inside a directory
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:func:`tarfile.data_filter` now validates link targets using the same
2+
normalised value that is written to disk, strips trailing separators from
3+
the member name when resolving a symlink's directory, and rejects link
4+
members that would replace the destination directory itself. This closes
5+
several path-traversal bypasses of the ``data`` extraction filter.

0 commit comments

Comments
 (0)