Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Doc/library/gzip.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ The module defines the following items:
Opening :class:`GzipFile` for writing without specifying the *mode*
argument is deprecated.

.. versionchanged:: 3.11
A :exc:`UserWarning` is now emitted when a gzip file contains trailing
garbage after its last member, instead of raising :exc:`BadGzipFile`.

.. function:: compress(data, compresslevel=9, *, mtime=None)

Expand Down Expand Up @@ -202,6 +205,9 @@ The module defines the following items:
.. versionchanged:: 3.11
Speed is improved by decompressing members at once in memory instead of in
a streamed fashion.

.. versionchanged:: 3.11
A :exc:`UserWarning` is now emitted when gzip data contains trailing
garbage after its last member, instead of raising :exc:`BadGzipFile`.

.. _gzip-usage-examples:

Expand Down
20 changes: 14 additions & 6 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import zlib
import builtins
import io
import warnings
import _compression

__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]
Expand Down Expand Up @@ -409,18 +410,22 @@ def _read_exact(fp, n):
return data


def _read_gzip_header(fp):
def _read_gzip_header(fp, first_member=True):
'''Read a gzip header from `fp` and progress to the end of the header.

Returns last mtime if header was present or None otherwise.
Returns last mtime if header was present or None otherwise. Raises an
error if the magic does not match the gzip magic, unless first_member is
False, in which case it only warns about trailing garbage.
'''
magic = fp.read(2)
if magic == b'':
return None

if magic != b'\037\213':
raise BadGzipFile('Not a gzipped file (%r)' % magic)

if first_member:
raise BadGzipFile('Not a gzipped file (%r)' % magic)
warnings.warn("Trailing garbage in gzip data ignored.")
return None
(method, flag, last_mtime) = struct.unpack("<BBIxx", _read_exact(fp, 8))
if method != 8:
raise BadGzipFile('Unknown compression method')
Expand Down Expand Up @@ -453,13 +458,15 @@ def __init__(self, fp):
# Set flag indicating start of a new member
self._new_member = True
self._last_mtime = None
self._decompressed_members = 0

def _init_read(self):
self._crc = zlib.crc32(b"")
self._stream_size = 0 # Decompressed size of unconcatenated stream

def _read_gzip_header(self):
last_mtime = _read_gzip_header(self._fp)
last_mtime = _read_gzip_header(self._fp,
not self._decompressed_members)
if last_mtime is None:
return False
self._last_mtime = last_mtime
Expand Down Expand Up @@ -529,6 +536,7 @@ def _read_eof(self):
hex(self._crc)))
elif isize != (self._stream_size & 0xffffffff):
raise BadGzipFile("Incorrect length of data produced")
self._decompressed_members += 1

# Gzip files can be padded with zeroes and still have archives.
# Consume all zero bytes and set the file position to the first
Expand Down Expand Up @@ -590,7 +598,7 @@ def decompress(data):
decompressed_members = []
while True:
fp = io.BytesIO(data)
if _read_gzip_header(fp) is None:
if _read_gzip_header(fp, not decompressed_members) is None:
return b"".join(decompressed_members)
# Use a zlib raw deflate compressor
do = zlib.decompressobj(wbits=-zlib.MAX_WBITS)
Expand Down
11 changes: 11 additions & 0 deletions Lib/test/test_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,17 @@ def test_decompress_missing_trailer(self):
compressed_data = gzip.compress(data1)
self.assertRaises(EOFError, gzip.decompress, compressed_data[:-8])

def test_trailing_garbage_decompress(self):
    """Trailing garbage after a valid member warns but still decompresses."""
    compressed_data = gzip.compress(data1) + b"garbage"
    with self.assertWarns(UserWarning):
        decompressed = gzip.decompress(compressed_data)
    # The warning must not come at the cost of the valid member's payload.
    self.assertEqual(decompressed, data1)

def test_trailing_garbage_gzipfile(self):
    """GzipFile.read() warns on trailing garbage and returns the payload."""
    compressed_data = gzip.compress(data1) + b"garbage"
    fileobj = io.BytesIO(compressed_data)
    with gzip.GzipFile(fileobj=fileobj, mode="rb") as g:
        with self.assertWarns(UserWarning):
            payload = g.read()
    # Everything before the garbage must still be delivered to the caller.
    self.assertEqual(payload, data1)

def test_read_truncated(self):
data = data1*50
# Drop the CRC (4 bytes) and file size (4 bytes).
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A :exc:`UserWarning` is now emitted when gzip files contain trailing garbage
after the last member, instead of raising :exc:`BadGzipFile`.