From 6c95e5a13c5d38260737ed80c87bef1f8cb9e2db Mon Sep 17 00:00:00 2001 From: Danny Deschenes <54910444+ddeschenes-1@users.noreply.github.com> Date: Fri, 20 Dec 2024 21:24:21 -0500 Subject: [PATCH 1/3] Optionally add header CRC16 to gzip header. --- .../gzip/GzipCompressorInputStream.java | 1 + .../gzip/GzipCompressorOutputStream.java | 24 ++++++- .../compressors/gzip/GzipParameters.java | 24 +++++++ .../gzip/GzipCompressorOutputStreamTest.java | 70 +++++++++++++++++++ 4 files changed, 117 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java index 7f5d3cd09d4..f95727c744f 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java @@ -266,6 +266,7 @@ private boolean init(final boolean isFirstMember) throws IOException { // doesn't support this field, but zlib seems to be able to at least // skip over it. if ((flg & FHCRC) != 0) { + parameters.setHeaderCRC(true); inData.readShort(); } diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java index b78a67813da..b7863bdd5a2 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java @@ -47,6 +47,9 @@ public class GzipCompressorOutputStream extends CompressorOutputStream>> 8 & 0xff); out.write(extra); + crc.update(extra.length & 0xff); + crc.update(extra.length >>> 8 & 0xff); + crc.update(extra); } writeC(fileName, parameters.getFileNameCharset()); writeC(comment, parameters.getFileNameCharset()); + if (parameters.hasHeaderCRC()) { + int v = (int) crc.getValue() & 0xffff; + out.write(v & 0xff); + out.write((v >>> 8) & 0xff); + } + crc.reset(); } private void writeTrailer() throws IOException { diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java index be42e466c7c..f241745fd22 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java @@ -298,6 +298,7 @@ public int type() { private OS operatingSystem = OS.UNKNOWN; // Unknown OS by default private int bufferSize = 512; private int deflateStrategy = Deflater.DEFAULT_STRATEGY; + private boolean headerCRC; /** * Gets size of the buffer used to retrieve compressed data. @@ -432,6 +433,16 @@ public OS getOS() { return operatingSystem; } + /** + * Returns if the header CRC is to be added (when writing) or was present (when reading). + * + * @return true is header CRC will be added (on write) or was found (after read). + * @since 1.28.0 + */ + public boolean hasHeaderCRC() { + return headerCRC; + } + private String requireNonNulByte(final String text) { if (StringUtils.isNotEmpty(text) && ArrayUtils.contains(text.getBytes(fileNameCharset), (byte) 0)) { throw new IllegalArgumentException("String encoded in Charset '" + fileNameCharset + "' contains the nul byte 0 which is not supported in gzip."); @@ -489,6 +500,7 @@ public void setDeflateStrategy(final int deflateStrategy) { this.deflateStrategy = deflateStrategy; } + /** * Sets the extra subfields. Note that a non-null extra will appear in the gzip header regardless of the presence of subfields, while a null extra will not * appear at all. @@ -539,6 +551,17 @@ public void setFileNameCharset(final Charset charset) { this.fileNameCharset = Charsets.toCharset(charset, GzipUtils.GZIP_ENCODING); } + /** + * Establishes the presence of the header flag FLG.FHCRC and its headers CRC16 value. + * + * @param headerCRC when true, the header CRC16 (actually low 16 buts of a CRC32) is calculated and inserted + * in the gzip header on write; on read it means the field was present. + * @since 1.28.0 + */ + public void setHeaderCRC(boolean headerCRC) { + this.headerCRC = headerCRC; + } + /** * Sets the modification time (MTIME) of the compressed file. * @@ -599,6 +622,7 @@ public void setOS(final OS os) { this.operatingSystem = os != null ? os : OS.UNKNOWN; } + @Override public String toString() { final StringBuilder builder = new StringBuilder(); diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java index 1fd9bced02d..4db102d36bf 100644 --- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java @@ -20,6 +20,7 @@ package org.apache.commons.compress.compressors.gzip; import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertSame; @@ -28,6 +29,8 @@ import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assumptions.assumeTrue; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.charset.Charset; @@ -36,8 +39,13 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.zip.GZIPInputStream; +import java.util.zip.ZipException; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.compress.compressors.gzip.ExtraField.SubField; +import org.apache.commons.compress.compressors.gzip.GzipParameters.OS; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -214,4 +222,66 @@ public void testFileNameChinesePercentEncoded() throws IOException { testFileName("??????.xml", EXPECTED_FILE_NAME); } + + /** + * Tests the gzip header CRC. + * + * @throws IOException When the test has issues with the underlying file system or unexpected gzip operations. + */ + @Test + public void testHcrc() throws IOException, DecoderException { + final GzipParameters parameters = new GzipParameters(); + parameters.setHeaderCRC(true); + parameters.setModificationTime(0x66554433); // avoid changing time + parameters.setFileName("AAAA"); + parameters.setComment("ZZZZ"); + parameters.setOS(OS.UNIX); + final ExtraField extra = new ExtraField(); + extra.addSubField("BB", "CCCC".getBytes(StandardCharsets.ISO_8859_1)); + parameters.setExtraField(extra); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (GzipCompressorOutputStream gos = new GzipCompressorOutputStream(baos, parameters)) { + // nothing to write for this test. + } + final byte[] result = baos.toByteArray(); + final byte[] expected = Hex.decodeHex("1f8b" // id1 id2 + + "08" //cm + + "1e" //flg(FEXTRA|FNAME|FCOMMENT|FHCRC) + + "33445566" // mtime little endian + + "00" + "03" // xfl os + + "0800" + "4242" + "0400" + "43434343" //xlen sfid sflen "CCCC" + + "4141414100" // "AAAA" with \0 + + "5a5a5a5a00" // "ZZZZ" with \0 + + "d842" //crc32 = 839242d8 + + "0300" // empty deflate stream + + "00000000" // crs32 + + "00000000" // isize + ); + assertArrayEquals(expected, result); + assertDoesNotThrow(() -> { + try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) { + //if it does not fail, the hcrc is good. + } + }); + try (GzipCompressorInputStream gis = new GzipCompressorInputStream(new ByteArrayInputStream(result))) { + final GzipParameters metaData = gis.getMetaData(); + assertTrue(metaData.hasHeaderCRC()); + assertEquals(0x66554433, metaData.getModificationTime()); + assertEquals(1, metaData.getExtraField().size()); + final SubField sf = metaData.getExtraField().iterator().next(); + assertEquals("BB", sf.getId()); + assertEquals("CCCC", new String(sf.getPayload(), StandardCharsets.ISO_8859_1)); + assertEquals("AAAA", metaData.getFileName()); + assertEquals("ZZZZ", metaData.getComment()); + assertEquals(OS.UNIX, metaData.getOS()); + } + //verify that the constructor normally fails on bad HCRC + assertThrows(ZipException.class, () -> { + result[30] = 0x77; //corrupt the low byte of header CRC + try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) { + //if it does not fail, the hcrc is good. + } + }, "Header CRC verification is no longer feasible with JDK classes. The earlier assertion would have passed despite a bad header CRC."); + } + } From 1fb5d72351523ff4f6347bdaabf7734926a29c89 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Sun, 22 Dec 2024 15:01:36 -0500 Subject: [PATCH 2/3] Fix Checktyle errors --- .../compress/compressors/gzip/GzipCompressorOutputStream.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java index b7863bdd5a2..0865477607b 100644 --- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java @@ -164,7 +164,7 @@ public void write(final int b) throws IOException { */ private void writeC(final String value, final Charset charset) throws IOException { if (value != null) { - byte[] ba = value.getBytes(charset); + final byte[] ba = value.getBytes(charset); out.write(ba); out.write(0); crc.update(ba); @@ -209,7 +209,7 @@ private void writeHeader(final GzipParameters parameters) throws IOException { writeC(fileName, parameters.getFileNameCharset()); writeC(comment, parameters.getFileNameCharset()); if (parameters.hasHeaderCRC()) { - int v = (int) crc.getValue() & 0xffff; + final int v = (int) crc.getValue() & 0xffff; out.write(v & 0xff); out.write((v >>> 8) & 0xff); } From 3c92256e18f164ad054577688d82d699717ffb09 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Sun, 22 Dec 2024 15:07:04 -0500 Subject: [PATCH 3/3] Normalize whitespace --- .../gzip/GzipCompressorOutputStreamTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java index 4db102d36bf..08b08f4bec5 100644 --- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java @@ -245,8 +245,8 @@ public void testHcrc() throws IOException, DecoderException { } final byte[] result = baos.toByteArray(); final byte[] expected = Hex.decodeHex("1f8b" // id1 id2 - + "08" //cm - + "1e" //flg(FEXTRA|FNAME|FCOMMENT|FHCRC) + + "08" // cm + + "1e" // flg(FEXTRA|FNAME|FCOMMENT|FHCRC) + "33445566" // mtime little endian + "00" + "03" // xfl os + "0800" + "4242" + "0400" + "43434343" //xlen sfid sflen "CCCC" @@ -260,7 +260,7 @@ public void testHcrc() throws IOException, DecoderException { assertArrayEquals(expected, result); assertDoesNotThrow(() -> { try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) { - //if it does not fail, the hcrc is good. + // if it does not fail, the hcrc is good. } }); try (GzipCompressorInputStream gis = new GzipCompressorInputStream(new ByteArrayInputStream(result))) { @@ -268,18 +268,18 @@ public void testHcrc() throws IOException, DecoderException { assertTrue(metaData.hasHeaderCRC()); assertEquals(0x66554433, metaData.getModificationTime()); assertEquals(1, metaData.getExtraField().size()); - final SubField sf = metaData.getExtraField().iterator().next(); + final SubField sf = metaData.getExtraField().iterator().next(); assertEquals("BB", sf.getId()); assertEquals("CCCC", new String(sf.getPayload(), StandardCharsets.ISO_8859_1)); assertEquals("AAAA", metaData.getFileName()); assertEquals("ZZZZ", metaData.getComment()); assertEquals(OS.UNIX, metaData.getOS()); } - //verify that the constructor normally fails on bad HCRC + // verify that the constructor normally fails on bad HCRC assertThrows(ZipException.class, () -> { result[30] = 0x77; //corrupt the low byte of header CRC try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(result))) { - //if it does not fail, the hcrc is good. + // if it does not fail, the hcrc is good. } }, "Header CRC verification is no longer feasible with JDK classes. The earlier assertion would have passed despite a bad header CRC."); }