diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/AbstractTarBuilder.java b/src/main/java/org/apache/commons/compress/archivers/tar/AbstractTarBuilder.java index 0f471a1ca1f..8bfd10ce53f 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/AbstractTarBuilder.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/AbstractTarBuilder.java @@ -31,6 +31,7 @@ public abstract class AbstractTarBuilder> extends AbstractArchiveBuilder { private int blockSize = TarConstants.DEFAULT_BLKSIZE; + private long maxPaxHeaderSize = TarConstants.DEFAULT_MAX_PAX_HEADER_SIZE; private int recordSize = TarConstants.DEFAULT_RCDSIZE; private boolean lenient; @@ -45,6 +46,10 @@ int getBlockSize() { return blockSize; } + long getMaxPaxHeaderSize() { + return maxPaxHeaderSize; + } + int getRecordSize() { return recordSize; } @@ -76,6 +81,27 @@ public B setLenient(final boolean lenient) { return asThis(); } + /** + * Sets the maximum size in bytes of a PAX extended header block that will + * be parsed. PAX headers larger than this limit cause a + * {@link org.apache.commons.compress.MemoryLimitException}. + * + *

<p>The default is {@value TarConstants#DEFAULT_MAX_PAX_HEADER_SIZE} + * (10 MB), which is generous for legitimate archives. Set to + * {@link Long#MAX_VALUE} to restore the previous unlimited behavior.</p>

+ * + * @param maxPaxHeaderSize the maximum PAX header size in bytes; must be positive. + * @return {@code this} instance. + * @throws IllegalArgumentException if {@code maxPaxHeaderSize} is not positive. + */ + public B setMaxPaxHeaderSize(final long maxPaxHeaderSize) { + if (maxPaxHeaderSize <= 0) { + throw new IllegalArgumentException("maxPaxHeaderSize must be positive"); + } + this.maxPaxHeaderSize = maxPaxHeaderSize; + return asThis(); + } + /** * Sets the record size. * diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java index f8249a354a7..fb16dcc56df 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java @@ -171,12 +171,15 @@ public static boolean matches(final byte[] signature, final int length) { private final boolean lenient; + private final long maxPaxHeaderSize; + private TarArchiveInputStream(final Builder builder) throws IOException { super(builder); this.zipEncoding = ZipEncodingHelper.getZipEncoding(builder.getCharset()); this.recordBuffer = new byte[builder.getRecordSize()]; this.blockSize = builder.getBlockSize(); this.lenient = builder.isLenient(); + this.maxPaxHeaderSize = builder.getMaxPaxHeaderSize(); } /** @@ -521,8 +524,8 @@ public TarArchiveEntry getNextEntry() throws IOException { lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry); if (lastWasSpecial) { // Handle PAX, GNU long name, or other special records - TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, getMaxEntryNameLength(), currEntry, paxHeaders, sparseHeaders, - globalPaxHeaders, globalSparseHeaders); + TarUtils.handleSpecialTarRecord(currentInputStream, zipEncoding, getMaxEntryNameLength(), maxPaxHeaderSize, currEntry, paxHeaders, + sparseHeaders, globalPaxHeaders, globalSparseHeaders); } } while 
(lastWasSpecial); // Apply global and local PAX headers diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java index db6cd1f3ef3..8a570ab8749 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarConstants.java @@ -41,6 +41,13 @@ public interface TarConstants { */ int DEFAULT_BLKSIZE = DEFAULT_RCDSIZE * 20; + /** + * Default maximum PAX extended header size in bytes (10 MB). + * + * @since 1.29.0 + */ + long DEFAULT_MAX_PAX_HEADER_SIZE = 10 * 1024 * 1024; + /** * GNU format as per before tar 1.12. */ diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java index 1973ac07b10..bcaa5caec07 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarFile.java @@ -177,6 +177,8 @@ public static Builder builder() { private final int maxEntryNameLength; + private final long maxPaxHeaderSize; + private TarFile(final Builder builder) throws IOException { this.archive = builder.getChannel(SeekableByteChannel.class); try { @@ -186,6 +188,7 @@ private TarFile(final Builder builder) throws IOException { this.blockSize = builder.getBlockSize(); this.lenient = builder.isLenient(); this.maxEntryNameLength = builder.getMaxEntryNameLength(); + this.maxPaxHeaderSize = builder.getMaxPaxHeaderSize(); // Populate `entries` explicitly here instead of using `forEach`/`stream`, // because both rely on `entries` internally. // Using them would cause a self-referential loop and leave `entries` empty. 
@@ -474,8 +477,8 @@ private TarArchiveEntry getNextTarEntry() throws IOException { lastWasSpecial = TarUtils.isSpecialTarRecord(currEntry); if (lastWasSpecial) { // Handle PAX, GNU long name, or other special records - TarUtils.handleSpecialTarRecord(currentStream, zipEncoding, maxEntryNameLength, currEntry, paxHeaders, sparseHeaders, globalPaxHeaders, - globalSparseHeaders); + TarUtils.handleSpecialTarRecord(currentStream, zipEncoding, maxEntryNameLength, maxPaxHeaderSize, currEntry, paxHeaders, sparseHeaders, + globalPaxHeaders, globalSparseHeaders); } } while (lastWasSpecial); // Apply global and local PAX headers diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java index c0ad3a113ce..b2859371e1f 100644 --- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java +++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java @@ -354,6 +354,7 @@ public static void formatUnsignedOctalString(final long value, final byte[] buff * @param input the input stream from which to read the special tar entry content. * @param encoding the encoding to use for reading names. * @param maxEntryNameLength the maximum allowed length for entry names. + * @param maxPaxHeaderSize the maximum allowed size in bytes for a PAX extended header block. * @param entry the tar entry to handle. * @param paxHeaders the map to update with PAX headers. * @param sparseHeaders the list to update with sparse headers. @@ -361,9 +362,9 @@ public static void formatUnsignedOctalString(final long value, final byte[] buff * @param globalSparseHeaders the list to update with global sparse headers. * @throws IOException if an I/O error occurs while reading the entry. 
*/ - static void handleSpecialTarRecord(final InputStream input, final ZipEncoding encoding, final int maxEntryNameLength, final TarArchiveEntry entry, - final Map paxHeaders, final List sparseHeaders, final Map globalPaxHeaders, - final List globalSparseHeaders) throws IOException { + static void handleSpecialTarRecord(final InputStream input, final ZipEncoding encoding, final int maxEntryNameLength, final long maxPaxHeaderSize, + final TarArchiveEntry entry, final Map paxHeaders, final List sparseHeaders, + final Map globalPaxHeaders, final List globalSparseHeaders) throws IOException { if (entry.isGNULongLinkEntry()) { // GNU long link entry: read and store the link path final String longLinkName = readLongName(input, encoding, maxEntryNameLength, entry); @@ -376,12 +377,12 @@ static void handleSpecialTarRecord(final InputStream input, final ZipEncoding en // Global PAX header: clear and update global PAX and sparse headers globalSparseHeaders.clear(); globalPaxHeaders.clear(); - globalPaxHeaders.putAll(parsePaxHeaders(input, globalPaxHeaders, entry.getSize(), maxEntryNameLength, globalSparseHeaders)); + globalPaxHeaders.putAll(parsePaxHeaders(input, globalPaxHeaders, entry.getSize(), maxPaxHeaderSize, maxEntryNameLength, globalSparseHeaders)); } else if (entry.isPaxHeader()) { // PAX header: clear and update local PAX and sparse headers, parse GNU sparse headers if present sparseHeaders.clear(); paxHeaders.clear(); - paxHeaders.putAll(parsePaxHeaders(input, globalPaxHeaders, entry.getSize(), maxEntryNameLength, sparseHeaders)); + paxHeaders.putAll(parsePaxHeaders(input, globalPaxHeaders, entry.getSize(), maxPaxHeaderSize, maxEntryNameLength, sparseHeaders)); if (paxHeaders.containsKey(TarGnuSparseKeys.MAP)) { sparseHeaders.addAll(parseFromPAX01SparseHeaders(paxHeaders.get(TarGnuSparseKeys.MAP))); } @@ -674,6 +675,7 @@ static List parsePAX1XSparseHeaders(final InputStream in * @param inputStream The input stream providing PAX header data. 
* @param globalPaxHeaders The global PAX headers of the tar archive. * @param headerSize The total size of the PAX header block; always non-negative. + * @param maxPaxHeaderSize The maximum allowed size in bytes for a PAX extended header block. * @param maxEntryPathLength The maximum permitted length for entry paths. * @param sparseHeaders Output list to collect any GNU sparse 0.0 headers found. * @return A map of PAX headers merged with the supplied global headers. @@ -683,10 +685,10 @@ static List parsePAX1XSparseHeaders(final InputStream in * @throws IOException If an I/O error occurs while reading. */ static Map parsePaxHeaders(final InputStream inputStream, final Map globalPaxHeaders, final long headerSize, - final int maxEntryPathLength, final List sparseHeaders) throws IOException { + final long maxPaxHeaderSize, final int maxEntryPathLength, final List sparseHeaders) throws IOException { assert headerSize >= 0 : "headerSize must be non-negative"; // Check if there is enough memory to store the headers - MemoryLimitException.checkBytes(headerSize, Long.MAX_VALUE); + MemoryLimitException.checkBytes(headerSize, maxPaxHeaderSize); final Map headers = new HashMap<>(globalPaxHeaders); Long offset = null; // Format is "length keyword=value\n"; diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/PaxHeaderOomTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/PaxHeaderOomTest.java new file mode 100644 index 00000000000..f091bb56080 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/tar/PaxHeaderOomTest.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers.tar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link TarConstants#DEFAULT_MAX_PAX_HEADER_SIZE} enforcement. 
+ */ +class PaxHeaderOomTest { + + private static final int BLOCK = 512; + + private static byte[] buildTarGzWithPaxValue(final long valueSize) throws IOException { + final ByteArrayOutputStream buf = new ByteArrayOutputStream(); + try (GZIPOutputStream gz = new GZIPOutputStream(buf, 8192)) { + writeTar(gz, valueSize); + } + return buf.toByteArray(); + } + + private static void writeTar(final OutputStream out, final long valueSize) throws IOException { + final String keyword = "test.data"; + final long fixedPart = 1L + keyword.length() + 1 + 1; + long totalLen = fixedPart + valueSize; + int lenDigits = Long.toString(totalLen).length(); + totalLen = fixedPart + valueSize + lenDigits; + if (Long.toString(totalLen).length() != lenDigits) { + totalLen++; + } + final long paxContentSize = totalLen; + final byte[] paxPrefix = (totalLen + " " + keyword + "=").getBytes(StandardCharsets.UTF_8); + + out.write(tarHeader("PaxHeader/entry", paxContentSize, (byte) 'x')); + out.write(paxPrefix); + final byte[] chunk = new byte[8192]; + Arrays.fill(chunk, (byte) 'A'); + long remaining = valueSize; + while (remaining > 0) { + out.write(chunk, 0, (int) Math.min(remaining, chunk.length)); + remaining -= Math.min(remaining, chunk.length); + } + out.write('\n'); + out.write(new byte[pad(paxContentSize)]); + + final byte[] body = "hello\n".getBytes(StandardCharsets.UTF_8); + out.write(tarHeader("entry.txt", body.length, (byte) '0')); + out.write(body); + out.write(new byte[pad(body.length)]); + out.write(new byte[BLOCK * 2]); + } + + private static byte[] tarHeader(final String name, final long size, final byte type) { + final byte[] h = new byte[BLOCK]; + System.arraycopy(name.getBytes(StandardCharsets.UTF_8), 0, h, 0, Math.min(name.length(), 100)); + System.arraycopy("0000644\0".getBytes(), 0, h, 100, 8); + System.arraycopy("0000000\0".getBytes(), 0, h, 108, 8); + System.arraycopy("0000000\0".getBytes(), 0, h, 116, 8); + System.arraycopy(String.format("%011o", size).getBytes(), 0, h, 
124, 11); + h[135] = 0; + System.arraycopy("00000000000\0".getBytes(), 0, h, 136, 12); + h[156] = type; + System.arraycopy("ustar\0".getBytes(), 0, h, 257, 6); + h[263] = '0'; + h[264] = '0'; + Arrays.fill(h, 148, 156, (byte) ' '); + long chk = 0; + for (final byte b : h) { + chk += b & 0xFF; + } + System.arraycopy(String.format("%06o\0 ", chk).getBytes(), 0, h, 148, 8); + return h; + } + + private static int pad(final long len) { + final int rem = (int) (len % BLOCK); + return rem == 0 ? 0 : BLOCK - rem; + } + + @Test + void testDefaultLimitRejectsOversizedPaxHeader() throws Exception { + final byte[] tgz = buildTarGzWithPaxValue(20L * 1024 * 1024); + try (TarArchiveInputStream tis = new TarArchiveInputStream( + new GZIPInputStream(new ByteArrayInputStream(tgz)))) { + tis.getNextEntry(); + fail("Should have thrown MemoryLimitException"); + } catch (final MemoryLimitException ignored) { + } + } + + @Test + void testCustomLimitAllowsHeader() throws Exception { + final byte[] tgz = buildTarGzWithPaxValue(1024); + try (TarArchiveInputStream tis = TarArchiveInputStream.builder() + .setInputStream(new GZIPInputStream(new ByteArrayInputStream(tgz))) + .setMaxPaxHeaderSize(100 * 1024 * 1024) + .get()) { + final TarArchiveEntry entry = tis.getNextEntry(); + assertNotNull(entry); + assertEquals("entry.txt", entry.getName()); + assertEquals(1024, entry.getExtraPaxHeader("test.data").length()); + } + } + + @Test + void testDefaultLimitAllowsNormalHeader() throws Exception { + final byte[] tgz = buildTarGzWithPaxValue(1024); + try (TarArchiveInputStream tis = new TarArchiveInputStream( + new GZIPInputStream(new ByteArrayInputStream(tgz)))) { + final TarArchiveEntry entry = tis.getNextEntry(); + assertNotNull(entry); + assertEquals("entry.txt", entry.getName()); + } + } +} diff --git a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java index df5aceba4e6..3a3ed925f81 100644 --- 
a/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java +++ b/src/test/java/org/apache/commons/compress/archivers/tar/TarUtilsTest.java @@ -106,7 +106,7 @@ private static byte[] paddedUtf8Bytes(final String s) { private static Map parsePaxHeaders(final byte[] data, final List sparseHeaders, final Map globalPaxHeaders) throws IOException { - return TarUtils.parsePaxHeaders(new ByteArrayInputStream(data), globalPaxHeaders, data.length, Short.MAX_VALUE, sparseHeaders); + return TarUtils.parsePaxHeaders(new ByteArrayInputStream(data), globalPaxHeaders, data.length, Long.MAX_VALUE, Short.MAX_VALUE, sparseHeaders); } static Stream testReadLongNameHandlesLimits() {