From 1244af3b3d15ca30851232de529920257899843e Mon Sep 17 00:00:00 2001 From: sychen Date: Sat, 8 Feb 2025 16:31:17 +0800 Subject: [PATCH 1/2] avoid zlib decompression infinite loop --- java/core/src/java/org/apache/orc/impl/ZlibCodec.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/java/core/src/java/org/apache/orc/impl/ZlibCodec.java b/java/core/src/java/org/apache/orc/impl/ZlibCodec.java index 398ac0d16b..d4275a4c26 100644 --- a/java/core/src/java/org/apache/orc/impl/ZlibCodec.java +++ b/java/core/src/java/org/apache/orc/impl/ZlibCodec.java @@ -169,6 +169,17 @@ public void decompress(ByteBuffer in, ByteBuffer out) throws IOException { out.arrayOffset() + out.position(), out.remaining()); out.position(count + out.position()); + + if (!inflater.finished() && !inflater.needsDictionary() && !inflater.needsInput() && + count == 0) { + if (out.remaining() == 0) { + throw new IOException("Decompress output buffer too small. in = " + in + + ", out = " + out); + } else { + throw new IOException("Decompress error. in = " + in + + ", out = " + out); + } + } } catch (DataFormatException dfe) { throw new IOException("Bad compression data", dfe); } From c02c82c282d1767ab87d7fcecb3364551f29bd60 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 26 Mar 2025 11:51:37 +0800 Subject: [PATCH 2/2] add ut --- .../test/org/apache/orc/impl/TestZlib.java | 28 ++++++++++++++++++ .../src/test/resources/orc_corrupt_zlib.orc | Bin 0 -> 301 bytes 2 files changed, 28 insertions(+) create mode 100644 java/core/src/test/resources/orc_corrupt_zlib.orc diff --git a/java/core/src/test/org/apache/orc/impl/TestZlib.java b/java/core/src/test/org/apache/orc/impl/TestZlib.java index 4ca62ca2af..6e940923ed 100644 --- a/java/core/src/test/org/apache/orc/impl/TestZlib.java +++ b/java/core/src/test/org/apache/orc/impl/TestZlib.java @@ -18,13 +18,21 @@ package org.apache.orc.impl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.CompressionCodec; +import org.apache.orc.OrcFile; +import org.apache.orc.Reader; +import org.apache.orc.RecordReader; import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.ByteBuffer; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; public class TestZlib { @@ -54,4 +62,24 @@ public void testCorrupt() throws Exception { // EXPECTED } } + + @Test + public void testCorruptZlibFile() { + Configuration conf = new Configuration(); + Path testFilePath = new Path(ClassLoader. + getSystemResource("orc_corrupt_zlib.orc").getPath()); + + IOException exception = assertThrows( + IOException.class, + () -> { + try (Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf))) { + RecordReader rows = reader.rows(); + VectorizedRowBatch batch = reader.getSchema().createRowBatch(); + while (rows.nextBatch(batch)) { + } + } + } + ); + assertTrue(exception.getMessage().contains("Decompress output buffer too small")); + } } diff --git a/java/core/src/test/resources/orc_corrupt_zlib.orc b/java/core/src/test/resources/orc_corrupt_zlib.orc new file mode 100644 index 0000000000000000000000000000000000000000..e083a07c84898a36b5f47fad5fd06fd6b9581fc9 GIT binary patch literal 301 zcmeYdau#M_;9?hI<(S|Xz@Q6ci*PYBFfa)50tJ-VxEO>OBpC2WNH8#1|CrzSP+_Zq zJu@={I|G9Q15gJR!3+il-BXTS31?qxf2X1w`k_CL;of#LC`CrM9dJP^z7Q`)U`3Fv$I*Yg;(uI