From aa3d395f8d4e7d59e1fd823a4713d9889def4488 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Fri, 12 Jul 2019 14:12:36 -0700 Subject: [PATCH 1/2] ORC-363: Enable zStandard codec Support the ZSTD codec in the Java ORC writer and reader. Under the hood, the implementation leverages the airlift aircompressor library. --- java/core/pom.xml | 5 +++ .../java/org/apache/orc/CompressionKind.java | 2 +- .../java/org/apache/orc/impl/ReaderImpl.java | 1 + .../java/org/apache/orc/impl/WriterImpl.java | 5 +++ .../test/org/apache/orc/impl/TestZstd.java | 45 +++++++++++++++++++ java/pom.xml | 8 +++- 6 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 java/core/src/test/org/apache/orc/impl/TestZstd.java diff --git a/java/core/pom.xml b/java/core/pom.xml index c6de022001..cbc8dbdb57 100644 --- a/java/core/pom.xml +++ b/java/core/pom.xml @@ -97,6 +97,11 @@ mockito-core test + + io.airlift + slice + test + diff --git a/java/core/src/java/org/apache/orc/CompressionKind.java b/java/core/src/java/org/apache/orc/CompressionKind.java index 3cffe57ee9..4a1cd5c883 100644 --- a/java/core/src/java/org/apache/orc/CompressionKind.java +++ b/java/core/src/java/org/apache/orc/CompressionKind.java @@ -23,5 +23,5 @@ * can be applied to ORC files. 
*/ public enum CompressionKind { - NONE, ZLIB, SNAPPY, LZO, LZ4 + NONE, ZLIB, SNAPPY, LZO, LZ4, ZSTD } diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java index d1311b9a81..786e04636d 100644 --- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java +++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java @@ -550,6 +550,7 @@ private static OrcProto.PostScript extractPostScript(ByteBuffer bb, Path path, case SNAPPY: case LZO: case LZ4: + case ZSTD: break; default: throw new IllegalArgumentException("Unknown compression"); diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java index 7f9cb63254..257f8a0cca 100644 --- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java +++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java @@ -34,6 +34,8 @@ import io.airlift.compress.lz4.Lz4Decompressor; import io.airlift.compress.lzo.LzoCompressor; import io.airlift.compress.lzo.LzoDecompressor; +import io.airlift.compress.zstd.ZstdCompressor; +import io.airlift.compress.zstd.ZstdDecompressor; import org.apache.orc.ColumnStatistics; import org.apache.orc.CompressionCodec; import org.apache.orc.CompressionKind; @@ -275,6 +277,9 @@ public static CompressionCodec createCodec(CompressionKind kind) { case LZ4: return new AircompressorCodec(kind, new Lz4Compressor(), new Lz4Decompressor()); + case ZSTD: + return new AircompressorCodec(kind, new ZstdCompressor(), + new ZstdDecompressor()); default: throw new IllegalArgumentException("Unknown compression codec: " + kind); diff --git a/java/core/src/test/org/apache/orc/impl/TestZstd.java b/java/core/src/test/org/apache/orc/impl/TestZstd.java new file mode 100644 index 0000000000..8a6bbea12d --- /dev/null +++ b/java/core/src/test/org/apache/orc/impl/TestZstd.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.orc.impl; + +import io.airlift.compress.zstd.ZstdCompressor; +import io.airlift.compress.zstd.ZstdDecompressor; +import org.apache.orc.CompressionCodec; +import org.apache.orc.CompressionKind; +import org.junit.Test; + +import java.nio.ByteBuffer; + +import static org.junit.Assert.assertEquals; + +public class TestZstd { + + @Test + public void testNoOverflow() throws Exception { + ByteBuffer in = ByteBuffer.allocate(10); + ByteBuffer out = ByteBuffer.allocate(10); + in.put(new byte[]{1,2,3,4,5,6,7,10}); + in.flip(); + CompressionCodec codec = new AircompressorCodec( + CompressionKind.ZSTD, new ZstdCompressor(), new ZstdDecompressor()); + assertEquals(false, codec.compress(in, out, null, + codec.getDefaultOptions())); + } + +} diff --git a/java/pom.xml b/java/pom.xml index 10e245869b..18f5303bd4 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -429,7 +429,7 @@ io.airlift aircompressor - 0.10 + 0.15 io.airlift @@ -689,6 +689,12 @@ 1.9.5 test + + io.airlift + slice + 0.36 + test + From 286d00a5f285a1f517c8927fbbe7088788794fec Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 30 Jul 2019 15:46:25 -0700 Subject: [PATCH 2/2] remove slice exclusion --- java/core/pom.xml | 5 ----- .../src/java/org/apache/orc/impl/WriterImpl.java | 1 + java/pom.xml 
| 12 ------------ 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/java/core/pom.xml b/java/core/pom.xml index cbc8dbdb57..c6de022001 100644 --- a/java/core/pom.xml +++ b/java/core/pom.xml @@ -97,11 +97,6 @@ mockito-core test - - io.airlift - slice - test - diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java index 257f8a0cca..55d1efde15 100644 --- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java +++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java @@ -537,6 +537,7 @@ private OrcProto.CompressionKind writeCompressionKind(CompressionKind kind) { case SNAPPY: return OrcProto.CompressionKind.SNAPPY; case LZO: return OrcProto.CompressionKind.LZO; case LZ4: return OrcProto.CompressionKind.LZ4; + case ZSTD: return OrcProto.CompressionKind.ZSTD; default: throw new IllegalArgumentException("Unknown compression " + kind); } diff --git a/java/pom.xml b/java/pom.xml index 18f5303bd4..0c16a9ef1b 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -430,12 +430,6 @@ io.airlift aircompressor 0.15 - - - io.airlift - slice - - javax.xml.bind @@ -689,12 +683,6 @@ 1.9.5 test - - io.airlift - slice - 0.36 - test -