From be807251ed8eb29b644acd32de017bc74a048a35 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Sun, 20 Apr 2025 10:07:17 -0400 Subject: [PATCH] Adds a builder to ZstdCompressorOutputStream to configure almost all zstd options - Avoid JVM segmentation fault in zstd-jni 1.5.7-2 See also https://github.com/luben/zstd-jni/pull/356 - Deprecate constructors except the classic one-argument constructor - Add tests - Better Javadocs --- pom.xml | 2 +- .../zstandard/ZstdCompressorOutputStream.java | 524 ++++++++++++++++-- .../compressors/zstandard/ZstdConstants.java | 196 +++++++ .../zstandard/ZstdRoundtripTest.java | 139 ++++- 4 files changed, 821 insertions(+), 40 deletions(-) create mode 100644 src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdConstants.java diff --git a/pom.xml b/pom.xml index 4db05eb319e..4cab1efcb81 100644 --- a/pom.xml +++ b/pom.xml @@ -59,7 +59,7 @@ Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj. org.apache.commons.io;resolution:=optional, org.apache.commons.io.*;resolution:=optional, org.apache.commons.lang3;resolution:=optional, - org.apache.commons.lang3.reflect;resolution:=optional, + org.apache.commons.lang3.*;resolution:=optional, org.apache.commons.codec;resolution:=optional, org.apache.commons.codec.digest;resolution:=optional, * diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java index 78eaf3fb63a..cbfc840c5b9 100644 --- a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdCompressorOutputStream.java @@ -23,81 +23,533 @@ import java.io.OutputStream; import org.apache.commons.compress.compressors.CompressorOutputStream; +import org.apache.commons.io.build.AbstractStreamBuilder; +import org.apache.commons.lang3.ArrayUtils; import 
com.github.luben.zstd.ZstdOutputStream; /** - * {@link CompressorOutputStream} implementation to create Zstandard encoded stream. Library relies on Zstandard - * JNI + * {@link CompressorOutputStream} implementation to create Zstandard encoded stream. + *

+ * This class avoids making the underlying {@code zstd} classes part of the public or protected API. The underlying implementation is provided through the + * Zstandard JNI library which is based on zstd. + *

* + * @see Zstandard JNI + * @see zstd * @since 1.16 */ public class ZstdCompressorOutputStream extends CompressorOutputStream { + // @formatter:off /** - * Wraps the given stream into a zstd-jni ZstdOutputStream using the default values for {@code level}, {@code - * closeFrameOnFlush} and {@code useChecksum}. + * Builds a new {@link ZstdCompressorOutputStream}. * - * @param outStream the stream to write to - * @throws IOException if zstd-jni does + *

+ * For example: + *

+ *
{@code
+     * ZstdCompressorOutputStream s = ZstdCompressorOutputStream.builder()
+     *   .setPath(path)
+     *   .setLevel(3)
+     *   .setStrategy(0)
+     *   .setWorkers(0)
+     *   .get();
+     * }
+     * 
+ *

+ * This class avoids making the underlying {@code zstd} classes part of the public or protected API. + *

+ * @see #get() + * @see ZstdConstants + * @since 1.28.0 */ + // @formatter:on + public static final class Builder extends AbstractStreamBuilder { + + private int chainLog; + private boolean checksum; + private boolean closeFrameOnFlush; + private byte[] dict; + private int hashLog; + private int jobSize; + private int level = ZstdConstants.ZSTD_CLEVEL_DEFAULT; + private int minMatch; + private int overlapLog; + private int searchLog; + private int strategy; + private int targetLength; + private int windowLog; + private int workers; + + /** + * Constructs a new builder of {@link ZstdCompressorOutputStream}. + */ + public Builder() { + // empty + } + + @Override + public ZstdCompressorOutputStream get() throws IOException { + return new ZstdCompressorOutputStream(this); + } + + /** + * Sets the size of the multi-probe search table, as a power of 2. + *

+ * The value {@code 0} means use the default chainLog. + *

+ *

+ * The resulting memory usage is (in C) {@code (1 << (chainLog + 2))}. The input must be between {@link ZstdConstants#ZSTD_CHAINLOG_MIN} and + * {@link ZstdConstants#ZSTD_CHAINLOG_MAX}. Larger tables result in better and slower compression. This parameter is useless for "fast" strategy but + * still useful when using "dfast" strategy, in which case it defines a secondary probe table. + *

+ * + * @param chainLog the size of the multi-probe search table, as a power of 2. + * @return this instance. + * @see ZstdConstants#ZSTD_CHAINLOG_MIN + * @see ZstdConstants#ZSTD_CHAINLOG_MAX + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setChainLog(final int chainLog) { + this.chainLog = chainLog; + return this; + } + + /** + * Sets whether a 32-bits checksum of content is written at end of frame (defaults to {@code false}). + *

+ * The value {@code false} means no checksum. + *

+ * + * @param checksum Whether a 32-bits checksum of content is written at end of frame. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setChecksum(final boolean checksum) { + this.checksum = checksum; + return this; + } + + /** + * Sets whether to close the frame on flush. + *

+ * This will guarantee that it can be read fully if the process crashes before closing the stream. The downside is that this negatively affects the + * compression ratio. + *

+ *

+ * The value {@code false} means don't close on flush. + *

+ * + * @param closeFrameOnFlush whether to close the frame on flush. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setCloseFrameOnFlush(final boolean closeFrameOnFlush) { + this.closeFrameOnFlush = closeFrameOnFlush; + return this; + } + + /** + * Sets an internal {@code CDict} from the given {@code dict} buffer. + *

+ * Decompression will have to use same dictionary. + *

+ * Using a dictionary + *
    + *
  • Loading a null (or 0-length) dictionary invalidates the previous dictionary, returning to no-dictionary mode.
  • + *
  • A dictionary is sticky, it will be used for all future compressed frames. To return to the no-dictionary mode, load a null dictionary.
  • + *
  • Loading a dictionary builds tables. This is a CPU consuming operation, with non-negligible impact on latency. Tables are dependent on compression + * parameters, and for this reason, compression parameters can no longer be changed after loading a dictionary.
  • + *
  • The dictionary content will be copied internally.
  • + *
+ * + * @param dict The dictionary buffer. + * @return this instance. + * @see Zstd manual Chapter12 + * @see zstd.h + */ + public Builder setDict(final byte[] dict) { + this.dict = dict; + return this; + } + + /** + * Size of the initial probe table, as a power of 2. + *

+ * The value {@code 0} means "use default hashLog". + *

+ *

+ * The resulting memory usage is (in C) {@code (1 << (hashLog + 2))}. This value must be between {@link ZstdConstants#ZSTD_HASHLOG_MIN} and + * {@link ZstdConstants#ZSTD_HASHLOG_MAX}. Using a larger table improves the compression ratio of strategies <= dFast, and improves speed of + * strategies > dFast. + *

+ * + * @param hashLog Size of the initial probe table, as a power of 2. + * @return this instance. + * @see ZstdConstants#ZSTD_HASHLOG_MIN + * @see ZstdConstants#ZSTD_HASHLOG_MAX + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setHashLog(final int hashLog) { + this.hashLog = hashLog; + return this; + } + + /** + * Size of a compression job. + *

+ * This value is enforced only when {@code workers >= 1}. Each compression job is completed in parallel, so this value can indirectly impact the number + * of active threads. A value of 0 uses a default behavior, which is dynamically determined based on compression parameters. Job size must be a minimum + * of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever + * is largest. The minimum size is automatically and transparently enforced. + *

+ *

+ * This is a multi-threading parameter and is only active if multi-threading is enabled (that is, if the underlying native library is compiled with the build + * macro {@code ZSTD_MULTITHREAD}). + *

+ * + * @param jobSize Size of a compression job. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstdmt_compress.h + */ + public Builder setJobSize(final int jobSize) { + this.jobSize = jobSize; + return this; + } + + /** + * Sets compression parameters according to a pre-defined {@code cLevel} table, from {@link ZstdConstants#ZSTD_CLEVEL_MIN} to {@link ZstdConstants#ZSTD_CLEVEL_MAX}. + *

+ * The exact compression parameters are dynamically determined, depending on both compression level and srcSize (when known). The default level is + * {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. The special value 0 means default, which is controlled by {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. + *

+ *
    + *
  • The value 0 means use the default, which is controlled by {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}
  • + *
  • You may pass a negative compression level.
  • + *
  • Setting a level does not automatically set all other compression parameters to defaults. Setting this value will eventually dynamically impact + * the compression parameters which have not been manually set. The manually set values are used.
  • + *
+ * + * @param level The compression level, from {@link ZstdConstants#ZSTD_CLEVEL_MIN} to {@link ZstdConstants#ZSTD_CLEVEL_MAX}, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. + * @return this instance + * @see ZstdConstants#ZSTD_CLEVEL_DEFAULT + * @see ZstdConstants#ZSTD_CLEVEL_MIN + * @see ZstdConstants#ZSTD_CLEVEL_MAX + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setLevel(final int level) { + this.level = level; + return this; + } + + /** + * Sets minimum match size for long distance matcher. + *

+ * Zstd can still find matches of smaller size, by updating its search algorithm to look for this size and larger. Using larger values increases + * compression and decompression speed, but decreases the ratio. The value must be between {@link ZstdConstants#ZSTD_MINMATCH_MIN} and + * {@link ZstdConstants#ZSTD_MINMATCH_MAX}. Note that currently, for all strategies < {@code btopt}, effective minimum is {@code 4}; for all strategies + * > {@code fast}, effective maximum is {@code 6}. + *

+ *

+ * The value {@code 0} means use the default minMatchLength. + *

+ * + * @param minMatch minimum match size for long distance matcher. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setMinMatch(final int minMatch) { + this.minMatch = minMatch; + return this; + } + + /** + * Sets the overlap size, as a fraction of window size. + *

+ * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. It helps preserve compression ratio, while each job + * is compressed in parallel. This value is enforced only when workers >= 1. Larger values increase compression ratio, but decrease speed. Possible + * values range from 0 to 9: + *

+ *
    + *
  • 0 means "default" : value will be determined by the library, depending on strategy
  • + *
  • 1 means "no overlap"
  • + *
  • 9 means "full overlap", using a full window size.
  • + *
+ *

+ * Each intermediate rank increases/decreases the load size by a factor 2: + *

+ *
    + *
  • 9: full window
  • + *
  • 8: w / 2
  • + *
  • 7: w / 4
  • + *
  • 6: w / 8
  • + *
  • 5: w / 16
  • + *
  • 4: w / 32
  • + *
  • 3: w / 64
  • + *
  • 2: w / 128
  • + *
  • 1: no overlap
  • + *
  • 0: default + *
+ *

+ * The default value varies between 6 and 9, depending on the strategy. + *

+ *

+ * This is a multi-threading parameter and is only active if multi-threading is enabled (that is, if the underlying native library is compiled with the build + * macro {@code ZSTD_MULTITHREAD}). + *

+ * + * @param overlapLog the overlap size, as a fraction of window size. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setOverlapLog(final int overlapLog) { + this.overlapLog = overlapLog; + return this; + } + + /** + * Sets number of search attempts, as a power of 2. + *

+ * More attempts result in better and slower compression. This parameter is useless for "fast" and "dFast" strategies. + *

+ *

+ * The value {@code 0} means use the default searchLog. + *

+ * + * @param searchLog number of search attempts, as a power of 2. + * @return this instance. + * @see ZstdConstants#ZSTD_SEARCHLOG_MIN + * @see ZstdConstants#ZSTD_SEARCHLOG_MAX + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setSearchLog(final int searchLog) { + this.searchLog = searchLog; + return this; + } + + /** + * Sets the {@code ZSTD_strategy} from the C enum definition. + *

+ * The higher the value of selected strategy, the more complex it is, resulting in stronger and slower compression. + *

+ *

+ * The value {@code 0} means use the default strategy. + *

+ *
    + *
  • {@code ZSTD_fast = 1}
  • + *
  • {@code ZSTD_dfast = 2}
  • + *
  • {@code ZSTD_greedy = 3}
  • + *
  • {@code ZSTD_lazy = 4}
  • + *
  • {@code ZSTD_lazy2 = 5}
  • + *
  • {@code ZSTD_btlazy2 = 6}
  • + *
  • {@code ZSTD_btopt = 7}
  • + *
  • {@code ZSTD_btultra = 8}
  • + *
  • {@code ZSTD_btultra2 = 9}
  • + *
+ * + * @param strategy the {@code ZSTD_strategy} from the C enum definition. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setStrategy(final int strategy) { + this.strategy = strategy; + return this; + } + + /** + * Sets a value that depends on the strategy, see {@code ZSTD_c_targetLength}. + *

+ * For strategies {@code btopt}, {@code btultra} and {@code btultra2}: + *

+ *
    + *
  • Length of Match considered "good enough" to stop search.
  • + *
  • Larger values make compression stronger, and slower.
  • + *
+ *

+ * For strategy {@code fast}: + *

+ *
    + *
  • Distance between match sampling.
  • + *
  • Larger values make compression faster, and weaker.
  • + *
+ *

+ * The value {@code 0} means use the default targetLength. + *

+ * + * @param targetLength a value that depends on the strategy, see {@code ZSTD_c_targetLength}. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setTargetLength(final int targetLength) { + this.targetLength = targetLength; + return this; + } + + /** + * Sets maximum allowed back-reference distance, expressed as power of 2. + *

+ * This will set a memory budget for streaming decompression, with larger values requiring more memory and typically compressing more. This value must be + * between {@link ZstdConstants#ZSTD_WINDOWLOG_MIN} and {@link ZstdConstants#ZSTD_WINDOWLOG_MAX}. + *

+ *

+ * Note: Using a windowLog greater than {@link ZstdConstants#ZSTD_WINDOWLOG_LIMIT_DEFAULT} requires explicitly allowing such size at + * streaming decompression stage. + *

+ *

+ * The value {@code 0} means use the default windowLog. + *

+ * + * @param windowLog maximum allowed back-reference distance, expressed as power of 2. + * @return this instance. + * @see ZstdConstants#ZSTD_WINDOWLOG_MIN + * @see ZstdConstants#ZSTD_WINDOWLOG_MAX + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setWindowLog(final int windowLog) { + this.windowLog = windowLog; + return this; + } + + /** + * Sets how many threads will be spawned to compress in parallel. + *

+ * When workers >= 1, this triggers asynchronous mode when compressing, which consumes input and flushes output if possible, but immediately gives + * back control to the caller, while compression is performed in parallel, within worker threads. More workers improve speed, but also increase memory + * usage. When workers is {@code 0}, compression is performed from the calling thread, and all invocations are blocking. + *

+ *

+ * The value {@code 0} means "single-threaded mode", nothing is spawned. + *

+ *

+ * This is a multi-threading parameter and is only active if multi-threading is enabled (that is, if the underlying native library is compiled with the build + * macro {@code ZSTD_MULTITHREAD}). + *

+ * + * @param workers How many threads will be spawned to compress in parallel. + * @return this instance. + * @see Zstd manual Chapter5 + * @see zstd.h + */ + public Builder setWorkers(final int workers) { + this.workers = workers; + return this; + } + } + + /** + * Constructs a new builder of {@link ZstdCompressorOutputStream}. + * + * @return a new builder of {@link ZstdCompressorOutputStream}. + * @since 1.28.0 + */ + public static Builder builder() { + return new Builder(); + } + + @SuppressWarnings("resource") // Caller closes + private static ZstdOutputStream toZstdOutputStream(final Builder builder) throws IOException { + final OutputStream outputStream = builder.getOutputStream(); + if (outputStream instanceof ZstdOutputStream) { + // Builder properties are not applied when a ZstdOutputStream is provided. + return (ZstdOutputStream) outputStream; + } + // @formatter:off + return new ZstdOutputStream(outputStream) + .setChainLog(builder.chainLog) + .setChecksum(builder.checksum) + .setCloseFrameOnFlush(builder.closeFrameOnFlush) + // Avoid JVM segmentation fault in zstd-jni 1.5.7-2 + // TODO Remove ternary expression if/when https://github.com/luben/zstd-jni/pull/356 is fixed. + .setDict(builder.dict != null ? builder.dict : ArrayUtils.EMPTY_BYTE_ARRAY) + .setHashLog(builder.hashLog) + .setJobSize(builder.jobSize) + .setLevel(builder.level) + .setMinMatch(builder.minMatch) + .setOverlapLog(builder.overlapLog) + .setSearchLog(builder.searchLog) + .setStrategy(builder.strategy) + .setTargetLength(builder.targetLength) + .setWindowLog(builder.windowLog) + .setWorkers(builder.workers); + // @formatter:on + } + @SuppressWarnings("resource") // Caller closes + private ZstdCompressorOutputStream(final Builder builder) throws IOException { + super(toZstdOutputStream(builder)); + } + + /** + * Constructs a new instance using default Zstd parameter values. + * + * @param outStream the output stream. + * @throws IOException if an I/O error occurs. 
+ */ public ZstdCompressorOutputStream(final OutputStream outStream) throws IOException { - super(new ZstdOutputStream(outStream)); + this(builder().setOutputStream(outStream)); } /** - * Wraps the given stream into a zstd-jni ZstdOutputStream using the default values for {@code closeFrameOnFlush} and {@code useChecksum}. + * Constructs a new instance using default Zstd parameter values plus a compression level. * - * @param outStream the stream to write to - * @param level value for zstd-jni's level argument - * @throws IOException if zstd-jni does + * @param outStream the output stream. + * @param level The compression level, from 0 to 9, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. + * @throws IOException if an I/O error occurs. * @since 1.18 + * @deprecated Use {@link #builder()}. */ - @SuppressWarnings("resource") // Caller closes + @Deprecated public ZstdCompressorOutputStream(final OutputStream outStream, final int level) throws IOException { - super(new ZstdOutputStream(outStream, level)); + this(builder().setOutputStream(outStream).setLevel(level)); } /** - * Wraps the given stream into a zstd-jni ZstdOutputStream using the default value for {@code useChecksum}. + * Constructs a new instance using default Zstd parameter values plus a compression level and checksum setting. * - * @param outStream the stream to write to - * @param level value for zstd-jni's level argument - * @param closeFrameOnFlush value for zstd-jni's closeFrameOnFlush argument - * @throws IOException if zstd-jni does + * @param outStream the output stream. + * @param level The compression level, from 0 to 9, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. + * @param closeFrameOnFlush whether to close the frame on flush. + * @throws IOException if an I/O error occurs. * @since 1.18 + * @deprecated Use {@link #builder()}. 
*/ - @SuppressWarnings("resource") // Caller closes + @Deprecated public ZstdCompressorOutputStream(final OutputStream outStream, final int level, final boolean closeFrameOnFlush) throws IOException { - super(new ZstdOutputStream(outStream, level)); - out().setCloseFrameOnFlush(closeFrameOnFlush); + this(builder().setOutputStream(outStream).setLevel(level).setCloseFrameOnFlush(closeFrameOnFlush)); } /** - * Wraps the given stream into a zstd-jni ZstdOutputStream. + * Constructs a new instance using default Zstd parameter values plus a compression level, closeFrameOnFlush and checksum settings. * - * @param outStream the stream to write to - * @param level value for zstd-jni's level argument - * @param closeFrameOnFlush value for zstd-jni's closeFrameOnFlush argument - * @param useChecksum value for zstd-jni's useChecksum argument - * @throws IOException if zstd-jni does + * @param outStream the output stream. + * @param level The compression level, from 0 to 9, where the default is {@link ZstdConstants#ZSTD_CLEVEL_DEFAULT}. + * @param closeFrameOnFlush whether to close the frame on flush. + * @param checksum Whether a 32-bits checksum of content is written at end of frame. + * @throws IOException if an I/O error occurs. * @since 1.18 + * @deprecated Use {@link #builder()}. 
*/ - @SuppressWarnings("resource") // Caller closes - public ZstdCompressorOutputStream(final OutputStream outStream, final int level, final boolean closeFrameOnFlush, final boolean useChecksum) + @Deprecated + public ZstdCompressorOutputStream(final OutputStream outStream, final int level, final boolean closeFrameOnFlush, final boolean checksum) throws IOException { - super(new ZstdOutputStream(outStream, level)); - // @formatter:off - out() - .setCloseFrameOnFlush(closeFrameOnFlush) - .setChecksum(useChecksum); - // @formatter:on + this(builder().setOutputStream(outStream).setLevel(level).setCloseFrameOnFlush(closeFrameOnFlush).setChecksum(checksum)); } @Override public void write(final byte[] buf, final int off, final int len) throws IOException { out.write(buf, off, len); } - } diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdConstants.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdConstants.java new file mode 100644 index 00000000000..775526db5bd --- /dev/null +++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdConstants.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.commons.compress.compressors.zstandard; + +import com.github.luben.zstd.Zstd; + +/** + * Zstd constants. + * + * @since 1.28.0 + */ +public class ZstdConstants { + + /** + * Maximum chain log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setChainLog(int) + * @see zstd.h + */ + public static final int ZSTD_CHAINLOG_MAX = Zstd.chainLogMax(); + + /** + * Minimum chain log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setChainLog(int) + * @see zstd.h + */ + public static final int ZSTD_CHAINLOG_MIN = Zstd.chainLogMin(); + + /** + * Default compression level. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setLevel(int) + * @see zstd.h + */ + public static final int ZSTD_CLEVEL_DEFAULT = Zstd.defaultCompressionLevel(); + + /** + * Maximum compression level. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setLevel(int) + * @see zstd.h + */ + public static final int ZSTD_CLEVEL_MAX = Zstd.maxCompressionLevel(); + + /** + * Minimum compression level. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setLevel(int) + * @see zstd.h + */ + public static final int ZSTD_CLEVEL_MIN = Zstd.minCompressionLevel(); + + /** + * Maximum hash log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setHashLog(int) + * @see zstd.h + */ + public static final int ZSTD_HASHLOG_MAX = Zstd.hashLogMax(); + + /** + * Minimum hash log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setHashLog(int) + * @see zstd.h + */ + public static final int ZSTD_HASHLOG_MIN = Zstd.hashLogMin(); + + /** + * {@code ZSTD_MINMATCH_MAX} = {@value}. Only for ZSTD_fast, other strategies are limited to 6. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setMinMatch(int) + * @see zstd.h + */ + public static final int ZSTD_MINMATCH_MAX = 7; + + /** + * {@code ZSTD_MINMATCH_MIN} = {@value}. Only for ZSTD_btopt+, faster strategies are limited to 4. + */ + public static final int ZSTD_MINMATCH_MIN = 3; + + /** + * Maximum search log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setSearchLog(int) + * @see zstd.h + */ + public static final int ZSTD_SEARCHLOG_MAX = Zstd.searchLogMax(); + + /** + * Minimum search log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setSearchLog(int) + * @see zstd.h + */ + public static final int ZSTD_SEARCHLOG_MIN = Zstd.searchLogMin(); + + /** + * {@code ZSTD_WINDOWLOG_LIMIT_DEFAULT} = {@value}. + *

+ * By default, the streaming decoder will refuse any frame requiring larger than (in C) {@code (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)} window size, to preserve + * host's memory from unreasonable requirements. + *

+ * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setWindowLog(int) + * @see zstd.h + */ + public static final int ZSTD_WINDOWLOG_LIMIT_DEFAULT = 27; + + /** + * Maximum window log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setWindowLog(int) + * @see zstd.h + */ + public static final int ZSTD_WINDOWLOG_MAX = Zstd.windowLogMax(); + + /** + * Minimum window log value. + * + *

+ * This constant name matches the name in the C header file. + *

+ * + * @see ZstdCompressorOutputStream.Builder#setWindowLog(int) + * @see zstd.h + */ + public static final int ZSTD_WINDOWLOG_MIN = Zstd.windowLogMin(); + +} diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java index b25f880abe6..32034735c36 100644 --- a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java +++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdRoundtripTest.java @@ -35,22 +35,29 @@ import org.apache.commons.compress.compressors.CompressorOutputStream; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.io.IOUtils; +import org.apache.commons.io.function.IOFunction; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; + +import com.github.luben.zstd.ZstdOutputStream; /** * Tests {@link ZstdCompressorOutputStream}. 
*/ public class ZstdRoundtripTest extends AbstractTest { - private interface OutputStreamCreator { - ZstdCompressorOutputStream wrap(FileOutputStream os) throws IOException; + private interface OutputStreamCreator extends IOFunction { + // empty } private void roundtrip(final OutputStreamCreator oc) throws IOException { final Path input = getPath("bla.tar"); final File output = newTempFile(input.getFileName() + ".zstd"); try (FileOutputStream os = new FileOutputStream(output); - ZstdCompressorOutputStream zos = oc.wrap(os)) { + ZstdCompressorOutputStream zos = oc.apply(os)) { zos.write(input); zos.close(); assertTrue(zos.isClosed()); @@ -83,19 +90,145 @@ public void testFactoryRoundtrip() throws Exception { } } + @Test + public void testRoundtripSetChainLogNonDefaultMax() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setChainLog(ZstdConstants.ZSTD_CHAINLOG_MAX).get()); + } + + @Test + public void testRoundtripSetChainLogNonDefaultMin() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setChainLog(ZstdConstants.ZSTD_CHAINLOG_MIN).get()); + } + + @Test + public void testRoundtripSetChecksumNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setChecksum(true).get()); + } + + @Test + public void testRoundtripSetCloseFrameOnFlushNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setCloseFrameOnFlush(true).get()); + } + + @Test + public void testRoundtripSetHashLogNonDefaultMax() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setHashLog(ZstdConstants.ZSTD_HASHLOG_MAX).get()); + } + + @Test + public void testRoundtripSetHashLogNonDefaultMin() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setHashLog(ZstdConstants.ZSTD_HASHLOG_MIN).get()); + } + + @Test + public void 
testRoundtripSetJobSizeNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setJobSize(1).get()); + } + + @Test + public void testRoundtripSetLevelNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setLevel(1).get()); + } + + @Test + public void testRoundtripSetMinMatchNonDefaultMax() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setMinMatch(ZstdConstants.ZSTD_MINMATCH_MAX).get()); + } + + @Test + public void testRoundtripSetMinMatchNonDefaultMin() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setMinMatch(ZstdConstants.ZSTD_MINMATCH_MIN).get()); + } + + @Test + public void testRoundtripSetOverlapLogNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setOverlapLog(1).get()); + } + + @Test + public void testRoundtripSetSearchLogNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setSearchLog(1).get()); + } + + @Test + public void testRoundtripSetStrategyNonDefault() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setStrategy(1).get()); + } + + @Test + @Disabled("com.github.luben.zstd.ZstdIOException: Frame requires too much memory for decoding") + public void testRoundtripSetWindowLogNonDefaultMax() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setWindowLog(ZstdConstants.ZSTD_WINDOWLOG_MAX).get()); + } + + @Test + public void testRoundtripSetWindowLogNonDefaultMin() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setWindowLog(ZstdConstants.ZSTD_WINDOWLOG_MIN).get()); + } + + @Test + public void testRoundtripSetWorkersNonDefault() throws Exception { + roundtrip(os -> 
ZstdCompressorOutputStream.builder().setOutputStream(os).setWorkers(1).get()); + } + + @Test + public void testRoundtripSetZstdDict() throws Exception { + // Avoid JVM segmentation fault in zstd-jni 1.5.7-2 + // TODO Remove ternary expression in the ctor if/when https://github.com/luben/zstd-jni/pull/356 is fixed. + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setDict(null).get()); + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setDict(ArrayUtils.EMPTY_BYTE_ARRAY).get()); + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setDict(new byte[512]).get()); + } + + @CartesianTest + // @formatter:off + public void testRoundtripWithAll( + @Values(ints = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }) final int level, // see zstd.h + @Values(booleans = { false, true }) final boolean checksum, + @Values(booleans = { false, true }) final boolean closeFrameOnFlush, + @Values(ints = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }) final int strategy, // see zstd.h + @Values(ints = { 0, 6, 9 }) final int overlapLog // see zstd.h + ) throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder() + .setChainLog(0) + .setChecksum(checksum) + .setCloseFrameOnFlush(closeFrameOnFlush) + .setDict(null) + .setHashLog(0) + .setJobSize(0) + .setLevel(level) + .setMinMatch(0) + .setOutputStream(os) + .setOverlapLog(overlapLog) + .setSearchLog(0) + .setStrategy(strategy) + .setWindowLog(0) + .setWorkers(0) + .get()); + } + // @formatter:on + @Test public void testRoundtripWithChecksum() throws Exception { roundtrip(os -> new ZstdCompressorOutputStream(os, 3, false, true)); + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setLevel(3).setCloseFrameOnFlush(true).setChecksum(true).get()); } @Test public void testRoundtripWithCloseFrameOnFlush() throws Exception { roundtrip(os -> new ZstdCompressorOutputStream(os, 3, true)); + roundtrip(os -> 
ZstdCompressorOutputStream.builder().setOutputStream(os).setLevel(3).setCloseFrameOnFlush(true).get()); } @Test public void testRoundtripWithCustomLevel() throws Exception { roundtrip(os -> new ZstdCompressorOutputStream(os, 1)); + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(os).setLevel(1).get()); } + @Test + public void testRoundtripWithZstdOutputStream() throws Exception { + roundtrip(os -> ZstdCompressorOutputStream.builder().setOutputStream(new ZstdOutputStream(os)).get()); + roundtrip(os -> new ZstdCompressorOutputStream(new ZstdOutputStream(os))); + } }