Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/benchmarks/LZ4TPCDSDataBenchmark-jdk21-results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
AMD EPYC 7763 64-Core Processor
Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Compression 4 times 2612 2624 17 0.0 652960433.5 1.0X
Compression 4 times 2611 2619 11 0.0 652760335.3 1.0X

OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure
AMD EPYC 7763 64-Core Processor
Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Decompression 4 times 2219 2220 1 0.0 554762743.5 1.0X
Decompression 4 times 896 912 20 0.0 224050201.0 1.0X


4 changes: 2 additions & 2 deletions core/benchmarks/LZ4TPCDSDataBenchmark-results.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
AMD EPYC 7763 64-Core Processor
Compression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Compression 4 times 2605 2611 8 0.0 651243236.8 1.0X
Compression 4 times 2602 2609 10 0.0 650438397.0 1.0X

OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure
AMD EPYC 7763 64-Core Processor
Decompression: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
Decompression 4 times 2361 2367 10 0.0 590134148.0 1.0X
Decompression 4 times 938 966 33 0.0 234424499.5 1.0X


11 changes: 5 additions & 6 deletions core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,11 @@ class LZ4CompressionCodec(conf: SparkConf) extends CompressionCodec {
}

override def compressedInputStream(s: InputStream): InputStream = {
val disableConcatenationOfByteStream = false
new LZ4BlockInputStream(
s,
lz4Factory.fastDecompressor(),
xxHashFactory.newStreamingHash32(defaultSeed).asChecksum,
disableConcatenationOfByteStream)
LZ4BlockInputStream.newBuilder()
.withDecompressor(lz4Factory.safeDecompressor())
.withChecksum(xxHashFactory.newStreamingHash32(defaultSeed).asChecksum)
.withStopOnEmptyBlock(false)
.build(s)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, Da
import java.math.{MathContext, RoundingMode}
import java.util.Base64

import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream}
import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream, LZ4Factory}

import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat
import org.apache.spark.sql.catalyst.expressions._
Expand Down Expand Up @@ -210,7 +210,10 @@ object HistogramSerializer {
final def deserialize(str: String): Histogram = {
val bytes = Base64.getDecoder().decode(str)
val bis = new ByteArrayInputStream(bytes)
val ins = new DataInputStream(new LZ4BlockInputStream(bis))
val ins = new DataInputStream(
LZ4BlockInputStream.newBuilder()
.withDecompressor(LZ4Factory.fastestInstance().safeDecompressor())
.build(bis))
val height = ins.readDouble()
val numBins = ins.readInt()

Expand Down