This file was deleted.

@@ -55,7 +55,6 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -417,13 +416,10 @@ public void close() {
     recordReader = null;
     recordMaterializer = null;
     nullFilledVectors = null;
-    try {
-      if (pageReadStore != null) {
-        pageReadStore.close();
-        pageReadStore = null;
-      }
-    } catch (IOException e) {
-      logger.warn("Failure while closing PageReadStore", e);
+
+    if (pageReadStore != null) {
+      pageReadStore.close();
+      pageReadStore = null;
     }
   }

ColumnChunkIncReadStore.java
@@ -17,13 +17,7 @@
  */
 package org.apache.parquet.hadoop;

-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
+import io.netty.buffer.ByteBuf;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.exec.exception.OutOfMemoryException;
 import org.apache.drill.exec.memory.BufferAllocator;
@@ -47,11 +41,16 @@
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
 import org.apache.parquet.hadoop.util.HadoopStreams;
-
-import io.netty.buffer.ByteBuf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+

 public class ColumnChunkIncReadStore implements PageReadStore {
   private static final Logger logger = LoggerFactory.getLogger(ColumnChunkIncReadStore.class);
@@ -295,9 +294,13 @@ public void addColumn(ColumnDescriptor descriptor, ColumnChunkMetaData metaData)
     columns.put(descriptor, reader);
   }

-  public void close() throws IOException {
+  public void close() {
     for (FSDataInputStream stream : streams) {
-      stream.close();
+      try {
+        stream.close();
+      } catch (IOException e) {
+        logger.warn("Error closing stream: {}", e.getMessage(), e);
+      }
     }
     for (ColumnChunkIncPageReader reader : columns.values()) {
       reader.close();
Member:
@cgivre Why did you make these changes? Correct me if I'm missing something, but they don't seem to add value or improve code clarity.

Contributor Author:
@rymarm The unit tests weren't passing. Honestly, I could use some help here. @jnturton believes that Drill is pulling in an old version of parquet in testing and that is why this keeps failing. Any help would be greatly appreciated.

Member:
@cgivre I'll help with this — I'm working on it now. The issue isn't due to pulling in an older version of Parquet. Instead, it stems from our temporary replacement of a few Parquet library class implementations to address specific issues. Since the internal API has changed, we now need to update our implementations accordingly.

https://issues.apache.org/jira/browse/PARQUET-2026
https://issues.apache.org/jira/browse/PARQUET-1006
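
The ParquetColumnChunkPageWriteStore diff below shows the kind of update this entails: the newer parquet-column API adds a writePage overload that carries a SizeStatistics argument, and the copied class satisfies it by delegating to the overload it already implements. A minimal standalone sketch of that delegation pattern, assuming the parquet-column 1.15.x PageWriter interface (the class name here is hypothetical, not Drill's actual code):

import java.io.IOException;

import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.page.PageWriter;
import org.apache.parquet.column.statistics.SizeStatistics;
import org.apache.parquet.column.statistics.Statistics;

// Hypothetical base class illustrating the delegation pattern used in the diff below:
// the SizeStatistics-aware overload added by the newer API forwards to the older
// overload, so the rest of a copied PageWriter implementation can stay unchanged.
public abstract class SizeStatisticsIgnoringPageWriter implements PageWriter {

  @Override
  public void writePage(BytesInput bytesInput,
                        int valueCount,
                        int rowCount,
                        Statistics<?> statistics,
                        SizeStatistics sizeStatistics,
                        Encoding rlEncoding,
                        Encoding dlEncoding,
                        Encoding valuesEncoding) throws IOException {
    // SizeStatistics is dropped here; the existing overload performs the actual write.
    writePage(bytesInput, valueCount, rowCount, statistics, rlEncoding, dlEncoding, valuesEncoding);
  }
}

Whether dropping SizeStatistics this way is acceptable long term is a separate question; it simply mirrors what this PR does and keeps the copied class source-compatible with the new interface.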

ParquetColumnChunkPageWriteStore.java
@@ -17,16 +17,7 @@
  */
 package org.apache.parquet.hadoop;

-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.zip.CRC32;
-
+import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
 import org.apache.parquet.column.ColumnDescriptor;
@@ -35,6 +26,7 @@
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.statistics.SizeStatistics;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
@@ -51,11 +43,20 @@
 import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.zip.CRC32;
+
 @InterfaceAudience.Private
 public class ParquetColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWriteStore,
     AutoCloseable {
@@ -152,6 +153,20 @@ public void writePage(BytesInput bytesInput, int valueCount, Statistics<?> stati
       writePage(bytesInput, valueCount, -1, statistics, rlEncoding, dlEncoding, valuesEncoding);
     }

+    @Override
+    public void writePage(
+        BytesInput bytesInput,
+        int valueCount,
+        int rowCount,
+        Statistics<?> statistics,
+        SizeStatistics sizeStatistics,
+        Encoding rlEncoding,
+        Encoding dlEncoding,
+        Encoding valuesEncoding)
+        throws IOException {
+      writePage(bytesInput, valueCount, rowCount, statistics, rlEncoding, dlEncoding, valuesEncoding);
+    }
+
     @Override
     public void writePage(BytesInput bytes,
                           int valueCount,
@@ -399,7 +414,7 @@ public void close() {
     }
   }

-  private final Map<ColumnDescriptor, ColumnChunkPageWriter> writers = new HashMap<ColumnDescriptor, ColumnChunkPageWriter>();
+  private final Map<ColumnDescriptor, ParquetColumnChunkPageWriteStore.ColumnChunkPageWriter> writers = new HashMap<ColumnDescriptor, ColumnChunkPageWriter>();
   private final MessageType schema;

   public ParquetColumnChunkPageWriteStore(BytesInputCompressor compressor, MessageType schema, int initialSlabSize,
pom.xml: 6 changes (3 additions, 3 deletions)
@@ -53,7 +53,7 @@
     <antlr.version>4.9.3</antlr.version>
     <asm.version>9.5</asm.version>
     <avatica.version>1.23.0</avatica.version>
-    <avro.version>1.11.4</avro.version>
+    <avro.version>1.12.0</avro.version>
     <bouncycastle.version>1.78.1</bouncycastle.version>
     <caffeine.version>2.9.3</caffeine.version>
     <calcite.groupId>org.apache.calcite</calcite.groupId>
@@ -125,8 +125,8 @@
     <netty.tcnative.classifier />
     <netty.tcnative.version>2.0.65.Final</netty.tcnative.version>
     <netty.version>4.1.115.Final</netty.version>
-    <parquet.format.version>2.9.0</parquet.format.version>
-    <parquet.version>1.12.3</parquet.version>
+    <parquet.format.version>2.11.0</parquet.format.version>
+    <parquet.version>1.15.1</parquet.version>
     <project.build.outputTimestamp>1676438963</project.build.outputTimestamp>
     <protobuf.version>3.25.5</protobuf.version>
     <proto.cas.path>${project.basedir}/src/main/protobuf/</proto.cas.path>