From a9326aebca7b7fc8aa080ad2eb6433a65f5236d4 Mon Sep 17 00:00:00 2001
From: Anton Okolnychyi
Date: Tue, 22 Feb 2022 08:26:31 -0800
Subject: [PATCH 1/2] Parquet: Enabled vectorized reads by default

---
 core/src/main/java/org/apache/iceberg/TableProperties.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java
index f4444c228e6a..6f158784ef83 100644
--- a/core/src/main/java/org/apache/iceberg/TableProperties.java
+++ b/core/src/main/java/org/apache/iceberg/TableProperties.java
@@ -148,7 +148,7 @@ private TableProperties() {
   public static final long SPLIT_OPEN_FILE_COST_DEFAULT = 4 * 1024 * 1024; // 4MB
 
   public static final String PARQUET_VECTORIZATION_ENABLED = "read.parquet.vectorization.enabled";
-  public static final boolean PARQUET_VECTORIZATION_ENABLED_DEFAULT = false;
+  public static final boolean PARQUET_VECTORIZATION_ENABLED_DEFAULT = true;
 
   public static final String PARQUET_BATCH_SIZE = "read.parquet.vectorization.batch-size";
   public static final int PARQUET_BATCH_SIZE_DEFAULT = 5000;

From f9d2c1ebe122e4e54a03801b75c1200bfc453825 Mon Sep 17 00:00:00 2001
From: Anton Okolnychyi
Date: Fri, 12 Feb 2021 22:34:13 -0800
Subject: [PATCH 2/2] Read INT96 timestamps without vectorization

---
 .../spark/source/TestSparkTableUtilWithInMemoryCatalog.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spark/v2.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTableUtilWithInMemoryCatalog.java b/spark/v2.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTableUtilWithInMemoryCatalog.java
index 72fea444f56e..0a45c6901d3c 100644
--- a/spark/v2.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTableUtilWithInMemoryCatalog.java
+++ b/spark/v2.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTableUtilWithInMemoryCatalog.java
@@ -451,9 +451,10 @@ public void testImportTableWithInt96Timestamp() throws IOException {
     );
     Table table = TABLES.create(schema, PartitionSpec.unpartitioned(), tableLocation);
 
-    // assign a custom metrics config
+    // assign a custom metrics config and disable vectorized reads
     table.updateProperties()
         .set(TableProperties.DEFAULT_WRITE_METRICS_MODE, "full")
+        .set(TableProperties.PARQUET_VECTORIZATION_ENABLED, "false")
         .commit();
 
     File stagingDir = temp.newFolder("staging-dir");