diff --git a/.github/workflows/velox_backend_enhanced.yml b/.github/workflows/velox_backend_enhanced.yml
index 0399946608be..b22d260bc8d9 100644
--- a/.github/workflows/velox_backend_enhanced.yml
+++ b/.github/workflows/velox_backend_enhanced.yml
@@ -127,7 +127,7 @@ jobs:
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
- $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi \
+ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Piceberg-test -Pdelta -Phudi \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest \
-DargLine="-Dspark.test.home=$SPARK_HOME"
- name: Upload test report
diff --git a/.github/workflows/velox_backend_x86.yml b/.github/workflows/velox_backend_x86.yml
index 4c56b2f0642f..0022b2257075 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -829,7 +829,7 @@ jobs:
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
- $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
+ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg -Piceberg-test -Pdelta -Phudi -Ppaimon -Pspark-ut \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest \
-DargLine="-Dspark.test.home=$SPARK_HOME"
- name: Upload test report
diff --git a/backends-clickhouse/pom.xml b/backends-clickhouse/pom.xml
index ceef10d22df6..765dfdd7621a 100644
--- a/backends-clickhouse/pom.xml
+++ b/backends-clickhouse/pom.xml
@@ -539,8 +539,8 @@
false
- 1.18.1
- 1.18
+ 1.19.3
+ 1.19
diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml
index 798d87f539ba..967687cb7e37 100755
--- a/backends-velox/pom.xml
+++ b/backends-velox/pom.xml
@@ -408,6 +408,13 @@
+
+ org.apache.iceberg
+ iceberg-core
+ ${iceberg.version}
+ test-jar
+ test
+
org.apache.iceberg
iceberg-hive-metastore
@@ -448,16 +455,9 @@
3.26.3
test
-
- junit
- junit
- 4.13.2
- test
-
org.junit.jupiter
- junit-jupiter-api
- 5.11.4
+ junit-jupiter
test
@@ -466,6 +466,50 @@
4.2.2
test
+
+ org.eclipse.jetty
+ jetty-server
+ 11.0.26
+ test
+
+
+ org.eclipse.jetty
+ jetty-servlet
+ 11.0.26
+ test
+
+
+ org.xerial
+ sqlite-jdbc
+ 3.50.3.0
+ test
+
+
+
+
+ iceberg-test
+
+ false
+
+
+ 3.4.1
+
+
+
+ org.apache.iceberg
+ iceberg-open-api
+ ${iceberg.version}
+ test-jar
+ test
+
+
+ org.apache.iceberg
+ iceberg-open-api
+ ${iceberg.version}
+ test-fixtures
+ test-jar
+ test
+
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenCopyOnWriteDelete.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenCopyOnWriteDelete.java
index e03d4aba8c78..d51d04cf194e 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenCopyOnWriteDelete.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenCopyOnWriteDelete.java
@@ -16,36 +16,12 @@
*/
package org.apache.gluten.extensions;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.spark.extensions.TestCopyOnWriteDelete;
import org.junit.Test;
-import java.util.Map;
import java.util.concurrent.ExecutionException;
public class TestGlutenCopyOnWriteDelete extends TestCopyOnWriteDelete {
- public TestGlutenCopyOnWriteDelete(
- String catalogName,
- String implementation,
- Map config,
- String fileFormat,
- Boolean vectorized,
- String distributionMode,
- boolean fanoutEnabled,
- String branch,
- PlanningMode planningMode) {
- super(
- catalogName,
- implementation,
- config,
- fileFormat,
- vectorized,
- distributionMode,
- fanoutEnabled,
- branch,
- planningMode);
- }
-
@Test
public synchronized void testDeleteWithConcurrentTableRefresh() {
System.out.println("Run timeout");
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
index f2fe3e334118..322d74f60b01 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
@@ -16,35 +16,12 @@
*/
package org.apache.gluten.extensions;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.spark.extensions.TestMergeOnReadDelete;
import org.junit.Test;
-import java.util.Map;
import java.util.concurrent.ExecutionException;
public class TestGlutenMergeOnReadDelete extends TestMergeOnReadDelete {
- public TestGlutenMergeOnReadDelete(
- String catalogName,
- String implementation,
- Map config,
- String fileFormat,
- Boolean vectorized,
- String distributionMode,
- boolean fanoutEnabled,
- String branch,
- PlanningMode planningMode) {
- super(
- catalogName,
- implementation,
- config,
- fileFormat,
- vectorized,
- distributionMode,
- fanoutEnabled,
- branch,
- planningMode);
- }
@Test
public synchronized void testDeleteWithConcurrentTableRefresh() {
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadMerge.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadMerge.java
index efb919f1b48c..7b29cb69296a 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadMerge.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadMerge.java
@@ -16,7 +16,6 @@
*/
package org.apache.gluten.extensions;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.RowLevelOperationMode;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
@@ -25,8 +24,7 @@
import org.apache.spark.sql.execution.SparkPlan;
import org.apache.spark.sql.internal.SQLConf;
import org.junit.Test;
-
-import java.util.Map;
+import org.junit.jupiter.api.TestTemplate;
import static org.apache.iceberg.RowLevelOperationMode.COPY_ON_WRITE;
import static org.apache.iceberg.TableProperties.MERGE_MODE;
@@ -34,27 +32,6 @@
import static org.assertj.core.api.Assertions.assertThat;
public class TestGlutenMergeOnReadMerge extends TestMergeOnReadMerge {
- public TestGlutenMergeOnReadMerge(
- String catalogName,
- String implementation,
- Map config,
- String fileFormat,
- boolean vectorized,
- String distributionMode,
- boolean fanoutEnabled,
- String branch,
- PlanningMode planningMode) {
- super(
- catalogName,
- implementation,
- config,
- fileFormat,
- vectorized,
- distributionMode,
- fanoutEnabled,
- branch,
- planningMode);
- }
@Test
public synchronized void testMergeWithConcurrentTableRefresh() {
@@ -72,7 +49,7 @@ public synchronized void testMergeWithSnapshotIsolation() {
}
// The matched join string is changed from Join to ShuffledHashJoinExecTransformer
- @Test
+ @TestTemplate
public void testMergeConditionSplitIntoTargetPredicateAndJoinCondition() {
createAndInitTable(
"id INT, salary INT, dep STRING, sub_dep STRING",
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadUpdate.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadUpdate.java
index f2db135cec3f..09e771ff8bc0 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadUpdate.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadUpdate.java
@@ -16,35 +16,12 @@
*/
package org.apache.gluten.extensions;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.spark.extensions.TestMergeOnReadUpdate;
import org.junit.Test;
-import java.util.Map;
import java.util.concurrent.ExecutionException;
public class TestGlutenMergeOnReadUpdate extends TestMergeOnReadUpdate {
- public TestGlutenMergeOnReadUpdate(
- String catalogName,
- String implementation,
- Map config,
- String fileFormat,
- boolean vectorized,
- String distributionMode,
- boolean fanoutEnabled,
- String branch,
- PlanningMode planningMode) {
- super(
- catalogName,
- implementation,
- config,
- fileFormat,
- vectorized,
- distributionMode,
- fanoutEnabled,
- branch,
- planningMode);
- }
@Test
public synchronized void testUpdateWithConcurrentTableRefresh() {
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenStoragePartitionedJoinsInRowLevelOperations.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenStoragePartitionedJoinsInRowLevelOperations.java
index 9d650c6f6c7a..054689a07b5f 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenStoragePartitionedJoinsInRowLevelOperations.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenStoragePartitionedJoinsInRowLevelOperations.java
@@ -18,12 +18,5 @@
import org.apache.iceberg.spark.extensions.TestStoragePartitionedJoinsInRowLevelOperations;
-import java.util.Map;
-
public class TestGlutenStoragePartitionedJoinsInRowLevelOperations
- extends TestStoragePartitionedJoinsInRowLevelOperations {
- public TestGlutenStoragePartitionedJoinsInRowLevelOperations(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+ extends TestStoragePartitionedJoinsInRowLevelOperations {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownDQL.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownDQL.java
index 059da147255f..3bb778b2a3ea 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownDQL.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownDQL.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.extensions.TestSystemFunctionPushDownDQL;
-import java.util.Map;
-
-public class TestGlutenSystemFunctionPushDownDQL extends TestSystemFunctionPushDownDQL {
- public TestGlutenSystemFunctionPushDownDQL(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenSystemFunctionPushDownDQL extends TestSystemFunctionPushDownDQL {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownInRowLevelOperations.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownInRowLevelOperations.java
index 2eaaa6e5feb3..ce2b06913af7 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownInRowLevelOperations.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenSystemFunctionPushDownInRowLevelOperations.java
@@ -16,12 +16,5 @@
*/
package org.apache.gluten.extensions;
-import java.util.Map;
-
public class TestGlutenSystemFunctionPushDownInRowLevelOperations
- extends TestGlutenSystemFunctionPushDownDQL {
- public TestGlutenSystemFunctionPushDownInRowLevelOperations(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+ extends TestGlutenSystemFunctionPushDownDQL {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestDataFrameWrites.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestDataFrameWrites.java
deleted file mode 100644
index 678cec58d999..000000000000
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestDataFrameWrites.java
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gluten.source;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.*;
-import org.apache.iceberg.avro.Avro;
-import org.apache.iceberg.avro.AvroIterable;
-import org.apache.iceberg.hadoop.HadoopTables;
-import org.apache.iceberg.io.FileAppender;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.shaded.org.apache.avro.generic.GenericData.Record;
-import org.apache.iceberg.spark.SparkSQLProperties;
-import org.apache.iceberg.spark.SparkSchemaUtil;
-import org.apache.iceberg.spark.SparkWriteOptions;
-import org.apache.iceberg.spark.data.AvroDataTest;
-import org.apache.iceberg.spark.data.RandomData;
-import org.apache.iceberg.spark.data.SparkAvroReader;
-import org.apache.iceberg.types.Types;
-import org.apache.spark.SparkException;
-import org.apache.spark.TaskContext;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.MapPartitionsFunction;
-import org.apache.spark.sql.*;
-import org.apache.spark.sql.catalyst.InternalRow;
-import org.apache.spark.sql.catalyst.encoders.RowEncoder;
-import org.junit.*;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.util.*;
-
-import static org.apache.iceberg.spark.SparkSchemaUtil.convert;
-import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsSafe;
-import static org.apache.iceberg.spark.data.TestHelpers.assertEqualsUnsafe;
-import static org.assertj.core.api.Assertions.assertThatThrownBy;
-
-@RunWith(Parameterized.class)
-public class TestDataFrameWrites extends AvroDataTest {
- private static final Configuration CONF = new Configuration();
-
- private final String format;
-
- @Parameterized.Parameters(name = "format = {0}")
- public static Object[] parameters() {
- return new Object[] {"parquet", "avro", "orc"};
- }
-
- public TestDataFrameWrites(String format) {
- this.format = format;
- }
-
- private static SparkSession spark = null;
- private static JavaSparkContext sc = null;
-
- private Map tableProperties;
-
- private final org.apache.spark.sql.types.StructType sparkSchema =
- new org.apache.spark.sql.types.StructType(
- new org.apache.spark.sql.types.StructField[] {
- new org.apache.spark.sql.types.StructField(
- "optionalField",
- org.apache.spark.sql.types.DataTypes.StringType,
- true,
- org.apache.spark.sql.types.Metadata.empty()),
- new org.apache.spark.sql.types.StructField(
- "requiredField",
- org.apache.spark.sql.types.DataTypes.StringType,
- false,
- org.apache.spark.sql.types.Metadata.empty())
- });
-
- private final Schema icebergSchema =
- new Schema(
- Types.NestedField.optional(1, "optionalField", Types.StringType.get()),
- Types.NestedField.required(2, "requiredField", Types.StringType.get()));
-
- private final List data0 =
- Arrays.asList(
- "{\"optionalField\": \"a1\", \"requiredField\": \"bid_001\"}",
- "{\"optionalField\": \"a2\", \"requiredField\": \"bid_002\"}");
- private final List data1 =
- Arrays.asList(
- "{\"optionalField\": \"d1\", \"requiredField\": \"bid_101\"}",
- "{\"optionalField\": \"d2\", \"requiredField\": \"bid_102\"}",
- "{\"optionalField\": \"d3\", \"requiredField\": \"bid_103\"}",
- "{\"optionalField\": \"d4\", \"requiredField\": \"bid_104\"}");
-
- @BeforeClass
- public static void startSpark() {
- TestDataFrameWrites.spark = SparkSession.builder().master("local[2]").getOrCreate();
- TestDataFrameWrites.sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
- }
-
- @AfterClass
- public static void stopSpark() {
- SparkSession currentSpark = TestDataFrameWrites.spark;
- TestDataFrameWrites.spark = null;
- TestDataFrameWrites.sc = null;
- currentSpark.stop();
- }
-
- @Override
- protected void writeAndValidate(Schema schema) throws IOException {
- File location = createTableFolder();
- Table table = createTable(schema, location);
- writeAndValidateWithLocations(table, location, new File(location, "data"));
- }
-
- @Test
- public void testWriteWithCustomDataLocation() throws IOException {
- File location = createTableFolder();
- File tablePropertyDataLocation = temp.newFolder("test-table-property-data-dir");
- Table table = createTable(new Schema(SUPPORTED_PRIMITIVES.fields()), location);
- table
- .updateProperties()
- .set(TableProperties.WRITE_DATA_LOCATION, tablePropertyDataLocation.getAbsolutePath())
- .commit();
- writeAndValidateWithLocations(table, location, tablePropertyDataLocation);
- }
-
- private File createTableFolder() throws IOException {
- File parent = temp.newFolder("parquet");
- File location = new File(parent, "test");
- Assert.assertTrue("Mkdir should succeed", location.mkdirs());
- return location;
- }
-
- private Table createTable(Schema schema, File location) {
- HadoopTables tables = new HadoopTables(CONF);
- return tables.create(schema, PartitionSpec.unpartitioned(), location.toString());
- }
-
- private void writeAndValidateWithLocations(Table table, File location, File expectedDataDir)
- throws IOException {
- Schema tableSchema = table.schema(); // use the table schema because ids are reassigned
-
- table.updateProperties().set(TableProperties.DEFAULT_FILE_FORMAT, format).commit();
-
- Iterable expected = RandomData.generate(tableSchema, 100, 0L);
- writeData(expected, tableSchema, location.toString());
-
- table.refresh();
-
- List actual = readTable(location.toString());
-
- Iterator expectedIter = expected.iterator();
- Iterator actualIter = actual.iterator();
- while (expectedIter.hasNext() && actualIter.hasNext()) {
- assertEqualsSafe(tableSchema.asStruct(), expectedIter.next(), actualIter.next());
- }
- Assert.assertEquals(
- "Both iterators should be exhausted", expectedIter.hasNext(), actualIter.hasNext());
-
- table
- .currentSnapshot()
- .addedDataFiles(table.io())
- .forEach(
- dataFile ->
- Assert.assertTrue(
- String.format(
- "File should have the parent directory %s, but has: %s.",
- expectedDataDir.getAbsolutePath(), dataFile.path()),
- URI.create(dataFile.path().toString())
- .getPath()
- .startsWith(expectedDataDir.getAbsolutePath())));
- }
-
- private List readTable(String location) {
- Dataset result = spark.read().format("iceberg").load(location);
-
- return result.collectAsList();
- }
-
- private void writeData(Iterable records, Schema schema, String location)
- throws IOException {
- Dataset df = createDataset(records, schema);
- DataFrameWriter> writer = df.write().format("iceberg").mode("append");
- writer.save(location);
- }
-
- private void writeDataWithFailOnPartition(
- Iterable records, Schema schema, String location) throws IOException, SparkException {
- final int numPartitions = 10;
- final int partitionToFail = new Random().nextInt(numPartitions);
- MapPartitionsFunction failOnFirstPartitionFunc =
- input -> {
- int partitionId = TaskContext.getPartitionId();
-
- if (partitionId == partitionToFail) {
- throw new SparkException(
- String.format("Intended exception in partition %d !", partitionId));
- }
- return input;
- };
-
- Dataset df =
- createDataset(records, schema)
- .repartition(numPartitions)
- .mapPartitions(failOnFirstPartitionFunc, RowEncoder.apply(convert(schema)));
- // This trick is needed because Spark 3 handles decimal overflow in RowEncoder which "changes"
- // nullability of the column to "true" regardless of original nullability.
- // Setting "check-nullability" option to "false" doesn't help as it fails at Spark analyzer.
- Dataset convertedDf = df.sqlContext().createDataFrame(df.rdd(), convert(schema));
- DataFrameWriter> writer = convertedDf.write().format("iceberg").mode("append");
- writer.save(location);
- }
-
- private Dataset createDataset(Iterable records, Schema schema) throws IOException {
- // this uses the SparkAvroReader to create a DataFrame from the list of records
- // it assumes that SparkAvroReader is correct
- File testFile = temp.newFile();
- Assert.assertTrue("Delete should succeed", testFile.delete());
-
- try (FileAppender writer =
- Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) {
- for (Record rec : records) {
- writer.add(rec);
- }
- }
-
- // make sure the dataframe matches the records before moving on
- List rows = Lists.newArrayList();
- try (AvroIterable reader =
- Avro.read(Files.localInput(testFile))
- .createReaderFunc(SparkAvroReader::new)
- .project(schema)
- .build()) {
-
- Iterator recordIter = records.iterator();
- Iterator readIter = reader.iterator();
- while (recordIter.hasNext() && readIter.hasNext()) {
- InternalRow row = readIter.next();
- assertEqualsUnsafe(schema.asStruct(), recordIter.next(), row);
- rows.add(row);
- }
- Assert.assertEquals(
- "Both iterators should be exhausted", recordIter.hasNext(), readIter.hasNext());
- }
-
- JavaRDD rdd = sc.parallelize(rows);
- return spark.internalCreateDataFrame(JavaRDD.toRDD(rdd), convert(schema), false);
- }
-
- @Test
- public void testNullableWithWriteOption() throws IOException {
- Assume.assumeTrue(
- "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2"));
-
- File location = new File(temp.newFolder("parquet"), "test");
- String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location);
- String targetPath = String.format("%s/nullable_poc/targetFolder/", location);
-
- tableProperties = ImmutableMap.of(TableProperties.WRITE_DATA_LOCATION, targetPath);
-
- // read this and append to iceberg dataset
- spark
- .read()
- .schema(sparkSchema)
- .json(JavaSparkContext.fromSparkContext(spark.sparkContext()).parallelize(data1))
- .write()
- .parquet(sourcePath);
-
- // this is our iceberg dataset to which we will append data
- new HadoopTables(spark.sessionState().newHadoopConf())
- .create(
- icebergSchema,
- PartitionSpec.builderFor(icebergSchema).identity("requiredField").build(),
- tableProperties,
- targetPath);
-
- // this is the initial data inside the iceberg dataset
- spark
- .read()
- .schema(sparkSchema)
- .json(JavaSparkContext.fromSparkContext(spark.sparkContext()).parallelize(data0))
- .write()
- .format("iceberg")
- .mode(SaveMode.Append)
- .save(targetPath);
-
- // read from parquet and append to iceberg w/ nullability check disabled
- spark
- .read()
- .schema(SparkSchemaUtil.convert(icebergSchema))
- .parquet(sourcePath)
- .write()
- .format("iceberg")
- .option(SparkWriteOptions.CHECK_NULLABILITY, false)
- .mode(SaveMode.Append)
- .save(targetPath);
-
- // read all data
- List rows = spark.read().format("iceberg").load(targetPath).collectAsList();
- Assert.assertEquals("Should contain 6 rows", 6, rows.size());
- }
-
- @Test
- public void testNullableWithSparkSqlOption() throws IOException {
- Assume.assumeTrue(
- "Spark 3 rejects writing nulls to a required column", spark.version().startsWith("2"));
-
- File location = new File(temp.newFolder("parquet"), "test");
- String sourcePath = String.format("%s/nullable_poc/sourceFolder/", location);
- String targetPath = String.format("%s/nullable_poc/targetFolder/", location);
-
- tableProperties = ImmutableMap.of(TableProperties.WRITE_DATA_LOCATION, targetPath);
-
- // read this and append to iceberg dataset
- spark
- .read()
- .schema(sparkSchema)
- .json(JavaSparkContext.fromSparkContext(spark.sparkContext()).parallelize(data1))
- .write()
- .parquet(sourcePath);
-
- SparkSession newSparkSession =
- SparkSession.builder()
- .master("local[2]")
- .appName("NullableTest")
- .config(SparkSQLProperties.CHECK_NULLABILITY, false)
- .getOrCreate();
-
- // this is our iceberg dataset to which we will append data
- new HadoopTables(newSparkSession.sessionState().newHadoopConf())
- .create(
- icebergSchema,
- PartitionSpec.builderFor(icebergSchema).identity("requiredField").build(),
- tableProperties,
- targetPath);
-
- // this is the initial data inside the iceberg dataset
- newSparkSession
- .read()
- .schema(sparkSchema)
- .json(JavaSparkContext.fromSparkContext(spark.sparkContext()).parallelize(data0))
- .write()
- .format("iceberg")
- .mode(SaveMode.Append)
- .save(targetPath);
-
- // read from parquet and append to iceberg
- newSparkSession
- .read()
- .schema(SparkSchemaUtil.convert(icebergSchema))
- .parquet(sourcePath)
- .write()
- .format("iceberg")
- .mode(SaveMode.Append)
- .save(targetPath);
-
- // read all data
- List rows = newSparkSession.read().format("iceberg").load(targetPath).collectAsList();
- Assert.assertEquals("Should contain 6 rows", 6, rows.size());
- }
-
- @Test
- public void testFaultToleranceOnWrite() throws IOException {
- File location = createTableFolder();
- Schema schema = new Schema(SUPPORTED_PRIMITIVES.fields());
- Table table = createTable(schema, location);
-
- Iterable records = RandomData.generate(schema, 100, 0L);
- writeData(records, schema, location.toString());
-
- table.refresh();
-
- Snapshot snapshotBeforeFailingWrite = table.currentSnapshot();
- List resultBeforeFailingWrite = readTable(location.toString());
-
- Iterable records2 = RandomData.generate(schema, 100, 0L);
-
- assertThatThrownBy(() -> writeDataWithFailOnPartition(records2, schema, location.toString()))
- .isInstanceOf(SparkException.class);
-
- table.refresh();
-
- Snapshot snapshotAfterFailingWrite = table.currentSnapshot();
- List resultAfterFailingWrite = readTable(location.toString());
-
- Assert.assertEquals(snapshotAfterFailingWrite, snapshotBeforeFailingWrite);
- Assert.assertEquals(resultAfterFailingWrite, resultBeforeFailingWrite);
- }
-}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2.java
index b66015515d0c..889a46c86ef9 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2.java
@@ -16,6 +16,9 @@
*/
package org.apache.gluten.source;
+import org.apache.iceberg.ParameterizedTestExtension;
import org.apache.iceberg.spark.source.TestDataFrameWriterV2;
+import org.junit.jupiter.api.extension.ExtendWith;
+@ExtendWith(ParameterizedTestExtension.class)
public class TestGlutenDataFrameWriterV2 extends TestDataFrameWriterV2 {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2Coercion.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2Coercion.java
index f40b98bf1868..1c11aae70229 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2Coercion.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenDataFrameWriterV2Coercion.java
@@ -16,11 +16,6 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.FileFormat;
import org.apache.iceberg.spark.source.TestDataFrameWriterV2Coercion;
-public class TestGlutenDataFrameWriterV2Coercion extends TestDataFrameWriterV2Coercion {
- public TestGlutenDataFrameWriterV2Coercion(FileFormat format, String dataType) {
- super(format, dataType);
- }
-}
+public class TestGlutenDataFrameWriterV2Coercion extends TestDataFrameWriterV2Coercion {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
index c3e921e3244d..bbfaca15655c 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
@@ -16,7 +16,42 @@
*/
package org.apache.gluten.source;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.spark.source.TestIcebergSourceHiveTables;
+import org.junit.After;
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+import java.util.Map;
// Fallback all the table scan because source table is metadata table with format avro.
-public class TestGlutenIcebergSourceHiveTables extends TestIcebergSourceHiveTables {}
+public class TestGlutenIcebergSourceHiveTables extends TestIcebergSourceHiveTables {
+
+ private static TableIdentifier currentIdentifier;
+
+ // The BeforeAll does not take effect because junit 4 is used in Gluten
+ @BeforeClass
+ public static void start() {
+ Namespace db = Namespace.of(new String[] {"db"});
+ if (!catalog.namespaceExists(db)) {
+ catalog.createNamespace(db);
+ }
+ }
+
+ @After
+ public void dropTable() throws IOException {
+ if (catalog.tableExists(currentIdentifier)) {
+ this.dropTable(currentIdentifier);
+ }
+ }
+
+ public Table createTable(
+ TableIdentifier ident, Schema schema, PartitionSpec spec, Map properties) {
+ currentIdentifier = ident;
+ return catalog.createTable(ident, schema, spec, properties);
+ }
+}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIdentityPartitionData.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIdentityPartitionData.java
index 506f8a5226cd..78b6c2e38243 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIdentityPartitionData.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIdentityPartitionData.java
@@ -16,12 +16,6 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.spark.source.TestIdentityPartitionData;
-public class TestGlutenIdentityPartitionData extends TestIdentityPartitionData {
- public TestGlutenIdentityPartitionData(
- String format, boolean vectorized, PlanningMode planningMode) {
- super(format, vectorized, planningMode);
- }
-}
+public class TestGlutenIdentityPartitionData extends TestIdentityPartitionData {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenPositionDeletesTable.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenPositionDeletesTable.java
index 02d348544db9..3545b538387b 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenPositionDeletesTable.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenPositionDeletesTable.java
@@ -16,14 +16,6 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.FileFormat;
import org.apache.iceberg.spark.source.TestPositionDeletesTable;
-import java.util.Map;
-
-public class TestGlutenPositionDeletesTable extends TestPositionDeletesTable {
- public TestGlutenPositionDeletesTable(
- String catalogName, String implementation, Map config, FileFormat format) {
- super(catalogName, implementation, config, format);
- }
-}
+public class TestGlutenPositionDeletesTable extends TestPositionDeletesTable {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenRuntimeFiltering.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenRuntimeFiltering.java
index 90e382899194..976e482ad3fa 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenRuntimeFiltering.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenRuntimeFiltering.java
@@ -16,11 +16,6 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.PlanningMode;
import org.apache.iceberg.spark.source.TestRuntimeFiltering;
-public class TestGlutenRuntimeFiltering extends TestRuntimeFiltering {
- public TestGlutenRuntimeFiltering(PlanningMode planningMode) {
- super(planningMode);
- }
-}
+public class TestGlutenRuntimeFiltering extends TestRuntimeFiltering {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkMetadataColumns.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkMetadataColumns.java
index 8e49b5876b43..5189c86c62fd 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkMetadataColumns.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkMetadataColumns.java
@@ -16,12 +16,6 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.FileFormat;
import org.apache.iceberg.spark.source.TestSparkMetadataColumns;
-public class TestGlutenSparkMetadataColumns extends TestSparkMetadataColumns {
- public TestGlutenSparkMetadataColumns(
- FileFormat fileFormat, boolean vectorized, int formatVersion) {
- super(fileFormat, vectorized, formatVersion);
- }
-}
+public class TestGlutenSparkMetadataColumns extends TestSparkMetadataColumns {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkStagedScan.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkStagedScan.java
index 09a6583320de..00e8f300d7bd 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkStagedScan.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenSparkStagedScan.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.source.TestSparkStagedScan;
-import java.util.Map;
-
-public class TestGlutenSparkStagedScan extends TestSparkStagedScan {
- public TestGlutenSparkStagedScan(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenSparkStagedScan extends TestSparkStagedScan {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
index 17a578bad8c1..8433e9b2f425 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
@@ -24,42 +24,34 @@
import org.apache.iceberg.hive.HiveCatalog;
import org.apache.iceberg.hive.TestHiveMetastore;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-import org.apache.iceberg.spark.SparkTestBase;
+import org.apache.iceberg.spark.TestBase;
import org.apache.iceberg.spark.sql.TestAggregatePushDown;
import org.apache.spark.sql.SparkSession;
-import org.junit.BeforeClass;
-
-import java.util.Map;
+import org.junit.jupiter.api.BeforeAll;
public class TestGlutenAggregatePushDown extends TestAggregatePushDown {
- public TestGlutenAggregatePushDown(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
- @BeforeClass
+ @BeforeAll
public static void startMetastoreAndSpark() {
- SparkTestBase.metastore = new TestHiveMetastore();
+ TestBase.metastore = new TestHiveMetastore();
metastore.start();
- SparkTestBase.hiveConf = metastore.hiveConf();
-
- SparkTestBase.spark =
+ TestBase.hiveConf = metastore.hiveConf();
+ TestBase.spark.close();
+ TestBase.spark =
SparkSession.builder()
.master("local[2]")
.config("spark.sql.iceberg.aggregate_pushdown", "true")
.config(TestConfUtil.GLUTEN_CONF)
.enableHiveSupport()
.getOrCreate();
-
- SparkTestBase.catalog =
+ TestBase.catalog =
(HiveCatalog)
CatalogUtil.loadCatalog(
HiveCatalog.class.getName(), "hive", ImmutableMap.of(), hiveConf);
try {
- catalog.createNamespace(Namespace.of("default"));
- } catch (AlreadyExistsException ignored) {
- // the default namespace already exists. ignore the create error
+ catalog.createNamespace(Namespace.of(new String[] {"default"}));
+ } catch (AlreadyExistsException var1) {
}
}
}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenDeleteFrom.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenDeleteFrom.java
index f52f0ddb8ace..9c4593e451d4 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenDeleteFrom.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenDeleteFrom.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.sql.TestDeleteFrom;
-import java.util.Map;
-
-public class TestGlutenDeleteFrom extends TestDeleteFrom {
- public TestGlutenDeleteFrom(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenDeleteFrom extends TestDeleteFrom {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesAsSelect.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesAsSelect.java
index 52221d6e8501..72a70adb2e8c 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesAsSelect.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesAsSelect.java
@@ -17,5 +17,12 @@
package org.apache.gluten.sql;
import org.apache.iceberg.spark.sql.TestPartitionedWritesAsSelect;
+import org.junit.Test;
-public class TestGlutenPartitionedWritesAsSelect extends TestPartitionedWritesAsSelect {}
+public class TestGlutenPartitionedWritesAsSelect extends TestPartitionedWritesAsSelect {
+ @Test
+ public void testPartitionedWritesAsSelect() {
+ System.out.println(
+ com.fasterxml.jackson.databind.ObjectMapper.class.getProtectionDomain().getCodeSource());
+ }
+}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToBranch.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToBranch.java
index 6711a7fd2285..818204649b57 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToBranch.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToBranch.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.sql.TestPartitionedWritesToBranch;
-import java.util.Map;
-
-public class TestGlutenPartitionedWritesToBranch extends TestPartitionedWritesToBranch {
- public TestGlutenPartitionedWritesToBranch(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenPartitionedWritesToBranch extends TestPartitionedWritesToBranch {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToWapBranch.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToWapBranch.java
index 935ca6872eac..91c664cf346f 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToWapBranch.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenPartitionedWritesToWapBranch.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.sql.TestPartitionedWritesToWapBranch;
-import java.util.Map;
-
-public class TestGlutenPartitionedWritesToWapBranch extends TestPartitionedWritesToWapBranch {
- public TestGlutenPartitionedWritesToWapBranch(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenPartitionedWritesToWapBranch extends TestPartitionedWritesToWapBranch {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenSelect.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenSelect.java
index eff29920dfa2..6a7951d4faee 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenSelect.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenSelect.java
@@ -18,10 +18,4 @@
import org.apache.iceberg.spark.sql.TestSelect;
-import java.util.Map;
-
-public class TestGlutenSelect extends TestSelect {
- public TestGlutenSelect(String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenSelect extends TestSelect {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenTimestampWithoutZone.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenTimestampWithoutZone.java
index af83dafd1d71..e346cbc48e57 100644
--- a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenTimestampWithoutZone.java
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenTimestampWithoutZone.java
@@ -18,11 +18,4 @@
import org.apache.iceberg.spark.sql.TestTimestampWithoutZone;
-import java.util.Map;
-
-public class TestGlutenTimestampWithoutZone extends TestTimestampWithoutZone {
- public TestGlutenTimestampWithoutZone(
- String catalogName, String implementation, Map config) {
- super(catalogName, implementation, config);
- }
-}
+public class TestGlutenTimestampWithoutZone extends TestTimestampWithoutZone {}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/CatalogTestBase.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/CatalogTestBase.java
new file mode 100644
index 000000000000..00b88080affa
--- /dev/null
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/CatalogTestBase.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.spark;
+
+import org.apache.iceberg.ParameterizedTestExtension;
+import org.apache.iceberg.Parameters;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+@ExtendWith(ParameterizedTestExtension.class)
+public abstract class CatalogTestBase extends TestBaseWithCatalog {
+
+ // these parameters are broken out to avoid changes that need to modify lots of test suites
+ @Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
+ protected static Object[][] parameters() {
+ return new Object[][] {
+ {
+ SparkCatalogConfig.HIVE.catalogName(),
+ SparkCatalogConfig.HIVE.implementation(),
+ SparkCatalogConfig.HIVE.properties()
+ },
+ {
+ SparkCatalogConfig.HADOOP.catalogName(),
+ SparkCatalogConfig.HADOOP.implementation(),
+ SparkCatalogConfig.HADOOP.properties()
+ },
+ {
+ SparkCatalogConfig.SPARK.catalogName(),
+ SparkCatalogConfig.SPARK.implementation(),
+ SparkCatalogConfig.SPARK.properties()
+ },
+ };
+ }
+}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
new file mode 100644
index 000000000000..6119d0df3b98
--- /dev/null
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkCatalogTestBase.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.spark;
+
+import org.junit.Rule;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Map;
+
+@RunWith(Parameterized.class)
+public abstract class SparkCatalogTestBase extends SparkTestBaseWithCatalog {
+
+ // these parameters are broken out to avoid changes that need to modify lots of test suites
+ @Parameterized.Parameters(name = "catalogName = {0}, implementation = {1}, config = {2}")
+ public static Object[][] parameters() {
+ return new Object[][] {
+ {
+ SparkCatalogConfig.HIVE.catalogName(),
+ SparkCatalogConfig.HIVE.implementation(),
+ SparkCatalogConfig.HIVE.properties()
+ },
+ {
+ SparkCatalogConfig.HADOOP.catalogName(),
+ SparkCatalogConfig.HADOOP.implementation(),
+ SparkCatalogConfig.HADOOP.properties()
+ },
+ {
+ SparkCatalogConfig.SPARK.catalogName(),
+ SparkCatalogConfig.SPARK.implementation(),
+ SparkCatalogConfig.SPARK.properties()
+ }
+ };
+ }
+
+ @Rule public TemporaryFolder temp = new TemporaryFolder();
+
+ public SparkCatalogTestBase(SparkCatalogConfig config) {
+ super(config);
+ }
+
+ public SparkCatalogTestBase(
+ String catalogName, String implementation, Map config) {
+ super(catalogName, implementation, config);
+ }
+}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
new file mode 100644
index 000000000000..89b6f23687b1
--- /dev/null
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/SparkTestBaseWithCatalog.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.spark;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.iceberg.CatalogProperties;
+import org.apache.iceberg.PlanningMode;
+import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.catalog.Catalog;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.catalog.SupportsNamespaces;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.hadoop.HadoopCatalog;
+import org.apache.iceberg.util.PropertyUtil;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+public abstract class SparkTestBaseWithCatalog extends SparkTestBase {
+ protected static File warehouse = null;
+
+ @BeforeClass
+ public static void createWarehouse() throws IOException {
+ SparkTestBaseWithCatalog.warehouse = File.createTempFile("warehouse", null);
+ Assert.assertTrue(warehouse.delete());
+ }
+
+ @AfterClass
+ public static void dropWarehouse() throws IOException {
+ if (warehouse != null && warehouse.exists()) {
+ Path warehousePath = new Path(warehouse.getAbsolutePath());
+ FileSystem fs = warehousePath.getFileSystem(hiveConf);
+ Assert.assertTrue("Failed to delete " + warehousePath, fs.delete(warehousePath, true));
+ }
+ }
+
+ @Rule public TemporaryFolder temp = new TemporaryFolder();
+
+ protected final String catalogName;
+ protected final Map catalogConfig;
+ protected final Catalog validationCatalog;
+ protected final SupportsNamespaces validationNamespaceCatalog;
+ protected final TableIdentifier tableIdent = TableIdentifier.of(Namespace.of("default"), "table");
+ protected final String tableName;
+
+ public SparkTestBaseWithCatalog() {
+ this(SparkCatalogConfig.HADOOP);
+ }
+
+ public SparkTestBaseWithCatalog(SparkCatalogConfig config) {
+ this(config.catalogName(), config.implementation(), config.properties());
+ }
+
+ public SparkTestBaseWithCatalog(
+ String catalogName, String implementation, Map config) {
+ this.catalogName = catalogName;
+ this.catalogConfig = config;
+ this.validationCatalog =
+ catalogName.equals("testhadoop")
+ ? new HadoopCatalog(spark.sessionState().newHadoopConf(), "file:" + warehouse)
+ : catalog;
+ this.validationNamespaceCatalog = (SupportsNamespaces) validationCatalog;
+
+ spark.conf().set("spark.sql.catalog." + catalogName, implementation);
+ config.forEach(
+ (key, value) -> spark.conf().set("spark.sql.catalog." + catalogName + "." + key, value));
+
+ if ("hadoop".equalsIgnoreCase(config.get("type"))) {
+ spark.conf().set("spark.sql.catalog." + catalogName + ".warehouse", "file:" + warehouse);
+ }
+
+ this.tableName =
+ (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default.table";
+
+ sql("CREATE NAMESPACE IF NOT EXISTS default");
+ }
+
+ protected String tableName(String name) {
+ return (catalogName.equals("spark_catalog") ? "" : catalogName + ".") + "default." + name;
+ }
+
+ protected String commitTarget() {
+ return tableName;
+ }
+
+ protected String selectTarget() {
+ return tableName;
+ }
+
+ protected boolean cachingCatalogEnabled() {
+ return PropertyUtil.propertyAsBoolean(
+ catalogConfig, CatalogProperties.CACHE_ENABLED, CatalogProperties.CACHE_ENABLED_DEFAULT);
+ }
+
+ protected void configurePlanningMode(PlanningMode planningMode) {
+ configurePlanningMode(tableName, planningMode);
+ }
+
+ protected void configurePlanningMode(String table, PlanningMode planningMode) {
+ sql(
+ "ALTER TABLE %s SET TBLPROPERTIES ('%s' '%s', '%s' '%s')",
+ table,
+ TableProperties.DATA_PLANNING_MODE,
+ planningMode.modeName(),
+ TableProperties.DELETE_PLANNING_MODE,
+ planningMode.modeName());
+ }
+}
diff --git a/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/TestBase.java b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/TestBase.java
new file mode 100644
index 000000000000..b0b40a8f1b1d
--- /dev/null
+++ b/backends-velox/src-iceberg-spark34/test/java/org/apache/iceberg/spark/TestBase.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.iceberg.spark;
+
+import org.apache.gluten.TestConfUtil;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.ContentFile;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.exceptions.AlreadyExistsException;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.TestHiveMetastore;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
+import org.apache.spark.sql.execution.QueryExecution;
+import org.apache.spark.sql.execution.SparkPlan;
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec;
+import org.apache.spark.sql.internal.SQLConf;
+import org.apache.spark.sql.util.QueryExecutionListener;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.net.InetAddress;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.TimeoutException;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTOREURIS;
+import static org.assertj.core.api.Assertions.assertThat;
+
+// Must add the gluten config when create spark session because add plugin config
+public abstract class TestBase extends SparkTestHelperBase {
+
+ protected static TestHiveMetastore metastore = null;
+ protected static HiveConf hiveConf = null;
+ protected static SparkSession spark = null;
+ protected static JavaSparkContext sparkContext = null;
+ protected static HiveCatalog catalog = null;
+
+ @BeforeAll
+ public static void startMetastoreAndSpark() {
+ TestBase.metastore = new TestHiveMetastore();
+ metastore.start();
+ TestBase.hiveConf = metastore.hiveConf();
+
+ TestBase.spark =
+ SparkSession.builder()
+ .master("local[2]")
+ .config(TestConfUtil.GLUTEN_CONF)
+ .config("spark.driver.host", InetAddress.getLoopbackAddress().getHostAddress())
+ .config(SQLConf.PARTITION_OVERWRITE_MODE().key(), "dynamic")
+ .config("spark.hadoop." + METASTOREURIS.varname, hiveConf.get(METASTOREURIS.varname))
+ .config("spark.sql.legacy.respectNullabilityInTextDatasetConversion", "true")
+ .enableHiveSupport()
+ .getOrCreate();
+
+ TestBase.sparkContext = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+ TestBase.catalog =
+ (HiveCatalog)
+ CatalogUtil.loadCatalog(
+ HiveCatalog.class.getName(), "hive", ImmutableMap.of(), hiveConf);
+
+ try {
+ catalog.createNamespace(Namespace.of("default"));
+ } catch (AlreadyExistsException ignored) {
+ // the default namespace already exists. ignore the create error
+ }
+ }
+
+ @AfterAll
+ public static void stopMetastoreAndSpark() throws Exception {
+ TestBase.catalog = null;
+ if (metastore != null) {
+ metastore.stop();
+ TestBase.metastore = null;
+ }
+ if (spark != null) {
+ spark.stop();
+ TestBase.spark = null;
+ TestBase.sparkContext = null;
+ }
+ }
+
+ protected long waitUntilAfter(long timestampMillis) {
+ long current = System.currentTimeMillis();
+ while (current <= timestampMillis) {
+ current = System.currentTimeMillis();
+ }
+ return current;
+ }
+
+ protected List
+
+ iceberg-test
+
+ false
+
+
+ 3.4.1
+
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-iceberg-test-sources
+
+ add-test-source
+
+ generate-test-sources
+
+
+ ${project.basedir}/src-iceberg-spark${spark.plain.version}/test/scala
+ ${project.basedir}/src-iceberg-spark${spark.plain.version}/test/java
+
+
+
+
+ add-iceberg-test-resources
+
+ add-test-resource
+
+ generate-test-resources
+
+
${project.basedir}/src-iceberg-spark${spark.plain.version}/test/resources