Flink: write the CDC records into apache iceberg tables #1663
Changes from all commits
@@ -0,0 +1,48 @@ (new file: ContentFileWriter.java)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import java.io.Closeable;
import java.util.Iterator;

public interface ContentFileWriter<T, R> extends Closeable {

  void write(R record);

  default void writeAll(Iterator<R> values) {
    while (values.hasNext()) {
      write(values.next());
    }
  }

  default void writeAll(Iterable<R> values) {
    writeAll(values.iterator());
  }

  /**
   * Returns the length of this file.
   */
  long length();

  /**
   * Return a {@link ContentFile} which is either a {@link DeleteFile} or a {@link DataFile}.
   */
  T toContentFile();
}
@@ -0,0 +1,43 @@ (new file: ContentFileWriterFactory.java)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import org.apache.iceberg.encryption.EncryptedOutputFile;

/**
 * Factory to create a new {@link ContentFileWriter} to write INSERT or DELETE records.
 *
 * @param <T> content file type, either {@link DataFile} or {@link DeleteFile}.
 * @param <R> data type of the rows to write.
 */
public interface ContentFileWriterFactory<T, R> {

  /**
   * Creates a new {@link ContentFileWriter}.
   *
   * @param partitionKey a partition key indicating which partition the rows will be written to; null if the
   *                     table is unpartitioned.
   * @param outputFile an OutputFile used to create an output stream.
   * @param fileFormat the file format.
   * @return a newly created {@link ContentFileWriter}
   */
  ContentFileWriter<T, R> createWriter(PartitionKey partitionKey, EncryptedOutputFile outputFile,
                                       FileFormat fileFormat);
}
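To make the factory contract concrete, here is a rough sketch (not part of this PR) of how a data-file implementation of ContentFileWriterFactory might look. It assumes an existing FileAppenderFactory<T> is available to create the format-specific appender; the DataFileWriterFactory class name and its fields are hypothetical.

package org.apache.iceberg;

import org.apache.iceberg.encryption.EncryptedOutputFile;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.FileAppenderFactory;

// Hypothetical factory that wires a format-specific appender into the DataFileWriter from this PR.
public class DataFileWriterFactory<T> implements ContentFileWriterFactory<DataFile, T> {

  private final PartitionSpec spec;
  private final FileAppenderFactory<T> appenderFactory;  // assumed dependency, e.g. an engine-specific factory

  public DataFileWriterFactory(PartitionSpec spec, FileAppenderFactory<T> appenderFactory) {
    this.spec = spec;
    this.appenderFactory = appenderFactory;
  }

  @Override
  public ContentFileWriter<DataFile, T> createWriter(PartitionKey partitionKey,
                                                     EncryptedOutputFile outputFile,
                                                     FileFormat fileFormat) {
    // Create the format-specific appender, then wrap it with the file-level metadata
    // needed to build a DataFile when the writer is closed.
    FileAppender<T> appender = appenderFactory.newAppender(outputFile.encryptingOutputFile(), fileFormat);
    return new DataFileWriter<>(appender, fileFormat,
        outputFile.encryptingOutputFile().location(), partitionKey, spec, outputFile.keyMetadata());
  }
}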
@@ -0,0 +1,79 @@ (new file: DataFileWriter.java)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.iceberg.encryption.EncryptionKeyMetadata;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;

public class DataFileWriter<T> implements ContentFileWriter<DataFile, T> {

Inline review on this class:
Contributor: Much of this duplicates …
Member (author): I don't think so, because …

  private final FileAppender<T> appender;
  private final FileFormat format;
  private final String location;
  private final PartitionKey partitionKey;
  private final PartitionSpec spec;
  private final ByteBuffer keyMetadata;
  private DataFile dataFile = null;

  public DataFileWriter(FileAppender<T> appender, FileFormat format,
                        String location, PartitionKey partitionKey, PartitionSpec spec,
                        EncryptionKeyMetadata keyMetadata) {
    this.appender = appender;
    this.format = format;
    this.location = location;
    this.partitionKey = partitionKey; // set null if unpartitioned.
    this.spec = spec;
    this.keyMetadata = keyMetadata != null ? keyMetadata.buffer() : null;
  }

  @Override
  public void write(T row) {
    appender.add(row);
  }

  @Override
  public long length() {
    return appender.length();
  }

  @Override
  public DataFile toContentFile() {
    Preconditions.checkState(dataFile != null, "Cannot create data file from unclosed writer");
    return dataFile;
  }

  @Override
  public void close() throws IOException {
    if (dataFile == null) {
      appender.close();
      this.dataFile = DataFiles.builder(spec)
          .withEncryptionKeyMetadata(keyMetadata)
          .withFormat(format)
          .withPath(location)
          .withFileSizeInBytes(appender.length())
          .withPartition(partitionKey) // set null if unpartitioned
          .withMetrics(appender.metrics())
          .withSplitOffsets(appender.splitOffsets())
          .build();
    }
  }
}
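A small usage sketch (illustrative only, not part of the PR) of the lifecycle this class implies: rows are written through the wrapped appender, the DataFile metadata is built when the writer is closed, and toContentFile() may only be called afterwards. The helper method, its arguments, and the hard-coded Parquet format are hypothetical; the appender is assumed to be created elsewhere, e.g. by an appender factory.

import java.io.IOException;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFileWriter;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.io.FileAppender;

public class DataFileWriterExample {
  static <T> DataFile writeDataFile(FileAppender<T> appender, String location,
                                    PartitionSpec spec, Iterable<T> rows) throws IOException {
    // Example values: Parquet format, no partition key (unpartitioned), no encryption key metadata.
    DataFileWriter<T> writer =
        new DataFileWriter<>(appender, FileFormat.PARQUET, location, null, spec, null);
    try {
      writer.writeAll(rows);
    } finally {
      // Closing the writer closes the appender and builds the DataFile from its metrics.
      writer.close();
    }
    return writer.toContentFile();
  }
}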
@@ -0,0 +1,53 @@ (new file: DeletesUtil.java)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.deletes;

import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.Types;

public class DeletesUtil {

  private DeletesUtil() {
  }

  public static Schema pathPosSchema() {
    return new Schema(
        MetadataColumns.DELETE_FILE_PATH,
        MetadataColumns.DELETE_FILE_POS);
  }

  public static Schema pathPosSchema(Schema rowSchema) {
    Preconditions.checkNotNull(rowSchema, "Row schema should not be null when constructing the pos-delete schema.");

    // the appender uses the row schema wrapped with position fields
    return new Schema(
        MetadataColumns.DELETE_FILE_PATH,
        MetadataColumns.DELETE_FILE_POS,
        Types.NestedField.required(
            MetadataColumns.DELETE_FILE_ROW_FIELD_ID, "row", rowSchema.asStruct(),
            MetadataColumns.DELETE_FILE_ROW_DOC));
  }

  public static Schema posDeleteSchema(Schema rowSchema) {
    return rowSchema == null ? pathPosSchema() : pathPosSchema(rowSchema);
  }
}
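For illustration (not part of the PR), here is how these helpers would be used for a simple two-column row schema; the field IDs and column names are arbitrary example values.

import org.apache.iceberg.Schema;
import org.apache.iceberg.deletes.DeletesUtil;
import org.apache.iceberg.types.Types;

public class PosDeleteSchemaExample {
  public static void main(String[] args) {
    Schema rowSchema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    // file_path, pos, row<id, data>: includes the deleted row when a row schema is given
    Schema withRow = DeletesUtil.posDeleteSchema(rowSchema);

    // file_path, pos only: when no row schema is supplied
    Schema pathPosOnly = DeletesUtil.posDeleteSchema(null);

    System.out.println(withRow);
    System.out.println(pathPosOnly);
  }
}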
Reviewer: I'm a little hesitant to add this. I think it is needed so that the same RollingContentFileWriter can be used for delete files and data files, but this introduces a lot of changes and new interfaces just to share about 20 lines of code. I'm not sure that it is worth the extra complexity, compared to having one for data files and one for delete files.
Author: In my thinking, the whole write workflow should look like the following:

For each executor/task in the compute engine, there is a TaskWriter that writes generic records. If it uses the fanout policy to write records, it will have multiple DeltaWriters, each writing records for a single partition; if it uses the grouped policy in Spark, we might have just one DeltaWriter per TaskWriter. The DeltaWriter can accept INSERT, EQ-DELETE, and POS-DELETE records, and for each kind of record there is a RollingFileWriter that rolls its file appender over to a newly opened one once its size reaches the threshold.

Every RollingFileWriter should have the same rolling logic, so in theory it is good to define an abstract ContentFileWriter so that we don't have to define three kinds of RollingFileWriter (see the sketch below).

Another way is to define a BaseRollingFileWriter and put the common logic there, and have the DeltaWriter use the BaseRollingFileWriter. When constructing the DeltaWriter, we would need to pass the subclasses PosDeleteRollingFileWriter, EqDeleteRollingFileWriter, and DataRollingFileWriter to it.
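As a rough sketch of the rolling logic described above (hypothetical names, not code from this PR): a generic rolling writer can delegate to one ContentFileWriter per open file, check length() after each write, and roll to a new file once the target size is reached. The Supplier<EncryptedOutputFile> is an assumed dependency that hands out new output files.

package org.apache.iceberg;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;
import org.apache.iceberg.encryption.EncryptedOutputFile;

class RollingContentFileWriter<T, R> {
  private final ContentFileWriterFactory<T, R> writerFactory;
  private final Supplier<EncryptedOutputFile> fileSupplier;  // assumed: hands out new output files
  private final PartitionKey partitionKey;
  private final FileFormat format;
  private final long targetFileSizeInBytes;
  private final List<T> completedFiles = new ArrayList<>();

  private ContentFileWriter<T, R> currentWriter = null;

  RollingContentFileWriter(ContentFileWriterFactory<T, R> writerFactory,
                           Supplier<EncryptedOutputFile> fileSupplier,
                           PartitionKey partitionKey, FileFormat format, long targetFileSizeInBytes) {
    this.writerFactory = writerFactory;
    this.fileSupplier = fileSupplier;
    this.partitionKey = partitionKey;
    this.format = format;
    this.targetFileSizeInBytes = targetFileSizeInBytes;
  }

  void write(R record) throws IOException {
    if (currentWriter == null) {
      currentWriter = writerFactory.createWriter(partitionKey, fileSupplier.get(), format);
    }

    currentWriter.write(record);

    // Roll to a new file once the current one reaches the target size.
    if (currentWriter.length() >= targetFileSizeInBytes) {
      closeCurrent();
    }
  }

  // Closes any open file and returns the completed content files (DataFile or DeleteFile).
  List<T> complete() throws IOException {
    closeCurrent();
    return completedFiles;
  }

  private void closeCurrent() throws IOException {
    if (currentWriter != null) {
      currentWriter.close();
      completedFiles.add(currentWriter.toContentFile());
      currentWriter = null;
    }
  }
}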
Author: In that approach, we would need to create the PosDeleteRollingFileWriter, EqDeleteRollingFileWriter, and DataRollingFileWriter separately for each engine, for example FlinkPosDeleteRollingWriter and SparkPosDeleteRollingWriter, because different engines need to construct different FileAppenders to convert their specific data types into the unified parquet/orc/avro files. I don't think that would be less complexity than the current solution.
Reviewer: I don't think we would require a different class for each engine. The file writers are currently created using an AppenderFactory, and we could continue using that.

Also, we would not need three rolling writers that are nearly identical. We would only need two, because the position delete writer will be substantially different due to its sort order requirement.

Because deletes may come in any order relative to inserts and we need to write out a sorted delete file, we will need to buffer the deletes in memory (see the sketch below). That's not as expensive as it seems at first, because the file location will be reused (multiple deletes in the same data file), so the main cost is the number of positions that get deleted, which is the number of rows written and deleted in the same checkpoint per partition. The position delete writer and the logic to roll over to a new file are probably not going to be shared. I don't think I would even build a rolling position delete writer unless we see real cases where it is needed.

That leaves just the equality delete writer and the data file writer. I think it would be cleaner to just have two rolling file writers because the rolling logic is so small. I went ahead and started a PR to show what it would look like: #1802
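To make the buffering idea concrete, here is a sketch (hypothetical, from neither this PR nor #1802) of accumulating position deletes per data file and flushing them in (file_path, pos) order so that the resulting position-delete file is sorted.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

class PositionDeleteBuffer {
  // data file path -> positions deleted in that file; TreeMap keeps paths in sorted order
  private final Map<String, List<Long>> deletesByPath = new TreeMap<>();

  void delete(String path, long position) {
    deletesByPath.computeIfAbsent(path, ignored -> new ArrayList<>()).add(position);
  }

  // Emits (path, pos) pairs in sorted order; the consumer would be the position-delete
  // writer that appends them to the delete file.
  void flush(PositionConsumer consumer) {
    for (Map.Entry<String, List<Long>> entry : deletesByPath.entrySet()) {
      List<Long> positions = entry.getValue();
      positions.sort(Comparator.naturalOrder());
      for (long pos : positions) {
        consumer.accept(entry.getKey(), pos);
      }
    }
    deletesByPath.clear();
  }

  interface PositionConsumer {
    void accept(String path, long pos);
  }
}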
Author: I read the PR; here are my thoughts:

First of all, I like the idea of using a single FileAppenderFactory to customize different writers for different computing engines; it's unified and graceful, and developers would find it easy to understand and customize.

Second, I agree that it's necessary to consider the sort order for the position delete writer. We have to sort the pairs in memory (similar to flushing the sorted memstore to HFiles in HBase); once the memory size reaches the threshold, we flush it to a pos-delete file, so the rolling policy is decided by the memory size rather than the current file size. It sounds reasonable to make it a separate pos-writer.

Third, I'd like to finish the whole CDC write path work (PoC) based on #1802 to see whether there are other issues.

Thanks.