diff --git a/api/src/main/java/org/apache/iceberg/DataFile.java b/api/src/main/java/org/apache/iceberg/DataFile.java
index 90758a3d73d0..aee057d22212 100644
--- a/api/src/main/java/org/apache/iceberg/DataFile.java
+++ b/api/src/main/java/org/apache/iceberg/DataFile.java
@@ -62,6 +62,21 @@ static StructType getType(StructType partitionType) {
     );
   }
 
+  /**
+   * @return the status of the file, whether EXISTING, ADDED, or DELETED
+   */
+  FileStatus status();
+
+  /**
+   * @return id of the snapshot in which the file was added to the table
+   */
+  Long snapshotId();
+
+  /**
+   * @return the sequence number of the snapshot in which the file was added to the table
+   */
+  Long sequenceNumber();
+
   /**
    * @return fully qualified path to the file, suitable for constructing a Hadoop Path
    */
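For context, a minimal sketch of how a caller could read the three new DataFile accessors once a manifest reader populates them. The Table/scan usage below is an illustration only and is not part of this patch; files that were not loaded from a manifest may return null for the snapshot id and sequence number.

    import org.apache.iceberg.DataFile;
    import org.apache.iceberg.FileScanTask;
    import org.apache.iceberg.Table;

    class DataFileEntryFieldsDump {
      // Print the new manifest-entry fields for every file in the current snapshot.
      static void dump(Table table) {
        for (FileScanTask task : table.newScan().planFiles()) {
          DataFile file = task.file();
          System.out.printf("%s status=%s snapshot=%s sequence=%s%n",
              file.path(), file.status(), file.snapshotId(), file.sequenceNumber());
        }
      }
    }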
diff --git a/api/src/main/java/org/apache/iceberg/FileStatus.java b/api/src/main/java/org/apache/iceberg/FileStatus.java
new file mode 100644
index 000000000000..8378e3d962bd
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/FileStatus.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+/**
+ * The status of a data or delete file in a manifest:
+ * 0 - existing (added in a different manifest)
+ * 1 - added to the table in the manifest
+ * 2 - deleted from the table in the manifest
+ */
+public enum FileStatus {
+  EXISTING(0),
+  ADDED(1),
+  DELETED(2);
+
+  private final int id;
+
+  FileStatus(int id) {
+    this.id = id;
+  }
+
+  public int id() {
+    return id;
+  }
+}
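A quick illustration of the id contract the rest of the patch depends on: the integer written to the manifest is the enum's id, and ManifestEntry.Status uses the same 0/1/2 values, which is what lets the adapter in GenericDataFile translate by index. Illustrative snippet only.

    import org.apache.iceberg.FileStatus;

    class FileStatusIdCheck {
      public static void main(String[] args) {
        // id-based lookup, mirroring what the Avro reader does with the status column
        FileStatus status = FileStatus.values()[1];
        if (status != FileStatus.ADDED || status.id() != 1) {
          throw new IllegalStateException("FileStatus ids must stay aligned with their ordinals");
        }
      }
    }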
diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java
index 4989281ffe96..b2ad2215076c 100644
--- a/api/src/test/java/org/apache/iceberg/TestHelpers.java
+++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java
@@ -303,6 +303,21 @@ public TestDataFile(String path, StructLike partition, long recordCount,
       this.upperBounds = upperBounds;
     }
 
+    @Override
+    public FileStatus status() {
+      return FileStatus.ADDED;
+    }
+
+    @Override
+    public Long snapshotId() {
+      return 0L;
+    }
+
+    @Override
+    public Long sequenceNumber() {
+      return 0L;
+    }
+
     @Override
     public CharSequence path() {
       return path;
diff --git a/core/src/main/java/org/apache/iceberg/GenericDataFile.java b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
index aa427cbc5e16..d5b34f77777d 100644
--- a/core/src/main/java/org/apache/iceberg/GenericDataFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
@@ -29,6 +29,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.avro.specific.SpecificData;
 import org.apache.iceberg.avro.AvroSchemaUtil;
@@ -49,6 +50,12 @@ public PartitionData copy() {
   private int[] fromProjectionPos;
   private Types.StructType partitionType;
 
+  // ManifestEntry fields
+  private ManifestEntry asEntry = null;
+  private FileStatus status = FileStatus.ADDED;
+  private Long snapshotId = null;
+  private Long sequenceNumber = null;
+
   private String filePath = null;
   private FileFormat format = null;
   private PartitionData partitionData = null;
@@ -70,11 +77,11 @@ public PartitionData copy() {
   private static final long DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
 
   /**
-   * Used by Avro reflection to instantiate this class when reading manifest files.
+   * Used by AsManifestEntry to instantiate this class when reading manifest files.
    */
-  @SuppressWarnings("checkstyle:RedundantModifier") // Must be public
-  public GenericDataFile(org.apache.avro.Schema avroSchema) {
+  private GenericDataFile(org.apache.avro.Schema avroSchema, AsManifestEntry asEntry) {
     this.avroSchema = avroSchema;
+    this.asEntry = asEntry;
 
     Types.StructType schema = AvroSchemaUtil.convert(avroSchema).asNestedType().asStructType();
 
@@ -112,6 +119,7 @@ public GenericDataFile(org.apache.avro.Schema avroSchema) {
     this.format = format;
     this.partitionData = EMPTY_PARTITION_DATA;
     this.partitionType = EMPTY_PARTITION_DATA.getPartitionType();
+    this.asEntry = new AsManifestEntry(this, partitionType);
     this.recordCount = recordCount;
     this.fileSizeInBytes = fileSizeInBytes;
   }
@@ -122,6 +130,7 @@ public GenericDataFile(org.apache.avro.Schema avroSchema) {
     this.format = format;
     this.partitionData = partition;
     this.partitionType = partition.getPartitionType();
+    this.asEntry = new AsManifestEntry(this, partitionType);
     this.recordCount = recordCount;
     this.fileSizeInBytes = fileSizeInBytes;
   }
@@ -139,6 +148,7 @@ public GenericDataFile(org.apache.avro.Schema avroSchema) {
       this.partitionData = partition;
       this.partitionType = partition.getPartitionType();
     }
+    this.asEntry = new AsManifestEntry(this, partitionType);
 
     // this will throw NPE if metrics.recordCount is null
     this.recordCount = metrics.recordCount();
@@ -165,10 +175,14 @@ public GenericDataFile(org.apache.avro.Schema avroSchema) {
    * @param fullCopy whether to copy all fields or to drop column-level stats
    */
   private GenericDataFile(GenericDataFile toCopy, boolean fullCopy) {
+    this.status = toCopy.status;
+    this.snapshotId = toCopy.snapshotId;
+    this.sequenceNumber = toCopy.sequenceNumber;
     this.filePath = toCopy.filePath;
     this.format = toCopy.format;
     this.partitionData = toCopy.partitionData.copy();
     this.partitionType = toCopy.partitionType;
+    this.asEntry = new AsManifestEntry(this, partitionType);
     this.recordCount = toCopy.recordCount;
     this.fileSizeInBytes = toCopy.fileSizeInBytes;
     if (fullCopy) {
@@ -196,6 +210,25 @@ private GenericDataFile(GenericDataFile toCopy, boolean fullCopy) {
   GenericDataFile() {
   }
 
+  @Override
+  public FileStatus status() {
+    return status;
+  }
+
+  @Override
+  public Long snapshotId() {
+    return snapshotId;
+  }
+
+  @Override
+  public Long sequenceNumber() {
+    return sequenceNumber;
+  }
+
+  public ManifestEntry asEntry() {
+    return asEntry;
+  }
+
   @Override
   public CharSequence path() {
     return filePath;
@@ -399,12 +432,12 @@ public String toString() {
   }
 
   @Override
-  public DataFile copyWithoutStats() {
+  public GenericDataFile copyWithoutStats() {
     return new GenericDataFile(this, false /* drop stats */);
   }
 
   @Override
-  public DataFile copy() {
+  public GenericDataFile copy() {
     return new GenericDataFile(this, true /* full copy */);
   }
 
@@ -425,4 +458,147 @@ private static <E> List<E> copy(List<E> list) {
     }
     return null;
   }
+
+  /**
+   * An adapter that makes a DataFile appear like a ManifestEntry for v1 metadata files.
+   */
+  static class AsManifestEntry
+      implements ManifestEntry, IndexedRecord, SpecificData.SchemaConstructable, StructLike, Serializable {
+    private transient Schema avroSchema = null;
+    private GenericDataFile file = null;
+
+    /**
+     * Used by Avro reflection to instantiate this class when reading manifest files.
+     *
+     * @param avroSchema an Avro read schema
+     */
+    protected AsManifestEntry(Schema avroSchema) {
+      this.avroSchema = avroSchema;
+      this.file = new GenericDataFile(avroSchema.getField("data_file").schema(), this);
+    }
+
+    /**
+     * Used by DataFile to create a ManifestEntry adapter.
+     *
+     * @param file a GenericDataFile that contains manifest entry data
+     */
+    protected AsManifestEntry(GenericDataFile file, Types.StructType partitionType) {
+      this.avroSchema = AvroSchemaUtil.convert(ManifestEntry.getSchema(partitionType), "manifest_entry");
+      this.file = file;
+    }
+
+    /**
+     * Constructor for Java serialization.
+     */
+    AsManifestEntry() {
+    }
+
+    @Override
+    public Schema getSchema() {
+      return avroSchema;
+    }
+
+    @Override
+    public Status status() {
+      return ManifestEntry.Status.values()[file.status.id()];
+    }
+
+    @Override
+    public Long snapshotId() {
+      return file.snapshotId;
+    }
+
+    @Override
+    public void setSnapshotId(long snapshotId) {
+      file.snapshotId = snapshotId;
+    }
+
+    @Override
+    public Long sequenceNumber() {
+      return file.sequenceNumber;
+    }
+
+    @Override
+    public void setSequenceNumber(long sequenceNumber) {
+      file.sequenceNumber = sequenceNumber;
+    }
+
+    @Override
+    public DataFile file() {
+      return file;
+    }
+
+    @Override
+    public ManifestEntry copy() {
+      return file.copy().asEntry;
+    }
+
+    @Override
+    public ManifestEntry copyWithoutStats() {
+      return file.copyWithoutStats().asEntry;
+    }
+
+    @Override
+    public void put(int pos, Object value) {
+      switch (pos) {
+        case 0:
+          file.status = FileStatus.values()[(Integer) value];
+          return;
+        case 1:
+          file.snapshotId = (Long) value;
+          return;
+        case 2:
+          file.sequenceNumber = (Long) value;
+          return;
+        case 3:
+          if (file != value) {
+            throw new IllegalArgumentException("Cannot replace data file");
+          }
+          return;
+        default:
+          // ignore the object, it must be from a newer version of the format
+      }
+    }
+
+    @Override
+    public <T> void set(int pos, T value) {
+      put(pos, value);
+    }
+
+    @Override
+    public Object get(int i) {
+      switch (i) {
+        case 0:
+          return file.status.id();
+        case 1:
+          return file.snapshotId;
+        case 2:
+          return file.sequenceNumber;
+        case 3:
+          return file;
+        default:
+          throw new UnsupportedOperationException("Unknown field ordinal: " + i);
+      }
+    }
+
+    @Override
+    public <T> T get(int pos, Class<T> javaClass) {
+      return javaClass.cast(get(pos));
+    }
+
+    @Override
+    public int size() {
+      return avroSchema.getFields().size();
+    }
+
+    @Override
+    public String toString() {
+      return MoreObjects.toStringHelper(this)
+          .add("status", file.status)
+          .add("snapshot_id", file.snapshotId)
+          .add("sequence_number", file.sequenceNumber)
+          .add("file", file)
+          .toString();
+    }
+  }
 }
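The important design point in GenericDataFile above is that the file and its AsManifestEntry adapter share one set of fields instead of copying values back and forth. A hedged sketch of that invariant, assuming package access to org.apache.iceberg and a GenericDataFile produced by a manifest reader:

    package org.apache.iceberg;

    class AsEntryInvariantCheck {
      // Mutations made through the ManifestEntry view are visible through the DataFile view.
      static void check(GenericDataFile dataFile) {
        ManifestEntry entry = dataFile.asEntry();
        entry.setSnapshotId(42L);
        entry.setSequenceNumber(7L);
        if (entry.file() != dataFile || dataFile.snapshotId() != 42L || dataFile.sequenceNumber() != 7L) {
          throw new IllegalStateException("Adapter and data file should share state");
        }
      }
    }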
diff --git a/core/src/main/java/org/apache/iceberg/GenericManifestEntry.java b/core/src/main/java/org/apache/iceberg/GenericManifestEntry.java
index 9fc29a62bbff..49a812cc8239 100644
--- a/core/src/main/java/org/apache/iceberg/GenericManifestEntry.java
+++ b/core/src/main/java/org/apache/iceberg/GenericManifestEntry.java
@@ -19,182 +19,21 @@
 
 package org.apache.iceberg;
 
-import com.google.common.base.MoreObjects;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.avro.specific.SpecificData;
-import org.apache.iceberg.avro.AvroSchemaUtil;
-import org.apache.iceberg.types.Types;
-
-class GenericManifestEntry implements ManifestEntry, IndexedRecord, SpecificData.SchemaConstructable, StructLike {
-  private final org.apache.avro.Schema schema;
-  private final V1Metadata.IndexedDataFile fileWrapper;
-  private Status status = Status.EXISTING;
-  private Long snapshotId = null;
-  private Long sequenceNumber = null;
-  private DataFile file = null;
-
-  GenericManifestEntry(org.apache.avro.Schema schema) {
-    this.schema = schema;
-    this.fileWrapper = null; // do not use the file wrapper to read
-  }
-
-  GenericManifestEntry(Types.StructType partitionType) {
-    this.schema = AvroSchemaUtil.convert(V1Metadata.entrySchema(partitionType), "manifest_entry");
-    this.fileWrapper = new V1Metadata.IndexedDataFile(schema.getField("data_file").schema());
-  }
-
-  private GenericManifestEntry(GenericManifestEntry toCopy, boolean fullCopy) {
-    this.schema = toCopy.schema;
-    this.fileWrapper = new V1Metadata.IndexedDataFile(schema.getField("data_file").schema());
-    this.status = toCopy.status;
-    this.snapshotId = toCopy.snapshotId;
-    if (fullCopy) {
-      this.file = toCopy.file().copy();
-    } else {
-      this.file = toCopy.file().copyWithoutStats();
-    }
-  }
-
-  ManifestEntry wrapExisting(Long newSnapshotId, Long newSequenceNumber, DataFile newFile) {
-    this.status = Status.EXISTING;
-    this.snapshotId = newSnapshotId;
-    this.sequenceNumber = newSequenceNumber;
-    this.file = newFile;
-    return this;
-  }
-
-  ManifestEntry wrapAppend(Long newSnapshotId, DataFile newFile) {
-    this.status = Status.ADDED;
-    this.snapshotId = newSnapshotId;
-    this.sequenceNumber = null;
-    this.file = newFile;
-    return this;
-  }
-
-  ManifestEntry wrapDelete(Long newSnapshotId, DataFile newFile) {
-    this.status = Status.DELETED;
-    this.snapshotId = newSnapshotId;
-    this.sequenceNumber = null;
-    this.file = newFile;
-    return this;
-  }
-
-  /**
-   * @return the status of the file, whether EXISTING, ADDED, or DELETED
-   */
-  @Override
-  public Status status() {
-    return status;
-  }
+import org.apache.avro.Schema;
 
+class GenericManifestEntry extends GenericDataFile.AsManifestEntry {
   /**
-   * @return id of the snapshot in which the file was added to the table
+   * Used by Avro reflection to instantiate this class when reading manifest files.
+   *
+   * @param avroSchema an Avro read schema
    */
-  @Override
-  public Long snapshotId() {
-    return snapshotId;
-  }
-
-  @Override
-  public Long sequenceNumber() {
-    return sequenceNumber;
+  GenericManifestEntry(Schema avroSchema) {
+    super(avroSchema);
   }
 
   /**
-   * @return a file
+   * Constructor for Java serialization.
    */
-  @Override
-  public DataFile file() {
-    return file;
-  }
-
-  @Override
-  public ManifestEntry copy() {
-    return new GenericManifestEntry(this, true /* full copy */);
-  }
-
-  @Override
-  public ManifestEntry copyWithoutStats() {
-    return new GenericManifestEntry(this, false /* drop stats */);
-  }
-
-  @Override
-  public void setSnapshotId(long newSnapshotId) {
-    this.snapshotId = newSnapshotId;
-  }
-
-  @Override
-  public void setSequenceNumber(long newSequenceNumber) {
-    this.sequenceNumber = newSequenceNumber;
-  }
-
-  @Override
-  public void put(int i, Object v) {
-    switch (i) {
-      case 0:
-        this.status = Status.values()[(Integer) v];
-        return;
-      case 1:
-        this.snapshotId = (Long) v;
-        return;
-      case 2:
-        this.sequenceNumber = (Long) v;
-        return;
-      case 3:
-        this.file = (DataFile) v;
-        return;
-      default:
-        // ignore the object, it must be from a newer version of the format
-    }
-  }
-
-  @Override
-  public <T> void set(int pos, T value) {
-    put(pos, value);
-  }
-
-  @Override
-  public Object get(int i) {
-    switch (i) {
-      case 0:
-        return status.id();
-      case 1:
-        return snapshotId;
-      case 2:
-        return sequenceNumber;
-      case 3:
-        if (fileWrapper == null || file instanceof GenericDataFile) {
-          return file;
-        } else {
-          return fileWrapper.wrap(file);
-        }
-      default:
-        throw new UnsupportedOperationException("Unknown field ordinal: " + i);
-    }
-  }
-
-  @Override
-  public <T> T get(int pos, Class<T> javaClass) {
-    return javaClass.cast(get(pos));
-  }
-
-  @Override
-  public org.apache.avro.Schema getSchema() {
-    return schema;
-  }
-
-  @Override
-  public int size() {
-    return 4;
-  }
-
-  @Override
-  public String toString() {
-    return MoreObjects.toStringHelper(this)
-        .add("status", status)
-        .add("snapshot_id", snapshotId)
-        .add("sequence_number", sequenceNumber)
-        .add("file", file)
-        .toString();
+  GenericManifestEntry() {
   }
 }
diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntry.java b/core/src/main/java/org/apache/iceberg/ManifestEntry.java
index bc03850c3633..77f170040f56 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestEntry.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntry.java
@@ -19,6 +19,9 @@
 
 package org.apache.iceberg;
 
+import com.google.common.collect.Sets;
+import java.util.Set;
+import java.util.stream.Collectors;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.types.Types.StructType;
 
@@ -54,7 +57,12 @@ static Schema getSchema(StructType partitionType) {
   }
 
   static Schema wrapFileSchema(StructType fileType) {
-    return new Schema(STATUS, SNAPSHOT_ID, SEQUENCE_NUMBER, required(DATA_FILE_ID, "data_file", fileType));
+    // remove ManifestEntry fields from the file type when wrapping to avoid duplication
+    Set<Integer> toRemove = Sets.newHashSet(STATUS.fieldId(), SNAPSHOT_ID.fieldId(), SEQUENCE_NUMBER.fieldId());
+    StructType v1FileType = StructType.of(fileType.fields().stream()
+        .filter(field -> !toRemove.contains(field.fieldId()))
+        .collect(Collectors.toList()));
+    return new Schema(STATUS, SNAPSHOT_ID, SEQUENCE_NUMBER, required(DATA_FILE_ID, "data_file", v1FileType));
   }
 
   /**
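The wrapFileSchema change filters the entry-level fields out of the nested data_file struct by field id, so v1 manifests do not store status, snapshot id, and sequence number twice. A self-contained sketch of the same filtering pattern, with made-up field names and ids:

    import com.google.common.collect.Sets;
    import java.util.Set;
    import java.util.stream.Collectors;
    import org.apache.iceberg.types.Types;
    import org.apache.iceberg.types.Types.NestedField;
    import org.apache.iceberg.types.Types.StructType;

    class StructFilterExample {
      // Drop the fields whose ids are in idsToRemove, keeping the rest in order.
      static StructType without(StructType struct, Set<Integer> idsToRemove) {
        return StructType.of(struct.fields().stream()
            .filter(field -> !idsToRemove.contains(field.fieldId()))
            .collect(Collectors.toList()));
      }

      public static void main(String[] args) {
        StructType struct = StructType.of(
            NestedField.required(100, "file_path", Types.StringType.get()),
            NestedField.optional(101, "snapshot_id", Types.LongType.get()));
        System.out.println(without(struct, Sets.newHashSet(101)));  // keeps only file_path
      }
    }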
diff --git a/core/src/main/java/org/apache/iceberg/ManifestEntryWrapper.java b/core/src/main/java/org/apache/iceberg/ManifestEntryWrapper.java
new file mode 100644
index 000000000000..ea384254f454
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntryWrapper.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+/**
+ * Wrapper used to replace ManifestEntry values when writing entries to a manifest.
+ */
+class ManifestEntryWrapper implements ManifestEntry {
+  private DataFile wrapped = null;
+  private Status status = null;
+  private Long snapshotId = null;
+  private Long sequenceNumber = null;
+
+  ManifestEntry wrapExisting(Long newSnapshotId, Long newSequenceNumber, DataFile newFile) {
+    this.status = Status.EXISTING;
+    this.snapshotId = newSnapshotId;
+    this.sequenceNumber = newSequenceNumber;
+    this.wrapped = newFile;
+    return this;
+  }
+
+  ManifestEntry wrapAppend(Long newSnapshotId, DataFile newFile) {
+    this.status = Status.ADDED;
+    this.snapshotId = newSnapshotId;
+    this.sequenceNumber = null;
+    this.wrapped = newFile;
+    return this;
+  }
+
+  ManifestEntry wrapDelete(Long newSnapshotId, Long newSequenceNumber, DataFile newFile) {
+    this.status = Status.DELETED;
+    this.snapshotId = newSnapshotId;
+    this.sequenceNumber = newSequenceNumber;
+    this.wrapped = newFile;
+    return this;
+  }
+
+  @Override
+  public Status status() {
+    return status;
+  }
+
+  @Override
+  public Long snapshotId() {
+    return snapshotId;
+  }
+
+  @Override
+  public Long sequenceNumber() {
+    return sequenceNumber;
+  }
+
+  @Override
+  public DataFile file() {
+    return wrapped;
+  }
+
+  @Override
+  public void setSnapshotId(long snapshotId) {
+    this.snapshotId = snapshotId;
+  }
+
+  @Override
+  public void setSequenceNumber(long sequenceNumber) {
+    this.sequenceNumber = sequenceNumber;
+  }
+
+  @Override
+  public ManifestEntry copy() {
+    throw new UnsupportedOperationException("Cannot copy a ManifestEntryWrapper");
+  }
+
+  @Override
+  public ManifestEntry copyWithoutStats() {
+    throw new UnsupportedOperationException("Cannot copy a ManifestEntryWrapper");
+  }
+}
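ManifestEntryWrapper exists so the writer can reuse one mutable entry rather than allocating a ManifestEntry per file. A sketch of that reuse pattern, assuming an appender and a set of files are already available:

    package org.apache.iceberg;

    import org.apache.iceberg.io.FileAppender;

    class EntryReuseSketch {
      // One wrapper instance is re-pointed at each file; the appender reads the values out on add().
      static void appendAll(FileAppender<ManifestEntry> appender, Long snapshotId, Iterable<DataFile> files) {
        ManifestEntryWrapper reused = new ManifestEntryWrapper();
        for (DataFile file : files) {
          appender.add(reused.wrapAppend(snapshotId, file));
        }
      }
    }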
diff --git a/core/src/main/java/org/apache/iceberg/ManifestWriter.java b/core/src/main/java/org/apache/iceberg/ManifestWriter.java
index ddd74c8a3e1d..7ba87ae438fd 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestWriter.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestWriter.java
@@ -54,7 +54,7 @@ public static ManifestWriter write(PartitionSpec spec, OutputFile outputFile) {
   private final int specId;
   private final FileAppender<ManifestEntry> writer;
   private final Long snapshotId;
-  private final GenericManifestEntry reused;
+  private final ManifestEntryWrapper reused;
   private final PartitionSummary stats;
 
   private boolean closed = false;
@@ -71,7 +71,7 @@ private ManifestWriter(PartitionSpec spec, OutputFile file, Long snapshotId) {
     this.specId = spec.specId();
     this.writer = newAppender(spec, file);
     this.snapshotId = snapshotId;
-    this.reused = new GenericManifestEntry(spec.partitionType());
+    this.reused = new ManifestEntryWrapper();
     this.stats = new PartitionSummary(spec);
   }
 
@@ -121,11 +121,9 @@ void add(ManifestEntry entry) {
    * Add an existing entry for a data file.
    *
    * @param existingFile a data file
-   * @param fileSnapshotId snapshot ID when the data file was added to the table
-   * @param sequenceNumber sequence number for the data file
    */
-  public void existing(DataFile existingFile, long fileSnapshotId, long sequenceNumber) {
-    addEntry(reused.wrapExisting(fileSnapshotId, sequenceNumber, existingFile));
+  public void existing(DataFile existingFile) {
+    addEntry(reused.wrapExisting(existingFile.snapshotId(), existingFile.sequenceNumber(), existingFile));
   }
 
   void existing(ManifestEntry entry) {
@@ -140,13 +138,13 @@ void existing(ManifestEntry entry) {
    * @param deletedFile a data file
    */
   public void delete(DataFile deletedFile) {
-    addEntry(reused.wrapDelete(snapshotId, deletedFile));
+    addEntry(reused.wrapDelete(snapshotId, deletedFile.sequenceNumber(), deletedFile));
   }
 
   void delete(ManifestEntry entry) {
     // Use the current Snapshot ID for the delete. It is safe to delete the data file from disk
     // when this Snapshot has been removed or when there are no Snapshots older than this one.
-    addEntry(reused.wrapDelete(snapshotId, entry.file()));
+    addEntry(reused.wrapDelete(snapshotId, entry.sequenceNumber(), entry.file()));
   }
 
   @Override
@@ -237,4 +235,5 @@ protected FileAppender<ManifestEntry> newAppender(PartitionSpec spec, OutputFile
       }
     }
   }
+
 }
diff --git a/core/src/main/java/org/apache/iceberg/V1Metadata.java b/core/src/main/java/org/apache/iceberg/V1Metadata.java
index a905b4d6ec55..f8dddd1eafaf 100644
--- a/core/src/main/java/org/apache/iceberg/V1Metadata.java
+++ b/core/src/main/java/org/apache/iceberg/V1Metadata.java
@@ -337,6 +337,21 @@ public org.apache.avro.Schema getSchema() {
       return avroSchema;
     }
 
+    @Override
+    public FileStatus status() {
+      return wrapped.status();
+    }
+
+    @Override
+    public Long snapshotId() {
+      return wrapped.snapshotId();
+    }
+
+    @Override
+    public Long sequenceNumber() {
+      return wrapped.sequenceNumber();
+    }
+
     @Override
     public CharSequence path() {
       return wrapped.path();
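With the signature change to existing(), callers no longer pass a snapshot id and sequence number; both are taken from the DataFile, which therefore must have been read from a manifest. A hedged sketch of carrying existing files into a new manifest under that assumption (toManifestFile() is used here to materialize the result):

    package org.apache.iceberg;

    import java.io.IOException;
    import org.apache.iceberg.io.OutputFile;

    class RewriteManifestSketch {
      // Entry status, snapshot id, and sequence number all come from each DataFile.
      static ManifestFile rewrite(PartitionSpec spec, OutputFile out, Iterable<DataFile> existingFiles)
          throws IOException {
        ManifestWriter writer = ManifestWriter.write(spec, out);
        try {
          for (DataFile file : existingFiles) {
            writer.existing(file);
          }
        } finally {
          writer.close();
        }
        return writer.toManifestFile();
      }
    }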
diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java
index c66e8b11ac26..4677be4a99ab 100644
--- a/core/src/test/java/org/apache/iceberg/TableTestBase.java
+++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -183,14 +183,14 @@ ManifestFile writeManifestWithName(String name, DataFile... files) throws IOExce
   }
 
   ManifestEntry manifestEntry(ManifestEntry.Status status, Long snapshotId, DataFile file) {
-    GenericManifestEntry entry = new GenericManifestEntry(table.spec().partitionType());
+    ManifestEntryWrapper entry = new ManifestEntryWrapper();
     switch (status) {
       case ADDED:
        return entry.wrapAppend(snapshotId, file);
      case EXISTING:
        return entry.wrapExisting(snapshotId, 0L, file);
      case DELETED:
-        return entry.wrapDelete(snapshotId, file);
+        return entry.wrapDelete(snapshotId, 0L, file);
      default:
        throw new IllegalArgumentException("Unexpected entry status: " + status);
    }
diff --git a/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java b/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
index 1875c3d5ca9e..d7f178cdfd63 100644
--- a/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
+++ b/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
@@ -25,6 +25,7 @@
 import java.util.Map;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.FileStatus;
 import org.apache.iceberg.StructLike;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
@@ -56,6 +57,21 @@ public SparkDataFile wrap(Row row) {
     return this;
   }
 
+  @Override
+  public FileStatus status() {
+    return null;
+  }
+
+  @Override
+  public Long snapshotId() {
+    return null;
+  }
+
+  @Override
+  public Long sequenceNumber() {
+    return null;
+  }
+
   @Override
   public CharSequence path() {
     return wrapped.getAs(fieldPositions[0]);
diff --git a/spark/src/test/java/org/apache/iceberg/TestDataFileSerialization.java b/spark/src/test/java/org/apache/iceberg/TestDataFileSerialization.java
index 64c9d519f16b..3bc75f01e0f5 100644
--- a/spark/src/test/java/org/apache/iceberg/TestDataFileSerialization.java
+++ b/spark/src/test/java/org/apache/iceberg/TestDataFileSerialization.java
@@ -129,6 +129,12 @@ public void testDataFileJavaSerialization() throws Exception {
   }
 
   private void checkDataFile(DataFile expected, DataFile actual) {
+    Assert.assertEquals("Should match the serialized status",
+        expected.status(), actual.status());
+    Assert.assertEquals("Should match the serialized snapshot id",
+        expected.snapshotId(), actual.snapshotId());
+    Assert.assertEquals("Should match the serialized sequence number",
+        expected.sequenceNumber(), actual.sequenceNumber());
     Assert.assertEquals("Should match the serialized record path",
         expected.path(), actual.path());
     Assert.assertEquals("Should match the serialized record format",
@@ -151,7 +157,7 @@ private void checkDataFile(DataFile expected, DataFile actual) {
         expected.keyMetadata(), actual.keyMetadata());
     Assert.assertEquals("Should match the serialized record offsets",
         expected.splitOffsets(), actual.splitOffsets());
-    Assert.assertEquals("Should match the serialized record offsets",
+    Assert.assertEquals("Should match the serialized key metadata",
        expected.keyMetadata(), actual.keyMetadata());
   }
 }
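Note that SparkDataFile returns null for all three new accessors, so code that consumes DataFile instances from the Spark wrapper should treat these fields as optional. A small null-safe helper, for illustration only:

    import org.apache.iceberg.DataFile;

    class NullSafeEntryFields {
      // Avoids unboxing a null snapshot id, e.g. when the file came from SparkDataFile.
      static long snapshotIdOrDefault(DataFile file, long defaultValue) {
        Long snapshotId = file.snapshotId();
        return snapshotId != null ? snapshotId : defaultValue;
      }
    }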