diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java index 2263d8151218..99a579da9592 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java @@ -24,6 +24,7 @@ import org.apache.iceberg.Snapshot; import org.apache.iceberg.SnapshotRef; import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.CommitStateUnknownException; import org.apache.iceberg.exceptions.NoSuchTableException; @@ -84,6 +85,12 @@ private TableMetadata loadTableMetadata(String metadataLocation, Reference refer NessieUtil.tableMetadataFromIcebergTable(io(), table, metadataLocation); Map newProperties = Maps.newHashMap(deserialized.properties()); newProperties.put(NESSIE_COMMIT_ID_PROPERTY, reference.getHash()); + // To prevent accidental deletion of files that are still referenced by other branches/tags, // GC_ENABLED is set to false so that all Iceberg GC operations, such as expire_snapshots, // remove_orphan_files, and drop_table with purge, fail with an error. // The Nessie CLI will provide reference-aware GC functionality for expired/unreferenced // files.
+ newProperties.put(TableProperties.GC_ENABLED, "false"); TableMetadata.Builder builder = TableMetadata.buildFrom(deserialized) .setPreviousFileLocation(null) diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java index 97ded6094ef9..c919431a333b 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestNessieTable.java @@ -40,11 +40,13 @@ import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadataParser; import org.apache.iceberg.TableOperations; +import org.apache.iceberg.TableProperties; import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Types; import org.assertj.core.api.Assertions; @@ -548,6 +550,22 @@ public void testListTables() { Assertions.assertThat(catalog.tableExists(TABLE_IDENTIFIER)).isTrue(); } + @Test + public void testGCEnabled() { + Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER); + + Assertions.assertThat(icebergTable.properties().get(TableProperties.GC_ENABLED)) + .isNotNull() + .isEqualTo("false"); + + Assertions.assertThatThrownBy( + () -> + icebergTable.expireSnapshots().expireOlderThan(System.currentTimeMillis()).commit()) + .isInstanceOf(ValidationException.class) + .hasMessage( + "Cannot expire snapshots: GC is disabled (deleting files may corrupt other tables)"); + } + private String getTableBasePath(String tableName) { String databasePath = temp.toString() + "/" + DB_NAME; return Paths.get(databasePath, tableName).toAbsolutePath().toString();