diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
index 2efcf80d2b9b..0139ec4f610b 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
@@ -286,7 +286,9 @@ private Transaction newReplaceTableTransaction(boolean orCreate) {
   *
   * @param io a FileIO to use for deletes
   * @param metadata the last valid TableMetadata instance for a dropped table.
+  * @deprecated will be removed in 0.11.0; use CatalogUtil.dropTableData instead.
   */
+ @Deprecated
  protected static void dropTableData(FileIO io, TableMetadata metadata) {
    // Reads and deletes are done using Tasks.foreach(...).suppressFailureWhenFinished to complete
    // as much of the delete work as possible and avoid orphaned data or manifest files.
diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
new file mode 100644
index 000000000000..e1a1f28717a1
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+import org.apache.iceberg.exceptions.RuntimeIOException;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.relocated.com.google.common.base.Joiner;
+import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
+import org.apache.iceberg.relocated.com.google.common.collect.MapMaker;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.util.Tasks;
+import org.apache.iceberg.util.ThreadPools;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class CatalogUtil {
+  private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class);
+
+  private CatalogUtil() {
+  }
+
+  /**
+   * Drops all data and metadata files referenced by TableMetadata.
+   * <p>
+ * This should be called by dropTable implementations to clean up table files once the table has been dropped in the
+ * metastore.
+ *
+ * @param io a FileIO to use for deletes
+ * @param metadata the last valid TableMetadata instance for a dropped table.
+ */
+ public static void dropTableData(FileIO io, TableMetadata metadata) {
+ // Reads and deletes are done using Tasks.foreach(...).suppressFailureWhenFinished to complete
+ // as much of the delete work as possible and avoid orphaned data or manifest files.
+
+ Set<String> manifestListsToDelete = Sets.newHashSet();
diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java
+ * If purge is set to true the implementation should delete all data and metadata files.
+ *
+ * @param location a path URI (e.g. hdfs:///warehouse/my_table)
+ * @param purge if true, delete all data and metadata files in the table
+ * @return true if the table was dropped, false if it did not exist
+ */
+ public boolean dropTable(String location, boolean purge) {
+ TableOperations ops = newTableOps(location);
+ TableMetadata lastMetadata = null;
+ if (ops.current() != null) {
+ if (purge) {
+ lastMetadata = ops.current();
+ }
+ } else {
+ return false;
+ }
+
+ try {
+ if (purge && lastMetadata != null) {
+ // The data files and the metadata files may be stored in different locations,
+ // so dropTableData has to be called to delete the data files explicitly.
+ CatalogUtil.dropTableData(ops.io(), lastMetadata);
+ }
+ Path tablePath = new Path(location);
+ Util.getFs(tablePath, conf).delete(tablePath, true /* recursive */);
+ return true;
+ } catch (IOException e) {
+ throw new UncheckedIOException("Failed to delete file: " + location, e);
+ }
+ }
+
@VisibleForTesting
TableOperations newTableOps(String location) {
if (location.contains(METADATA_JSON)) {
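For reviewers, a quick usage sketch of the new HadoopTables.dropTable(location, purge) overload described above. The Configuration setup and the table location are placeholder assumptions, not values taken from this change.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.hadoop.HadoopTables;

    public class DropTableSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        HadoopTables tables = new HadoopTables(conf);
        String location = "hdfs:///warehouse/my_table";  // hypothetical table location

        // purge = true: delete all data and metadata files referenced by the last metadata,
        // then remove the table directory itself
        boolean dropped = tables.dropTable(location, true);

        // purge = false: remove only the table directory; data files stored in other
        // locations are left untouched (see testDropTableWithoutPurge below)
        // tables.dropTable(location, false);

        System.out.println("dropped: " + dropped);
      }
    }

Calling dropTable on a location that no longer exists returns false rather than throwing, which is what the new tests assert.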
diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java
similarity index 52%
rename from core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java
rename to core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java
index 0b4e714655ac..7af55c7151e2 100644
--- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java
+++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java
@@ -20,10 +20,20 @@
package org.apache.iceberg.hadoop;
import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import org.apache.iceberg.AppendFiles;
+import org.apache.iceberg.AssertHelpers;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataFiles;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
+import org.apache.iceberg.exceptions.NoSuchTableException;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
@@ -38,7 +48,7 @@
import static org.apache.iceberg.SortDirection.ASC;
import static org.apache.iceberg.types.Types.NestedField.required;
-public class TestHadoopTablesSortOrder {
+public class TestHadoopTables {
private static final HadoopTables TABLES = new HadoopTables();
private static final Schema SCHEMA = new Schema(
@@ -48,12 +58,54 @@ public class TestHadoopTablesSortOrder {
@Rule
public TemporaryFolder temp = new TemporaryFolder();
- private String tableLocation = null;
+ private File tableDir = null;
@Before
public void setupTableLocation() throws Exception {
- File tableDir = temp.newFolder();
- this.tableLocation = tableDir.toURI().toString();
+ tableDir = temp.newFolder();
+ }
+
+ @Test
+ public void testDropTable() {
+ TABLES.create(SCHEMA, tableDir.toURI().toString());
+ TABLES.dropTable(tableDir.toURI().toString());
+ AssertHelpers.assertThrows(
+ "Should complain about missing table", NoSuchTableException.class,
+ "Table does not exist", () -> TABLES.load(tableDir.toURI().toString()));
+ }
+
+ @Test
+ public void testDropTableWithPurge() throws IOException {
+ File dataDir = temp.newFolder();
+
+ createDummyTable(tableDir, dataDir);
+
+ TABLES.dropTable(tableDir.toURI().toString(), true);
+ AssertHelpers.assertThrows(
+ "Should complain about missing table", NoSuchTableException.class,
+ "Table does not exist", () -> TABLES.load(tableDir.toURI().toString()));
+
+ Assert.assertEquals(0, dataDir.listFiles().length);
+ Assert.assertFalse(tableDir.exists());
+
+ Assert.assertFalse(TABLES.dropTable(tableDir.toURI().toString()));
+ }
+
+ @Test
+ public void testDropTableWithoutPurge() throws IOException {
+ File dataDir = temp.newFolder();
+
+ createDummyTable(tableDir, dataDir);
+
+ TABLES.dropTable(tableDir.toURI().toString(), false);
+ AssertHelpers.assertThrows(
+ "Should complain about missing table", NoSuchTableException.class,
+ "Table does not exist", () -> TABLES.load(tableDir.toURI().toString()));
+
+ Assert.assertEquals(1, dataDir.listFiles().length);
+ Assert.assertFalse(tableDir.exists());
+
+ Assert.assertFalse(TABLES.dropTable(tableDir.toURI().toString()));
}
@Test
@@ -61,7 +113,7 @@ public void testDefaultSortOrder() {
PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
.bucket("data", 16)
.build();
- Table table = TABLES.create(SCHEMA, spec, tableLocation);
+ Table table = TABLES.create(SCHEMA, spec, tableDir.toURI().toString());
SortOrder sortOrder = table.sortOrder();
Assert.assertEquals("Order ID must match", 0, sortOrder.orderId());
@@ -76,7 +128,7 @@ public void testCustomSortOrder() {
SortOrder order = SortOrder.builderFor(SCHEMA)
.asc("id", NULLS_FIRST)
.build();
- Table table = TABLES.create(SCHEMA, spec, order, Maps.newHashMap(), tableLocation);
+ Table table = TABLES.create(SCHEMA, spec, order, Maps.newHashMap(), tableDir.toURI().toString());
SortOrder sortOrder = table.sortOrder();
Assert.assertEquals("Order ID must match", 1, sortOrder.orderId());
@@ -86,4 +138,22 @@ public void testCustomSortOrder() {
Transform<?, ?> transform = Transforms.identity(Types.IntegerType.get());
Assert.assertEquals("Transform must match", transform, sortOrder.fields().get(0).transform());
}
+
+ private static void createDummyTable(File tableDir, File dataDir) throws IOException {
+ Table table = TABLES.create(SCHEMA, tableDir.toURI().toString());
+ AppendFiles append = table.newAppend();
+ String data = dataDir.getPath() + "/data.parquet";
+ Files.write(Paths.get(data), new ArrayList<>(), StandardCharsets.UTF_8);
+ DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned())
+ .withPath(data)
+ .withFileSizeInBytes(10)
+ .withRecordCount(1)
+ .build();
+ append.appendFile(dataFile);
+ append.commit();
+
+ // Make sure that the data file and the manifest dir are created
+ Assert.assertEquals(1, dataDir.listFiles().length);
+ Assert.assertEquals(1, tableDir.listFiles().length);
+ }
}
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
index 4862266749e8..eca5efd89b95 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
@@ -36,6 +36,7 @@
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.iceberg.BaseMetastoreCatalog;
+import org.apache.iceberg.CatalogUtil;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.catalog.Namespace;
@@ -140,7 +141,7 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) {
});
if (purge && lastMetadata != null) {
- dropTableData(ops.io(), lastMetadata);
+ CatalogUtil.dropTableData(ops.io(), lastMetadata);
}
LOG.info("Dropped table: {}", identifier);
diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
index 75790479f2ee..aba158add705 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java
@@ -19,9 +19,16 @@
package org.apache.iceberg.mr;
+import java.util.HashMap;
+import java.util.Map;
import java.util.Optional;
import java.util.Properties;
+import java.util.Set;
import org.apache.hadoop.conf.Configuration;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.PartitionSpecParser;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.Catalog;
import org.apache.iceberg.catalog.TableIdentifier;
@@ -32,9 +39,20 @@
import org.apache.iceberg.hive.HiveCatalogs;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+/**
+ * Class for catalog resolution and accessing the common functions for {@link Catalog} API.
+ *
+ * Catalog resolution happens in this order:
+ *
+ * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by
+ * the controlling properties.
+ *
+ * Used by HiveIcebergSerDe and HiveIcebergStorageHandler
+ * @param conf a Hadoop conf
+ * @param props the controlling properties
+ * @return an Iceberg table
+ */
public static Table loadTable(Configuration conf, Properties props) {
return loadTable(conf, props.getProperty(NAME), props.getProperty(LOCATION));
}
@@ -77,6 +104,79 @@ private static Table loadTable(Configuration conf, String tableIdentifier, String tableLocation) {
return new HadoopTables(conf).load(tableLocation);
}
+ /**
+ * Creates an Iceberg table using the catalog specified by the configuration.
+ *
+ * The properties should contain the following values:
+ *
+ * Other properties will be handed over to the Table creation. The controlling properties above will not be
+ * propagated.
+ * @param conf a Hadoop conf
+ * @param props the controlling properties
+ * @return the created Iceberg table
+ */
+ public static Table createTable(Configuration conf, Properties props) {
+ String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA);
+ Preconditions.checkNotNull(schemaString, "Table schema not set");
+ Schema schema = SchemaParser.fromJson(props.getProperty(InputFormatConfig.TABLE_SCHEMA));
+
+ String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC);
+ PartitionSpec spec = PartitionSpec.unpartitioned();
+ if (specString != null) {
+ spec = PartitionSpecParser.fromJson(schema, specString);
+ }
+
+ String location = props.getProperty(LOCATION);
+
+ // Create a table property map without the controlling properties
+ Map<String, String> map = new HashMap<>(props.size());
+ * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by
+ * the controlling properties.
+ * @param conf a Hadoop conf
+ * @param props the controlling properties
+ * @return true if the table was dropped, false if it did not exist
+ */
+ public static boolean dropTable(Configuration conf, Properties props) {
+ String location = props.getProperty(LOCATION);
+
+ Optional<Catalog> catalog = loadCatalog(conf);
+ *
+ */
public final class Catalogs {
private static final Logger LOG = LoggerFactory.getLogger(Catalogs.class);
@@ -44,15 +62,14 @@ public final class Catalogs {
private static final String NAME = "name";
private static final String LOCATION = "location";
+ private static final Set<String> PROPERTIES_TO_REMOVE =
+     ImmutableSet.of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME);
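To close the loop on the mr module, a hedged end-to-end sketch of the new Catalogs.createTable and Catalogs.dropTable helpers. The schema, the file path, and the literal "location" property key are illustrative assumptions based on this diff.

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.SchemaParser;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.mr.Catalogs;
    import org.apache.iceberg.mr.InputFormatConfig;
    import org.apache.iceberg.types.Types;

    public class CatalogsSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        Schema schema = new Schema(
            Types.NestedField.required(1, "id", Types.LongType.get()),
            Types.NestedField.optional(2, "data", Types.StringType.get()));

        Properties props = new Properties();
        props.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
        props.setProperty("location", "file:///tmp/iceberg/example_table");  // hypothetical path

        // With no catalog configured, this should fall back to HadoopTables at the given location.
        Table table = Catalogs.createTable(conf, props);

        // dropTable resolves the table the same way and reports whether it existed.
        boolean dropped = Catalogs.dropTable(conf, props);
        System.out.println("created " + table.location() + ", dropped: " + dropped);
      }
    }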