From 514575eeed1ded9c4232462e568fbaf3b845fe49 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Mon, 21 Sep 2020 15:28:53 +0200 Subject: [PATCH 1/7] Implement dropTable for HadoopTables --- .../java/org/apache/iceberg/CatalogUtil.java | 119 ++++++++++++++++++ .../apache/iceberg/hadoop/HadoopCatalog.java | 4 +- .../apache/iceberg/hadoop/HadoopTables.java | 33 +++++ ...esSortOrder.java => TestHadoopTables.java} | 78 +++++++++++- .../org/apache/iceberg/hive/HiveCatalog.java | 3 +- 5 files changed, 229 insertions(+), 8 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/CatalogUtil.java rename core/src/test/java/org/apache/iceberg/hadoop/{TestHadoopTablesSortOrder.java => TestHadoopTables.java} (54%) diff --git a/core/src/main/java/org/apache/iceberg/CatalogUtil.java b/core/src/main/java/org/apache/iceberg/CatalogUtil.java new file mode 100644 index 000000000000..e1a1f28717a1 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/CatalogUtil.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.exceptions.RuntimeIOException; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Joiner; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.MapMaker; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.Tasks; +import org.apache.iceberg.util.ThreadPools; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CatalogUtil { + private static final Logger LOG = LoggerFactory.getLogger(CatalogUtil.class); + + private CatalogUtil() { + } + + /** + * Drops all data and metadata files referenced by TableMetadata. + *
+   * <p>
+ * This should be called by dropTable implementations to clean up table files once the table has been dropped in the + * metastore. + * + * @param io a FileIO to use for deletes + * @param metadata the last valid TableMetadata instance for a dropped table. + */ + public static void dropTableData(FileIO io, TableMetadata metadata) { + // Reads and deletes are done using Tasks.foreach(...).suppressFailureWhenFinished to complete + // as much of the delete work as possible and avoid orphaned data or manifest files. + + Set manifestListsToDelete = Sets.newHashSet(); + Set manifestsToDelete = Sets.newHashSet(); + for (Snapshot snapshot : metadata.snapshots()) { + // add all manifests to the delete set because both data and delete files should be removed + Iterables.addAll(manifestsToDelete, snapshot.allManifests()); + // add the manifest list to the delete set, if present + if (snapshot.manifestListLocation() != null) { + manifestListsToDelete.add(snapshot.manifestListLocation()); + } + } + + LOG.info("Manifests to delete: {}", Joiner.on(", ").join(manifestsToDelete)); + + // run all of the deletes + + deleteFiles(io, manifestsToDelete); + + Tasks.foreach(Iterables.transform(manifestsToDelete, ManifestFile::path)) + .noRetry().suppressFailureWhenFinished() + .onFailure((manifest, exc) -> LOG.warn("Delete failed for manifest: {}", manifest, exc)) + .run(io::deleteFile); + + Tasks.foreach(manifestListsToDelete) + .noRetry().suppressFailureWhenFinished() + .onFailure((list, exc) -> LOG.warn("Delete failed for manifest list: {}", list, exc)) + .run(io::deleteFile); + + Tasks.foreach(metadata.metadataFileLocation()) + .noRetry().suppressFailureWhenFinished() + .onFailure((list, exc) -> LOG.warn("Delete failed for metadata file: {}", list, exc)) + .run(io::deleteFile); + } + + private static void deleteFiles(FileIO io, Set allManifests) { + // keep track of deleted files in a map that can be cleaned up when memory runs low + Map deletedFiles = new MapMaker() + .concurrencyLevel(ThreadPools.WORKER_THREAD_POOL_SIZE) + .weakKeys() + .makeMap(); + + Tasks.foreach(allManifests) + .noRetry().suppressFailureWhenFinished() + .executeWith(ThreadPools.getWorkerPool()) + .onFailure((item, exc) -> LOG.warn("Failed to get deleted files: this may cause orphaned data files", exc)) + .run(manifest -> { + try (ManifestReader reader = ManifestFiles.open(manifest, io)) { + for (ManifestEntry entry : reader.entries()) { + // intern the file path because the weak key map uses identity (==) instead of equals + String path = entry.file().path().toString().intern(); + Boolean alreadyDeleted = deletedFiles.putIfAbsent(path, true); + if (alreadyDeleted == null || !alreadyDeleted) { + try { + io.deleteFile(path); + } catch (RuntimeException e) { + // this may happen if the map of deleted files gets cleaned up by gc + LOG.warn("Delete failed for data file: {}", path, e); + } + } + } + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to read manifest file: %s", manifest.path()); + } + }); + } +} diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java index 235beff36567..4941b1c1469e 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.CatalogUtil; import 
org.apache.iceberg.Schema; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; @@ -196,9 +197,10 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { try { if (purge && lastMetadata != null) { + // Since the data files and the metadata files may store in different locations, // Since the data files and the metadata files may store in different locations, // so it has to call dropTableData to force delete the data file. - dropTableData(ops.io(), lastMetadata); + CatalogUtil.dropTableData(ops.io(), lastMetadata); } fs.delete(tablePath, true /* recursive */); return true; diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java index d0667ebfa118..8bfafb40ff09 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java @@ -19,11 +19,13 @@ package org.apache.iceberg.hadoop; +import java.io.IOException; import java.util.Map; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.MetadataTableType; import org.apache.iceberg.MetadataTableUtils; import org.apache.iceberg.PartitionSpec; @@ -36,6 +38,7 @@ import org.apache.iceberg.Tables; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -144,6 +147,36 @@ public Table create(Schema schema, PartitionSpec spec, SortOrder order, return new BaseTable(ops, location); } + public boolean dropTable(String location) { + return dropTable(location, true); + } + + public boolean dropTable(String location, boolean purge) { + // Just for checking if the table exists or not + load(location); + + TableOperations ops = newTableOps(location); + TableMetadata lastMetadata; + if (purge && ops.current() != null) { + lastMetadata = ops.current(); + } else { + lastMetadata = null; + } + + try { + if (purge && lastMetadata != null) { + // Since the data files and the metadata files may store in different locations, + // so it has to call dropTableData to force delete the data file. 
+ CatalogUtil.dropTableData(ops.io(), lastMetadata); + } + Path tablePath = new Path(location); + Util.getFs(tablePath, conf).delete(tablePath, true /* recursive */); + return true; + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to delete file: %s", location); + } + } + @VisibleForTesting TableOperations newTableOps(String location) { if (location.contains(METADATA_JSON)) { diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java similarity index 54% rename from core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java rename to core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java index 0b4e714655ac..5bd79a457e3a 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTablesSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java @@ -20,10 +20,20 @@ package org.apache.iceberg.hadoop; import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import org.apache.iceberg.AppendFiles; +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.DataFiles; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.SortOrder; import org.apache.iceberg.Table; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.transforms.Transform; import org.apache.iceberg.transforms.Transforms; @@ -38,7 +48,7 @@ import static org.apache.iceberg.SortDirection.ASC; import static org.apache.iceberg.types.Types.NestedField.required; -public class TestHadoopTablesSortOrder { +public class TestHadoopTables { private static final HadoopTables TABLES = new HadoopTables(); private static final Schema SCHEMA = new Schema( @@ -48,12 +58,50 @@ public class TestHadoopTablesSortOrder { @Rule public TemporaryFolder temp = new TemporaryFolder(); - private String tableLocation = null; + private File tableDir = null; @Before public void setupTableLocation() throws Exception { - File tableDir = temp.newFolder(); - this.tableLocation = tableDir.toURI().toString(); + tableDir = temp.newFolder(); + } + + @Test + public void testDropTable() { + TABLES.create(SCHEMA, tableDir.toURI().toString()); + TABLES.dropTable(tableDir.toURI().toString()); + AssertHelpers.assertThrows( + "Should complain about missing table", NoSuchTableException.class, + "Table does not exist", () -> TABLES.load(tableDir.toURI().toString())); + } + + @Test + public void testDropTableWithPurge() throws IOException { + File dataDir = temp.newFolder(); + + createDummyTable(tableDir, dataDir); + + TABLES.dropTable(tableDir.toURI().toString(), true); + AssertHelpers.assertThrows( + "Should complain about missing table", NoSuchTableException.class, + "Table does not exist", () -> TABLES.load(tableDir.toURI().toString())); + + Assert.assertEquals(0, dataDir.listFiles().length); + Assert.assertFalse(tableDir.exists()); + } + + @Test + public void testDropTableWithoutPurge() throws IOException { + File dataDir = temp.newFolder(); + + createDummyTable(tableDir, dataDir); + + TABLES.dropTable(tableDir.toURI().toString(), false); + AssertHelpers.assertThrows( + "Should complain about missing table", NoSuchTableException.class, + "Table does not exist", () -> 
TABLES.load(tableDir.toURI().toString())); + + Assert.assertEquals(1, dataDir.listFiles().length); + Assert.assertFalse(tableDir.exists()); } @Test @@ -61,7 +109,7 @@ public void testDefaultSortOrder() { PartitionSpec spec = PartitionSpec.builderFor(SCHEMA) .bucket("data", 16) .build(); - Table table = TABLES.create(SCHEMA, spec, tableLocation); + Table table = TABLES.create(SCHEMA, spec, tableDir.toURI().toString()); SortOrder sortOrder = table.sortOrder(); Assert.assertEquals("Order ID must match", 0, sortOrder.orderId()); @@ -76,7 +124,7 @@ public void testCustomSortOrder() { SortOrder order = SortOrder.builderFor(SCHEMA) .asc("id", NULLS_FIRST) .build(); - Table table = TABLES.create(SCHEMA, spec, order, Maps.newHashMap(), tableLocation); + Table table = TABLES.create(SCHEMA, spec, order, Maps.newHashMap(), tableDir.toURI().toString()); SortOrder sortOrder = table.sortOrder(); Assert.assertEquals("Order ID must match", 1, sortOrder.orderId()); @@ -86,4 +134,22 @@ public void testCustomSortOrder() { Transform transform = Transforms.identity(Types.IntegerType.get()); Assert.assertEquals("Transform must match", transform, sortOrder.fields().get(0).transform()); } + + private static void createDummyTable(File tableDir, File dataDir) throws IOException { + Table table = TABLES.create(SCHEMA, tableDir.toURI().toString()); + AppendFiles append = table.newAppend(); + String data = dataDir.getPath() + "/data.parquet"; + Files.write(Paths.get(data), new ArrayList<>(), StandardCharsets.UTF_8); + DataFile dataFile = DataFiles.builder(PartitionSpec.unpartitioned()) + .withPath(data) + .withFileSizeInBytes(10) + .withRecordCount(1) + .build(); + append.appendFile(dataFile); + append.commit(); + + // Make sure that the data file and the manifest dir is created + Assert.assertEquals(1, dataDir.listFiles().length); + Assert.assertEquals(1, tableDir.listFiles().length); + } } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 4862266749e8..eca5efd89b95 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.CatalogUtil; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.TableOperations; import org.apache.iceberg.catalog.Namespace; @@ -140,7 +141,7 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { }); if (purge && lastMetadata != null) { - dropTableData(ops.io(), lastMetadata); + CatalogUtil.dropTableData(ops.io(), lastMetadata); } LOG.info("Dropped table: {}", identifier); From a1207f1b2dfc78ede59bc41825aab4c6374d79f8 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Mon, 21 Sep 2020 15:52:54 +0200 Subject: [PATCH 2/7] Implement Catalogs.createTable, Catalogs.dropTable --- .../java/org/apache/iceberg/mr/Catalogs.java | 49 ++++++++++ .../apache/iceberg/mr/InputFormatConfig.java | 1 + .../org/apache/iceberg/mr/TestCatalogs.java | 92 +++++++++++++++++++ 3 files changed, 142 insertions(+) diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java index 75790479f2ee..ee3992862118 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ 
b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -19,9 +19,15 @@ package org.apache.iceberg.mr; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; import java.util.Properties; import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; +import org.apache.iceberg.Schema; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; @@ -77,6 +83,49 @@ private static Table loadTable(Configuration conf, String tableIdentifier, Strin return new HadoopTables(conf).load(tableLocation); } + public static Table createTable(Configuration conf, Properties props) { + String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA); + Preconditions.checkNotNull(schemaString, "Table schema not set"); + Schema schema = SchemaParser.fromJson(props.getProperty(InputFormatConfig.TABLE_SCHEMA)); + + String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC); + PartitionSpec spec = PartitionSpec.unpartitioned(); + if (specString != null) { + spec = PartitionSpecParser.fromJson(schema, specString); + } + + String location = props.getProperty(LOCATION); + + Map map = new HashMap<>(props.size()); + props.forEach((k, v) -> map.put(k.toString(), v.toString())); + + Optional catalog = loadCatalog(conf); + + if (catalog.isPresent()) { + String name = props.getProperty(NAME); + Preconditions.checkNotNull(name, "Table identifier not set"); + return catalog.get().createTable(TableIdentifier.parse(name), schema, spec, location, map); + } + + Preconditions.checkNotNull(location, "Table location not set"); + return new HadoopTables(conf).create(schema, spec, map, location); + } + + public static boolean dropTable(Configuration conf, Properties props) { + String location = props.getProperty(LOCATION); + + Optional catalog = loadCatalog(conf); + + if (catalog.isPresent()) { + String name = props.getProperty(NAME); + Preconditions.checkNotNull(name, "Table identifier not set"); + return catalog.get().dropTable(TableIdentifier.parse(name)); + } + + Preconditions.checkNotNull(location, "Table location not set"); + return new HadoopTables(conf).dropTable(location); + } + @VisibleForTesting static Optional loadCatalog(Configuration conf) { String catalogLoaderClass = conf.get(InputFormatConfig.CATALOG_LOADER_CLASS); diff --git a/mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java index c7d45453b172..1f2502559356 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java +++ b/mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java @@ -43,6 +43,7 @@ private InputFormatConfig() { public static final String TABLE_IDENTIFIER = "iceberg.mr.table.identifier"; public static final String TABLE_LOCATION = "iceberg.mr.table.location"; public static final String TABLE_SCHEMA = "iceberg.mr.table.schema"; + public static final String PARTITION_SPEC = "iceberg.mr.table.partition.spec"; public static final String LOCALITY = "iceberg.mr.locality"; public static final String CATALOG = "iceberg.mr.catalog"; public static final String HADOOP_CATALOG_WAREHOUSE_LOCATION = "iceberg.mr.catalog.hadoop.warehouse.location"; diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index f92ba2f4afb5..3c6a4fca04a5 100644 --- 
a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -21,13 +21,18 @@ import java.io.IOException; import java.util.Optional; +import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.Table; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.hadoop.HadoopTables; import org.apache.iceberg.hive.HiveCatalog; @@ -43,6 +48,7 @@ public class TestCatalogs { private static final Schema SCHEMA = new Schema(required(1, "foo", Types.StringType.get())); + private static final PartitionSpec SPEC = PartitionSpec.builderFor(SCHEMA).identity("foo").build(); private Configuration conf; @@ -85,6 +91,92 @@ public void testLoadTableFromCatalog() throws IOException { Assert.assertEquals(hadoopCatalogTable.location(), Catalogs.loadTable(conf).location()); } + @Test + public void testCreateDropTableToLocation() throws IOException { + Properties missingSchema = new Properties(); + missingSchema.put("location", temp.newFolder("hadoop_tables").toString()); + AssertHelpers.assertThrows( + "Should complain about table schema not set", NullPointerException.class, + "schema not set", () -> Catalogs.createTable(conf, missingSchema)); + + Properties missingLocation = new Properties(); + missingLocation.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); + AssertHelpers.assertThrows( + "Should complain about table location not set", NullPointerException.class, + "location not set", () -> Catalogs.createTable(conf, missingLocation)); + + Properties properties = new Properties(); + properties.put("location", temp.toString() + "/hadoop_tables"); + properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); + properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC)); + + Catalogs.createTable(conf, properties); + + HadoopTables tables = new HadoopTables(); + Table table = tables.load(properties.getProperty("location")); + + Assert.assertEquals(properties.getProperty("location"), table.location()); + Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); + Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); + + AssertHelpers.assertThrows( + "Should complain about table location not set", NullPointerException.class, + "location not set", () -> Catalogs.dropTable(conf, new Properties())); + + Properties dropProperties = new Properties(); + dropProperties.put("location", temp.toString() + "/hadoop_tables"); + Catalogs.dropTable(conf, dropProperties); + + AssertHelpers.assertThrows( + "Should complain about table not found", NoSuchTableException.class, + "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); + } + + @Test + public void testCreateDropTableToCatalog() throws IOException { + TableIdentifier identifier = TableIdentifier.of("test", "table"); + + conf.set("warehouse.location", temp.newFolder("hadoop", "warehouse").toString()); + conf.setClass(InputFormatConfig.CATALOG_LOADER_CLASS, CustomHadoopCatalogLoader.class, 
CatalogLoader.class); + + Properties missingSchema = new Properties(); + missingSchema.put("name", identifier.toString()); + AssertHelpers.assertThrows( + "Should complain about table schema not set", NullPointerException.class, + "schema not set", () -> Catalogs.createTable(conf, missingSchema)); + + Properties missingIdentifier = new Properties(); + missingIdentifier.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); + AssertHelpers.assertThrows( + "Should complain about table identifier not set", NullPointerException.class, + "identifier not set", () -> Catalogs.createTable(conf, missingIdentifier)); + + Properties properties = new Properties(); + properties.put("name", identifier.toString()); + properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); + properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC)); + + Catalogs.createTable(conf, properties); + + HadoopCatalog catalog = new CustomHadoopCatalog(conf); + Table table = catalog.loadTable(identifier); + + Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); + Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); + + AssertHelpers.assertThrows( + "Should complain about table identifier not set", NullPointerException.class, + "identifier not set", () -> Catalogs.dropTable(conf, new Properties())); + + Properties dropProperties = new Properties(); + dropProperties.put("name", identifier.toString()); + Catalogs.dropTable(conf, dropProperties); + + AssertHelpers.assertThrows( + "Should complain about table not found", NoSuchTableException.class, + "Table does not exist", () -> Catalogs.loadTable(conf, dropProperties)); + } + @Test public void testLoadCatalog() throws IOException { Assert.assertFalse(Catalogs.loadCatalog(conf).isPresent()); From e21ad740a497b209a4ae7fcb082ce77bb31857e2 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Tue, 22 Sep 2020 10:01:00 +0200 Subject: [PATCH 3/7] Controlling properties should not be pushed to the actual table creation. Javadoc is revisited --- .../apache/iceberg/hadoop/HadoopTables.java | 16 ++++++ .../java/org/apache/iceberg/mr/Catalogs.java | 57 +++++++++++++++++-- .../org/apache/iceberg/mr/TestCatalogs.java | 5 ++ 3 files changed, 72 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java index 8bfafb40ff09..e2b32f5966f3 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java @@ -147,10 +147,26 @@ public Table create(Schema schema, PartitionSpec spec, SortOrder order, return new BaseTable(ops, location); } + /** + * Drop a table and delete all data and metadata files. Throws NoSuchTableException if the table does not exists. + * + * @param location a path URI (e.g. hdfs:///warehouse/my_table) + * @return true if the table was dropped + */ public boolean dropTable(String location) { return dropTable(location, true); } + /** + * Drop a table; optionally delete data and metadata files. + *
+   * <p>
+ * If purge is set to true the implementation should delete all data and metadata files. + * Throws NoSuchTableException if the table does not exists. + * + * @param location a path URI (e.g. hdfs:///warehouse/my_table) + * @param purge if true, delete all data and metadata files in the table + * @return true if the table was dropped + */ public boolean dropTable(String location, boolean purge) { // Just for checking if the table exists or not load(location); diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java index ee3992862118..41877302bd77 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -20,9 +20,13 @@ package org.apache.iceberg.mr; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Optional; import java.util.Properties; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; @@ -41,6 +45,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + * Class for catalog resolution and accessing the common functions for {@Catalog} API. + * Catalog resolution happens in this order: + * 1. Custom catalog if specified by {@link InputFormatConfig#CATALOG_LOADER_CLASS} + * 2. Hadoop or Hive catalog if specified by {@link InputFormatConfig#CATALOG} + * 3. Hadoop Tables + */ public final class Catalogs { private static final Logger LOG = LoggerFactory.getLogger(Catalogs.class); @@ -50,15 +61,15 @@ public final class Catalogs { private static final String NAME = "name"; private static final String LOCATION = "location"; + private static final Set PROPERTIES_TO_REMOVE = Stream + .of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME) + .collect(Collectors.toCollection(HashSet::new)); + private Catalogs() { } /** * Load an Iceberg table using the catalog and table identifier (or table path) specified by the configuration. - * Catalog resolution happens in this order: - * 1. Custom catalog if specified by {@link InputFormatConfig#CATALOG_LOADER_CLASS} - * 2. Hadoop or Hive catalog if specified by {@link InputFormatConfig#CATALOG} - * 3. Hadoop Tables * @param conf a Hadoop conf * @return an Iceberg table */ @@ -66,7 +77,15 @@ public static Table loadTable(Configuration conf) { return loadTable(conf, conf.get(InputFormatConfig.TABLE_IDENTIFIER), conf.get(InputFormatConfig.TABLE_LOCATION)); } - // For use in HiveIcebergSerDe and HiveIcebergStorageHandler + /** + * Load an Iceberg table using the catalog specified by the configuration. + * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by + * the controlling properties. + * Used by HiveIcebergSerDe and HiveIcebergStorageHandler + * @param conf a Hadoop + * @param props the controlling properties + * @return an Iceberg table + */ public static Table loadTable(Configuration conf, Properties props) { return loadTable(conf, props.getProperty(NAME), props.getProperty(LOCATION)); } @@ -83,6 +102,19 @@ private static Table loadTable(Configuration conf, String tableIdentifier, Strin return new HadoopTables(conf).load(tableLocation); } + /** + * Creates an Iceberg table using the catalog specified by the configuration. 
+ * The properties should contain the following values: + * - Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required + * - Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required + * - Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if + * not provided + * Other properties will be handled over to the Table creation. The controlling properties above will not be + * propagated. + * @param conf a Hadoop conf + * @param props the controlling properties + * @return the created Iceberg table + */ public static Table createTable(Configuration conf, Properties props) { String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA); Preconditions.checkNotNull(schemaString, "Table schema not set"); @@ -96,8 +128,13 @@ public static Table createTable(Configuration conf, Properties props) { String location = props.getProperty(LOCATION); + // Create a table property map without the controlling properties Map map = new HashMap<>(props.size()); - props.forEach((k, v) -> map.put(k.toString(), v.toString())); + for (Object key : props.keySet()) { + if (!PROPERTIES_TO_REMOVE.contains(key)) { + map.put(key.toString(), props.get(key).toString()); + } + } Optional catalog = loadCatalog(conf); @@ -111,6 +148,14 @@ public static Table createTable(Configuration conf, Properties props) { return new HadoopTables(conf).create(schema, spec, map, location); } + /** + * Drops an Iceberg table using the catalog specified by the configuration. + * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by + * the controlling properties. + * @param conf a Hadoop conf + * @param props the controlling properties + * @return the created Iceberg table + */ public static boolean dropTable(Configuration conf, Properties props) { String location = props.getProperty(LOCATION); diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index 3c6a4fca04a5..2cb27d0029f2 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -20,6 +20,7 @@ package org.apache.iceberg.mr; import java.io.IOException; +import java.util.Collections; import java.util.Optional; import java.util.Properties; import org.apache.hadoop.conf.Configuration; @@ -109,6 +110,7 @@ public void testCreateDropTableToLocation() throws IOException { properties.put("location", temp.toString() + "/hadoop_tables"); properties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(SCHEMA)); properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC)); + properties.put("dummy", "test"); Catalogs.createTable(conf, properties); @@ -118,6 +120,7 @@ public void testCreateDropTableToLocation() throws IOException { Assert.assertEquals(properties.getProperty("location"), table.location()); Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); + Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); AssertHelpers.assertThrows( "Should complain about table location not set", NullPointerException.class, @@ -155,6 +158,7 @@ public void testCreateDropTableToCatalog() throws IOException { properties.put("name", identifier.toString()); properties.put(InputFormatConfig.TABLE_SCHEMA, 
SchemaParser.toJson(SCHEMA)); properties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC)); + properties.put("dummy", "test"); Catalogs.createTable(conf, properties); @@ -163,6 +167,7 @@ public void testCreateDropTableToCatalog() throws IOException { Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); + Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); AssertHelpers.assertThrows( "Should complain about table identifier not set", NullPointerException.class, From 3df7efd418452c837ef566c39ecf17d40c9d1bcb Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Tue, 22 Sep 2020 14:45:16 +0200 Subject: [PATCH 4/7] Fixing comment formatting --- mr/src/main/java/org/apache/iceberg/mr/Catalogs.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java index 41877302bd77..ee594fd1ce9a 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -105,10 +105,12 @@ private static Table loadTable(Configuration conf, String tableIdentifier, Strin /** * Creates an Iceberg table using the catalog specified by the configuration. * The properties should contain the following values: - * - Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required - * - Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required - * - Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if + *
+   * <ul>
+   * <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required
+   * <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required
+   * <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if
   *   not provided
+   * </ul>
* Other properties will be handled over to the Table creation. The controlling properties above will not be * propagated. * @param conf a Hadoop conf From fc28ebf41301e49939b571811645458152190dc8 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Wed, 23 Sep 2020 09:12:57 +0200 Subject: [PATCH 5/7] Javadoc warning fixed --- mr/src/main/java/org/apache/iceberg/mr/Catalogs.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java index ee594fd1ce9a..44481ece20d6 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -46,7 +46,7 @@ import org.slf4j.LoggerFactory; /** - * Class for catalog resolution and accessing the common functions for {@Catalog} API. + * Class for catalog resolution and accessing the common functions for {@link Catalog} API. * Catalog resolution happens in this order: * 1. Custom catalog if specified by {@link InputFormatConfig#CATALOG_LOADER_CLASS} * 2. Hadoop or Hive catalog if specified by {@link InputFormatConfig#CATALOG} From 1960cde82595ac31d5c2beee5e806ae6bb9c5eb6 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Thu, 24 Sep 2020 13:11:57 +0200 Subject: [PATCH 6/7] Addressed review comments --- .../apache/iceberg/BaseMetastoreCatalog.java | 2 ++ .../apache/iceberg/hadoop/HadoopCatalog.java | 1 - .../apache/iceberg/hadoop/HadoopTables.java | 22 ++++++++--------- .../iceberg/hadoop/TestHadoopTables.java | 8 +++++++ .../java/org/apache/iceberg/mr/Catalogs.java | 24 +++++++++++-------- 5 files changed, 35 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java index 2efcf80d2b9b..0139ec4f610b 100644 --- a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java +++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java @@ -286,7 +286,9 @@ private Transaction newReplaceTableTransaction(boolean orCreate) { * * @param io a FileIO to use for deletes * @param metadata the last valid TableMetadata instance for a dropped table. + * @deprecated will be removed in 0.11.0; use CatalogUtil.dropTableData instead. */ + @Deprecated protected static void dropTableData(FileIO io, TableMetadata metadata) { // Reads and deletes are done using Tasks.foreach(...).suppressFailureWhenFinished to complete // as much of the delete work as possible and avoid orphaned data or manifest files. diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java index 4941b1c1469e..e9408d7f2096 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopCatalog.java @@ -197,7 +197,6 @@ public boolean dropTable(TableIdentifier identifier, boolean purge) { try { if (purge && lastMetadata != null) { - // Since the data files and the metadata files may store in different locations, // Since the data files and the metadata files may store in different locations, // so it has to call dropTableData to force delete the data file. 
CatalogUtil.dropTableData(ops.io(), lastMetadata); diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java index e2b32f5966f3..01e01c63c1e2 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java @@ -20,6 +20,7 @@ package org.apache.iceberg.hadoop; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Map; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; @@ -38,7 +39,6 @@ import org.apache.iceberg.Tables; import org.apache.iceberg.exceptions.AlreadyExistsException; import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; @@ -148,10 +148,11 @@ public Table create(Schema schema, PartitionSpec spec, SortOrder order, } /** - * Drop a table and delete all data and metadata files. Throws NoSuchTableException if the table does not exists. + * Drop a table and delete all data and metadata files. * * @param location a path URI (e.g. hdfs:///warehouse/my_table) * @return true if the table was dropped + * @throws NoSuchTableException if the table does not exists. */ public boolean dropTable(String location) { return dropTable(location, true); @@ -161,22 +162,21 @@ public boolean dropTable(String location) { * Drop a table; optionally delete data and metadata files. *
   * <p>
* If purge is set to true the implementation should delete all data and metadata files. - * Throws NoSuchTableException if the table does not exists. * * @param location a path URI (e.g. hdfs:///warehouse/my_table) * @param purge if true, delete all data and metadata files in the table * @return true if the table was dropped + * @throws NoSuchTableException if the table does not exists. */ public boolean dropTable(String location, boolean purge) { - // Just for checking if the table exists or not - load(location); - TableOperations ops = newTableOps(location); - TableMetadata lastMetadata; - if (purge && ops.current() != null) { - lastMetadata = ops.current(); + TableMetadata lastMetadata = null; + if (ops.current() != null) { + if (purge) { + lastMetadata = ops.current(); + } } else { - lastMetadata = null; + throw new NoSuchTableException("Table does not exist at location: %s, so it can not be dropped", location); } try { @@ -189,7 +189,7 @@ public boolean dropTable(String location, boolean purge) { Util.getFs(tablePath, conf).delete(tablePath, true /* recursive */); return true; } catch (IOException e) { - throw new RuntimeIOException(e, "Failed to delete file: %s", location); + throw new UncheckedIOException("Failed to delete file: " + location, e); } } diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java index 5bd79a457e3a..719c9ef6330a 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java @@ -87,6 +87,10 @@ public void testDropTableWithPurge() throws IOException { Assert.assertEquals(0, dataDir.listFiles().length); Assert.assertFalse(tableDir.exists()); + + AssertHelpers.assertThrows( + "Should complain about missing table", NoSuchTableException.class, + "Table does not exist", () -> TABLES.dropTable(tableDir.toURI().toString())); } @Test @@ -102,6 +106,10 @@ public void testDropTableWithoutPurge() throws IOException { Assert.assertEquals(1, dataDir.listFiles().length); Assert.assertFalse(tableDir.exists()); + + AssertHelpers.assertThrows( + "Should complain about missing table", NoSuchTableException.class, + "Table does not exist", () -> TABLES.dropTable(tableDir.toURI().toString())); } @Test diff --git a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java index 44481ece20d6..aba158add705 100644 --- a/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java +++ b/mr/src/main/java/org/apache/iceberg/mr/Catalogs.java @@ -20,13 +20,10 @@ package org.apache.iceberg.mr; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Optional; import java.util.Properties; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; @@ -42,15 +39,19 @@ import org.apache.iceberg.hive.HiveCatalogs; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Class for catalog resolution and accessing the common functions for {@link Catalog} API. + *
+ * <p>
 * Catalog resolution happens in this order:
- * 1. Custom catalog if specified by {@link InputFormatConfig#CATALOG_LOADER_CLASS}
- * 2. Hadoop or Hive catalog if specified by {@link InputFormatConfig#CATALOG}
- * 3. Hadoop Tables
+ * <ol>
+ *   <li>Custom catalog if specified by {@link InputFormatConfig#CATALOG_LOADER_CLASS}
+ *   <li>Hadoop or Hive catalog if specified by {@link InputFormatConfig#CATALOG}
+ *   <li>Hadoop Tables
+ * </ol>
*/ public final class Catalogs { private static final Logger LOG = LoggerFactory.getLogger(Catalogs.class); @@ -61,9 +62,8 @@ public final class Catalogs { private static final String NAME = "name"; private static final String LOCATION = "location"; - private static final Set PROPERTIES_TO_REMOVE = Stream - .of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME) - .collect(Collectors.toCollection(HashSet::new)); + private static final Set PROPERTIES_TO_REMOVE = + ImmutableSet.of(InputFormatConfig.TABLE_SCHEMA, InputFormatConfig.PARTITION_SPEC, LOCATION, NAME); private Catalogs() { } @@ -79,8 +79,10 @@ public static Table loadTable(Configuration conf) { /** * Load an Iceberg table using the catalog specified by the configuration. + *
+   * <p>
* The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by * the controlling properties. + *
+   * <p>
* Used by HiveIcebergSerDe and HiveIcebergStorageHandler * @param conf a Hadoop * @param props the controlling properties @@ -104,8 +106,9 @@ private static Table loadTable(Configuration conf, String tableIdentifier, Strin /** * Creates an Iceberg table using the catalog specified by the configuration. + *
+   * <p>
* The properties should contain the following values: - *
    • Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required
    • Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required
    • Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if @@ -152,6 +155,7 @@ public static Table createTable(Configuration conf, Properties props) { /** * Drops an Iceberg table using the catalog specified by the configuration. + *
+   * <p>
      * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by * the controlling properties. * @param conf a Hadoop conf From 66bb2e8999c174579d7c4e48f1f4cd5f7dd4d987 Mon Sep 17 00:00:00 2001 From: Peter Vary Date: Fri, 25 Sep 2020 13:03:40 +0200 Subject: [PATCH 7/7] Do not throw if the table did not exist --- .../main/java/org/apache/iceberg/hadoop/HadoopTables.java | 8 +++----- .../java/org/apache/iceberg/hadoop/TestHadoopTables.java | 8 ++------ 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java index 01e01c63c1e2..f8c59f48a881 100644 --- a/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java +++ b/core/src/main/java/org/apache/iceberg/hadoop/HadoopTables.java @@ -151,8 +151,7 @@ public Table create(Schema schema, PartitionSpec spec, SortOrder order, * Drop a table and delete all data and metadata files. * * @param location a path URI (e.g. hdfs:///warehouse/my_table) - * @return true if the table was dropped - * @throws NoSuchTableException if the table does not exists. + * @return true if the table was dropped, false if it did not exist */ public boolean dropTable(String location) { return dropTable(location, true); @@ -165,8 +164,7 @@ public boolean dropTable(String location) { * * @param location a path URI (e.g. hdfs:///warehouse/my_table) * @param purge if true, delete all data and metadata files in the table - * @return true if the table was dropped - * @throws NoSuchTableException if the table does not exists. + * @return true if the table was dropped, false if it did not exist */ public boolean dropTable(String location, boolean purge) { TableOperations ops = newTableOps(location); @@ -176,7 +174,7 @@ public boolean dropTable(String location, boolean purge) { lastMetadata = ops.current(); } } else { - throw new NoSuchTableException("Table does not exist at location: %s, so it can not be dropped", location); + return false; } try { diff --git a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java index 719c9ef6330a..7af55c7151e2 100644 --- a/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java +++ b/core/src/test/java/org/apache/iceberg/hadoop/TestHadoopTables.java @@ -88,9 +88,7 @@ public void testDropTableWithPurge() throws IOException { Assert.assertEquals(0, dataDir.listFiles().length); Assert.assertFalse(tableDir.exists()); - AssertHelpers.assertThrows( - "Should complain about missing table", NoSuchTableException.class, - "Table does not exist", () -> TABLES.dropTable(tableDir.toURI().toString())); + Assert.assertFalse(TABLES.dropTable(tableDir.toURI().toString())); } @Test @@ -107,9 +105,7 @@ public void testDropTableWithoutPurge() throws IOException { Assert.assertEquals(1, dataDir.listFiles().length); Assert.assertFalse(tableDir.exists()); - AssertHelpers.assertThrows( - "Should complain about missing table", NoSuchTableException.class, - "Table does not exist", () -> TABLES.dropTable(tableDir.toURI().toString())); + Assert.assertFalse(TABLES.dropTable(tableDir.toURI().toString())); } @Test
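
For anyone trying the series out, here is a minimal usage sketch of the new entry points (illustrative only, not part of the patches; the warehouse path is hypothetical). With no catalog configured, Catalogs.createTable and Catalogs.dropTable resolve to HadoopTables, exercising the new HadoopTables.dropTable(location, purge) and CatalogUtil.dropTableData underneath:

```java
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

public class DropTableExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    Schema schema = new Schema(required(1, "id", Types.LongType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build();

    // Controlling properties: "location" routes to HadoopTables because no
    // CATALOG_LOADER_CLASS / CATALOG is set on the Configuration.
    Properties props = new Properties();
    props.put("location", "file:///tmp/warehouse/example_table"); // hypothetical path
    props.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec));
    props.put("dummy", "test"); // forwarded to table properties; controlling keys are stripped

    Table table = Catalogs.createTable(conf, props);

    // Dropping needs only the resolution key ("name" would be used with a catalog);
    // the default purge=true deletes data and metadata via CatalogUtil.dropTableData.
    Properties dropProps = new Properties();
    dropProps.put("location", table.location());
    Catalogs.dropTable(conf, dropProps);
  }
}
```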