diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index 48cbb049f31a..d94f449b4bb4 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -28,9 +28,14 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import org.apache.iceberg.mapping.MappingUtil; +import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @@ -39,6 +44,7 @@ * Schema evolution API implementation. */ class SchemaUpdate implements UpdateSchema { + private static final Logger LOG = LoggerFactory.getLogger(SchemaUpdate.class); private static final int TABLE_ROOT_ID = -1; private final TableOperations ops; @@ -200,7 +206,7 @@ public Schema apply() { @Override public void commit() { - TableMetadata update = base.updateSchema(apply(), lastColumnId); + TableMetadata update = applyChangesToMapping(base.updateSchema(apply(), lastColumnId)); ops.commit(base, update); } @@ -210,6 +216,30 @@ private int assignNewColumnId() { return next; } + private TableMetadata applyChangesToMapping(TableMetadata metadata) { + String mappingJson = metadata.property(TableProperties.DEFAULT_NAME_MAPPING, null); + if (mappingJson != null) { + try { + // parse and update the mapping + NameMapping mapping = NameMappingParser.fromJson(mappingJson); + NameMapping updated = MappingUtil.update(mapping, updates, adds); + + // replace the table property + Map updatedProperties = Maps.newHashMap(); + updatedProperties.putAll(metadata.properties()); + updatedProperties.put(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(updated)); + + return metadata.replaceProperties(updatedProperties); + + } catch (RuntimeException e) { + // log the error, but do not fail the update + LOG.warn("Failed to update external schema mapping: {}", mappingJson, e); + } + } + + return metadata; + } + private static Schema applyChanges(Schema schema, List deletes, Map updates, Multimap adds) { diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index ecb0f31d57ca..4dd778d165b8 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -80,7 +80,7 @@ private TableProperties() {} // This only applies to files written after this property is set. Files previously written aren't // relocated to reflect this parameter. - // If not set, defaults to a "meatdata" folder underneath the root path of the table. + // If not set, defaults to a "metadata" folder underneath the root path of the table. public static final String WRITE_METADATA_LOCATION = "write.metadata.path"; public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled"; @@ -91,4 +91,6 @@ private TableProperties() {} public static final String DEFAULT_WRITE_METRICS_MODE = "write.metadata.metrics.default"; public static final String DEFAULT_WRITE_METRICS_MODE_DEFAULT = "truncate(16)"; + + public static final String DEFAULT_NAME_MAPPING = "schema.name-mapping.default"; } diff --git a/core/src/main/java/org/apache/iceberg/mapping/MappedField.java b/core/src/main/java/org/apache/iceberg/mapping/MappedField.java new file mode 100644 index 000000000000..2a56afac4563 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/mapping/MappedField.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableSet; +import java.util.Objects; +import java.util.Set; + +/** + * An immutable mapping between a field ID and a set of names. + */ +public class MappedField { + + static MappedField of(Integer id, String name) { + return new MappedField(id, ImmutableSet.of(name), null); + } + + static MappedField of(Integer id, Iterable names) { + return new MappedField(id, names, null); + } + + static MappedField of(Integer id, String name, MappedFields nestedMapping) { + return new MappedField(id, ImmutableSet.of(name), nestedMapping); + } + + static MappedField of(Integer id, Iterable names, MappedFields nestedMapping) { + return new MappedField(id, names, nestedMapping); + } + + private final Set names; + private Integer id; + private MappedFields nestedMapping; + + private MappedField(Integer id, Iterable names, MappedFields nested) { + this.id = id; + this.names = ImmutableSet.copyOf(names); + this.nestedMapping = nested; + } + + public Integer id() { + return id; + } + + public Set names() { + return names; + } + + public MappedFields nestedMapping() { + return nestedMapping; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + MappedField that = (MappedField) other; + return names.equals(that.names) && + Objects.equals(id, that.id) && + Objects.equals(nestedMapping, that.nestedMapping); + } + + @Override + public int hashCode() { + return Objects.hash(names, id, nestedMapping); + } + + @Override + public String toString() { + return "([" + Joiner.on(", ").join(names) + "] -> " + (id != null ? id : "?") + + (nestedMapping != null ? ", " + nestedMapping + ")" : ")"); + } +} diff --git a/core/src/main/java/org/apache/iceberg/mapping/MappedFields.java b/core/src/main/java/org/apache/iceberg/mapping/MappedFields.java new file mode 100644 index 000000000000..824257b88761 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/mapping/MappedFields.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class MappedFields { + + static MappedFields of(MappedField... fields) { + return new MappedFields(ImmutableList.copyOf(fields)); + } + + static MappedFields of(List fields) { + return new MappedFields(fields); + } + + private final List fields; + private final Map nameToId; + private final Map idToField; + + private MappedFields(List fields) { + this.fields = ImmutableList.copyOf(fields); + this.nameToId = indexIds(fields); + this.idToField = indexFields(fields); + } + + public MappedField field(int id) { + return idToField.get(id); + } + + public Integer id(String name) { + return nameToId.get(name); + } + + public int size() { + return fields.size(); + } + + private static Map indexIds(List fields) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + fields.forEach(field -> + field.names().forEach(name -> { + Integer id = field.id(); + if (id != null) { + builder.put(name, id); + } + })); + return builder.build(); + } + + private static Map indexFields(List fields) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + fields.forEach(field -> { + Integer id = field.id(); + if (id != null) { + builder.put(id, field); + } + }); + return builder.build(); + } + + public List fields() { + return fields; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + return fields.equals(((MappedFields) other).fields); + } + + @Override + public int hashCode() { + return Objects.hash(fields); + } + + @Override + public String toString() { + return "[ " + Joiner.on(", ").join(fields) + " ]"; + } +} diff --git a/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java b/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java new file mode 100644 index 000000000000..95d8b4e047a5 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/mapping/MappingUtil.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.types.Types; + +public class MappingUtil { + private static final Joiner DOT = Joiner.on('.'); + + private MappingUtil() { + } + + /** + * Create a name-based mapping for a schema. + *

+ * The mapping returned by this method will use the schema's name for each field. + * + * @param schema a {@link Schema} + * @return a {@link NameMapping} initialized with the schema's fields and names + */ + public static NameMapping create(Schema schema) { + return new NameMapping(TypeUtil.visit(schema, CreateMapping.INSTANCE)); + } + + /** + * Update a name-based mapping using changes to a schema. + * + * @param mapping a name-based mapping + * @param updates a map from field ID to updated field definitions + * @param adds a map from parent field ID to nested fields to be added + * @return an updated mapping with names added to renamed fields and the mapping extended for new fields + */ + public static NameMapping update(NameMapping mapping, + Map updates, + Multimap adds) { + return new NameMapping(visit(mapping, new UpdateMapping(updates, adds))); + } + + static Map indexById(MappedFields mapping) { + return visit(mapping, new IndexById()); + } + + static Map indexByName(MappedFields mapping) { + return visit(mapping, IndexByName.INSTANCE); + } + + private static class UpdateMapping implements Visitor { + private final Map updates; + private final Multimap adds; + + private UpdateMapping(Map updates, Multimap adds) { + this.updates = updates; + this.adds = adds; + } + + @Override + public MappedFields mapping(NameMapping mapping, MappedFields result) { + return addNewFields(result, -1 /* parent ID used to add top-level fields */); + } + + @Override + public MappedFields fields(MappedFields fields, List fieldResults) { + return MappedFields.of(fieldResults); + } + + @Override + public MappedField field(MappedField field, MappedFields fieldResult) { + // update this field's names + Set fieldNames = Sets.newHashSet(field.names()); + Types.NestedField update = updates.get(field.id()); + if (update != null) { + fieldNames.add(update.name()); + } + + // add a new mapping for any new nested fields + MappedFields nestedMapping = addNewFields(fieldResult, field.id()); + return MappedField.of(field.id(), fieldNames, nestedMapping); + } + + private MappedFields addNewFields(MappedFields mapping, int parentId) { + Collection fieldsToAdd = adds.get(parentId); + if (fieldsToAdd == null || fieldsToAdd.isEmpty()) { + return mapping; + } + + List fields = Lists.newArrayList(); + if (mapping != null) { + fields.addAll(mapping.fields()); + } + + for (Types.NestedField add : fieldsToAdd) { + MappedFields nestedMapping = TypeUtil.visit(add.type(), CreateMapping.INSTANCE); + fields.add(MappedField.of(add.fieldId(), add.name(), nestedMapping)); + } + + return MappedFields.of(fields); + } + } + + private static class IndexByName implements Visitor, Map> { + static final IndexByName INSTANCE = new IndexByName(); + + @Override + public Map mapping(NameMapping mapping, Map result) { + return result; + } + + @Override + public Map fields(MappedFields fields, List> fieldResults) { + // merge the results of each field + ImmutableMap.Builder builder = ImmutableMap.builder(); + for (Map results : fieldResults) { + builder.putAll(results); + } + return builder.build(); + } + + @Override + public Map field(MappedField field, Map fieldResult) { + ImmutableMap.Builder builder = ImmutableMap.builder(); + + if (fieldResult != null) { + for (String name : field.names()) { + for (Map.Entry entry : fieldResult.entrySet()) { + String fullName = DOT.join(name, entry.getKey()); + builder.put(fullName, entry.getValue()); + } + } + } + + for (String name : field.names()) { + builder.put(name, field); + } + + return builder.build(); + } + } + + private static class IndexById implements Visitor, Map> { + private final Map result = Maps.newHashMap(); + + @Override + public Map mapping(NameMapping mapping, Map fieldsResult) { + return fieldsResult; + } + + @Override + public Map fields(MappedFields fields, List> fieldResults) { + return result; + } + + @Override + public Map field(MappedField field, Map fieldResult) { + Preconditions.checkState(!result.containsKey(field.id()), "Invalid mapping: ID %s is not unique", field.id()); + result.put(field.id(), field); + return result; + } + } + + private interface Visitor { + S mapping(NameMapping mapping, S result); + S fields(MappedFields fields, List fieldResults); + T field(MappedField field, S fieldResult); + } + + private static S visit(NameMapping mapping, Visitor visitor) { + return visitor.mapping(mapping, visit(mapping.asMappedFields(), visitor)); + } + + private static S visit(MappedFields mapping, Visitor visitor) { + if (mapping == null) { + return null; + } + + List fieldResults = Lists.newArrayList(); + for (MappedField field : mapping.fields()) { + fieldResults.add(visitor.field(field, visit(field.nestedMapping(), visitor))); + } + + return visitor.fields(mapping, fieldResults); + } + + private static class CreateMapping extends TypeUtil.SchemaVisitor { + private static final CreateMapping INSTANCE = new CreateMapping(); + + private CreateMapping() { + } + + @Override + public MappedFields schema(Schema schema, MappedFields structResult) { + return structResult; + } + + @Override + public MappedFields struct(Types.StructType struct, List fieldResults) { + List fields = Lists.newArrayListWithExpectedSize(fieldResults.size()); + + for (int i = 0; i < fieldResults.size(); i += 1) { + Types.NestedField field = struct.fields().get(i); + MappedFields result = fieldResults.get(i); + fields.add(MappedField.of(field.fieldId(), field.name(), result)); + } + + return MappedFields.of(fields); + } + + @Override + public MappedFields field(Types.NestedField field, MappedFields fieldResult) { + return fieldResult; + } + + @Override + public MappedFields list(Types.ListType list, MappedFields elementResult) { + return MappedFields.of(MappedField.of(list.elementId(), "element", elementResult)); + } + + @Override + public MappedFields map(Types.MapType map, MappedFields keyResult, MappedFields valueResult) { + return MappedFields.of( + MappedField.of(map.keyId(), "key", keyResult), + MappedField.of(map.valueId(), "value", valueResult) + ); + } + + @Override + public MappedFields primitive(Type.PrimitiveType primitive) { + return null; // no mapping because primitives have no nested fields + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java b/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java new file mode 100644 index 000000000000..2d8d4d80d90f --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/mapping/NameMapping.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.google.common.base.Joiner; +import java.util.List; +import java.util.Map; + +/** + * Represents a mapping from external schema names to Iceberg type IDs. + */ +public class NameMapping { + private static final Joiner DOT = Joiner.on('.'); + + private final MappedFields mapping; + private final Map fieldsById; + private final Map fieldsByName; + + NameMapping(MappedFields mapping) { + this.mapping = mapping; + this.fieldsById = MappingUtil.indexById(mapping); + this.fieldsByName = MappingUtil.indexByName(mapping); + } + + public MappedField find(int id) { + return fieldsById.get(id); + } + + public MappedField find(String... names) { + return fieldsByName.get(DOT.join(names)); + } + + public MappedField find(List names) { + return fieldsByName.get(DOT.join(names)); + } + + public MappedFields asMappedFields() { + return mapping; + } + + @Override + public String toString() { + if (mapping.fields().isEmpty()) { + return "[]"; + } else { + return "[\n " + Joiner.on("\n ").join(mapping.fields()) + "\n]"; + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/mapping/NameMappingParser.java b/core/src/main/java/org/apache/iceberg/mapping/NameMappingParser.java new file mode 100644 index 000000000000..453165c3d4ff --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/mapping/NameMappingParser.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import java.io.IOException; +import java.io.StringWriter; +import java.util.List; +import java.util.Set; +import org.apache.iceberg.exceptions.RuntimeIOException; +import org.apache.iceberg.util.JsonUtil; + +/** + * Parses external name mappings from a JSON representation. + *

+ * [ { "field-id": 1, "names": ["id", "record_id"] },
+ *   { "field-id": 2, "names": ["data"] },
+ *   { "field-id": 3, "names": ["location"], "fields": [
+ *       { "field-id": 4, "names": ["latitude", "lat"] },
+ *       { "field-id": 5, "names": ["longitude", "long"] }
+ *     ] } ]
+ * 
+ */ +public class NameMappingParser { + + private NameMappingParser() { + } + + private static final String FIELD_ID = "field-id"; + private static final String NAMES = "names"; + private static final String FIELDS = "fields"; + + public static String toJson(NameMapping mapping) { + try { + StringWriter writer = new StringWriter(); + JsonGenerator generator = JsonUtil.factory().createGenerator(writer); + generator.useDefaultPrettyPrinter(); + toJson(mapping, generator); + generator.flush(); + return writer.toString(); + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to write json for: %s", mapping); + } + } + + static void toJson(NameMapping nameMapping, JsonGenerator generator) throws IOException { + toJson(nameMapping.asMappedFields(), generator); + } + + private static void toJson(MappedFields mapping, JsonGenerator generator) throws IOException { + generator.writeStartArray(); + + for (MappedField field : mapping.fields()) { + toJson(field, generator); + } + + generator.writeEndArray(); + } + + private static void toJson(MappedField field, JsonGenerator generator) throws IOException { + generator.writeStartObject(); + + generator.writeNumberField(FIELD_ID, field.id()); + + generator.writeArrayFieldStart(NAMES); + for (String name : field.names()) { + generator.writeString(name); + } + generator.writeEndArray(); + + MappedFields nested = field.nestedMapping(); + if (nested != null) { + generator.writeFieldName(FIELDS); + toJson(nested, generator); + } + + generator.writeEndObject(); + } + + public static NameMapping fromJson(String json) { + try { + return fromJson(JsonUtil.mapper().readValue(json, JsonNode.class)); + } catch (IOException e) { + throw new RuntimeIOException(e, "Failed to convert version from json: %s", json); + } + } + + static NameMapping fromJson(JsonNode node) { + return new NameMapping(fieldsFromJson(node)); + } + + private static MappedFields fieldsFromJson(JsonNode node) { + Preconditions.checkArgument(node.isArray(), "Cannot parse non-array mapping fields: %s", node); + + List fields = Lists.newArrayList(); + node.elements().forEachRemaining(fieldNode -> fields.add(fieldFromJson(fieldNode))); + + return MappedFields.of(fields); + } + + private static MappedField fieldFromJson(JsonNode node) { + Preconditions.checkArgument(node != null && !node.isNull() && node.isObject(), + "Cannot parse non-object mapping field: %s", node); + + Integer id = JsonUtil.getIntOrNull(FIELD_ID, node); + + Set names; + if (node.has(NAMES)) { + names = ImmutableSet.copyOf(JsonUtil.getStringList(NAMES, node)); + } else { + names = ImmutableSet.of(); + } + + MappedFields nested; + if (node.has(FIELDS)) { + nested = fieldsFromJson(node.get(FIELDS)); + } else { + nested = null; + } + + return MappedField.of(id, names, nested); + } +} diff --git a/core/src/main/java/org/apache/iceberg/util/JsonUtil.java b/core/src/main/java/org/apache/iceberg/util/JsonUtil.java index 09a546643a10..976a1cc3933c 100644 --- a/core/src/main/java/org/apache/iceberg/util/JsonUtil.java +++ b/core/src/main/java/org/apache/iceberg/util/JsonUtil.java @@ -52,6 +52,16 @@ public static int getInt(String property, JsonNode node) { return pNode.asInt(); } + public static Integer getIntOrNull(String property, JsonNode node) { + if (!node.has(property)) { + return null; + } + JsonNode pNode = node.get(property); + Preconditions.checkArgument(pNode != null && !pNode.isNull() && pNode.isIntegralNumber() && pNode.canConvertToInt(), + "Cannot parse %s from non-string value: %s", property, pNode); + return pNode.asInt(); + } + public static long getLong(String property, JsonNode node) { Preconditions.checkArgument(node.has(property), "Cannot parse missing int %s", property); JsonNode pNode = node.get(property); diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java index 6c4f6820631f..c55c07d2c70d 100644 --- a/core/src/test/java/org/apache/iceberg/TableTestBase.java +++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java @@ -40,7 +40,7 @@ public class TableTestBase { // Schema passed to create tables - static final Schema SCHEMA = new Schema( + public static final Schema SCHEMA = new Schema( required(3, "id", Types.IntegerType.get()), required(4, "data", Types.StringType.get()) ); @@ -80,7 +80,7 @@ public class TableTestBase { File tableDir = null; File metadataDir = null; - TestTables.TestTable table = null; + public TestTables.TestTable table = null; @Before public void setupTable() throws Exception { @@ -117,7 +117,7 @@ Integer version() { return TestTables.metadataVersion("test"); } - TableMetadata readMetadata() { + public TableMetadata readMetadata() { return TestTables.readMetadata("test"); } diff --git a/core/src/test/java/org/apache/iceberg/TestTables.java b/core/src/test/java/org/apache/iceberg/TestTables.java index 42752058f63c..13e336ea9eed 100644 --- a/core/src/test/java/org/apache/iceberg/TestTables.java +++ b/core/src/test/java/org/apache/iceberg/TestTables.java @@ -38,7 +38,7 @@ public class TestTables { private TestTables() {} - static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) { + public static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) { TestTableOperations ops = new TestTableOperations(name, temp); if (ops.current() != null) { throw new AlreadyExistsException("Table %s already exists at location: %s", name, temp); @@ -47,7 +47,7 @@ static TestTable create(File temp, String name, Schema schema, PartitionSpec spe return new TestTable(ops, name); } - static Transaction beginCreate(File temp, String name, Schema schema, PartitionSpec spec) { + public static Transaction beginCreate(File temp, String name, Schema schema, PartitionSpec spec) { TableOperations ops = new TestTableOperations(name, temp); if (ops.current() != null) { throw new AlreadyExistsException("Table %s already exists at location: %s", name, temp); @@ -77,12 +77,12 @@ public static Transaction beginReplace(File temp, String name, Schema schema, Pa } } - static TestTable load(File temp, String name) { + public static TestTable load(File temp, String name) { TestTableOperations ops = new TestTableOperations(name, temp); return new TestTable(ops, name); } - static class TestTable extends BaseTable { + public static class TestTable extends BaseTable { private final TestTableOperations ops; private TestTable(TestTableOperations ops, String name) { diff --git a/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java b/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java new file mode 100644 index 000000000000..bd39cb0e389f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import com.google.common.collect.ImmutableList; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.TableTestBase; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.types.Types.NestedField.required; + +public class TestMappingUpdates extends TableTestBase { + @Test + public void testAddColumnMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data")), + mapping.asMappedFields()); + + table.updateSchema() + .addColumn("ts", Types.TimestampType.withZone()) + .commit(); + + NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "ts")), + updated.asMappedFields()); + } + + @Test + public void testAddNestedColumnMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data")), + mapping.asMappedFields()); + + table.updateSchema() + .addColumn("point", Types.StructType.of( + required(1, "x", Types.DoubleType.get()), + required(2, "y", Types.DoubleType.get()))) + .commit(); + + NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "point", MappedFields.of( + MappedField.of(4, "x"), + MappedField.of(5, "y") + ))), + updated.asMappedFields()); + + table.updateSchema() + .addColumn("point", "z", Types.DoubleType.get()) + .commit(); + + NameMapping pointUpdated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "point", MappedFields.of( + MappedField.of(4, "x"), + MappedField.of(5, "y"), + MappedField.of(6, "z") + ))), + pointUpdated.asMappedFields()); + } + + @Test + public void testRenameMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data")), + mapping.asMappedFields()); + + table.updateSchema() + .renameColumn("id", "object_id") + .commit(); + + NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, ImmutableList.of("id", "object_id")), + MappedField.of(2, "data")), + updated.asMappedFields()); + } + + @Test + public void testRenameNestedFieldMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + table.updateSchema() + .addColumn("point", Types.StructType.of( + required(1, "x", Types.DoubleType.get()), + required(2, "y", Types.DoubleType.get()))) + .commit(); + + NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "point", MappedFields.of( + MappedField.of(4, "x"), + MappedField.of(5, "y") + ))), + updated.asMappedFields()); + + table.updateSchema() + .renameColumn("point.x", "X") + .renameColumn("point.y", "Y") + .commit(); + + NameMapping pointUpdated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "point", MappedFields.of( + MappedField.of(4, ImmutableList.of("x", "X")), + MappedField.of(5, ImmutableList.of("y", "Y")) + ))), + pointUpdated.asMappedFields()); + } + + + @Test + public void testRenameComplexFieldMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + table.updateSchema() + .addColumn("point", Types.StructType.of( + required(1, "x", Types.DoubleType.get()), + required(2, "y", Types.DoubleType.get()))) + .commit(); + + NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "point", MappedFields.of( + MappedField.of(4, "x"), + MappedField.of(5, "y") + ))), + updated.asMappedFields()); + + table.updateSchema() + .renameColumn("point", "p2") + .commit(); + + NameMapping pointUpdated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, ImmutableList.of("point", "p2"), MappedFields.of( + MappedField.of(4, "x"), + MappedField.of(5, "y") + ))), + pointUpdated.asMappedFields()); + } + + @Test + public void testMappingUpdateFailureSkipsMappingUpdate() { + NameMapping mapping = MappingUtil.create(table.schema()); + table.updateProperties() + .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping)) + .commit(); + + table.updateSchema() + .renameColumn("id", "object_id") + .commit(); + + String updatedJson = table.properties().get(TableProperties.DEFAULT_NAME_MAPPING); + NameMapping updated = NameMappingParser.fromJson(updatedJson); + + Assert.assertEquals( + MappedFields.of( + MappedField.of(1, ImmutableList.of("id", "object_id")), + MappedField.of(2, "data")), + updated.asMappedFields()); + + // rename data to id, which conflicts in the mapping above + // this update should succeed, even though the mapping update fails + table.updateSchema() + .renameColumn("data", "id") + .commit(); + + Assert.assertEquals("Mapping JSON should not change", + updatedJson, table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); + } +} diff --git a/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java b/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java new file mode 100644 index 000000000000..7a4976455562 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/mapping/TestNameMapping.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.mapping; + +import org.apache.iceberg.AssertHelpers; +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Types; +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.iceberg.types.Types.NestedField.required; + +public class TestNameMapping { + @Test + public void testFlatSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get())); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data")); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testNestedStructSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "location", Types.StructType.of( + required(4, "latitude", Types.FloatType.get()), + required(5, "longitude", Types.FloatType.get()) + ))); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "location", MappedFields.of( + MappedField.of(4, "latitude"), + MappedField.of(5, "longitude") + ))); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testMapSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "map", Types.MapType.ofRequired(4, 5, + Types.StringType.get(), + Types.DoubleType.get()))); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "map", MappedFields.of( + MappedField.of(4, "key"), + MappedField.of(5, "value") + ))); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testComplexKeyMapSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "map", Types.MapType.ofRequired(4, 5, + Types.StructType.of( + required(6, "x", Types.DoubleType.get()), + required(7, "y", Types.DoubleType.get())), + Types.DoubleType.get()))); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "map", MappedFields.of( + MappedField.of(4, "key", MappedFields.of( + MappedField.of(6, "x"), + MappedField.of(7, "y") + )), + MappedField.of(5, "value") + ))); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testComplexValueMapSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "map", Types.MapType.ofRequired(4, 5, + Types.DoubleType.get(), + Types.StructType.of( + required(6, "x", Types.DoubleType.get()), + required(7, "y", Types.DoubleType.get())) + ))); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "map", MappedFields.of( + MappedField.of(4, "key"), + MappedField.of(5, "value", MappedFields.of( + MappedField.of(6, "x"), + MappedField.of(7, "y") + )) + ))); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testListSchemaToMapping() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "list", Types.ListType.ofRequired(4, Types.StringType.get()))); + + MappedFields expected = MappedFields.of( + MappedField.of(1, "id"), + MappedField.of(2, "data"), + MappedField.of(3, "list", MappedFields.of( + MappedField.of(4, "element") + ))); + + NameMapping mapping = MappingUtil.create(schema); + Assert.assertEquals(expected, mapping.asMappedFields()); + } + + @Test + public void testFailsDuplicateId() { + // the schema can be created because ID indexing is lazy + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(1, "data", Types.StringType.get())); + + AssertHelpers.assertThrows("Should fail if IDs are reused", + IllegalArgumentException.class, "Multiple entries with same key", + () -> MappingUtil.create(schema)); + } + + @Test + public void testFailsDuplicateName() { + AssertHelpers.assertThrows("Should fail if names are reused", + IllegalArgumentException.class, "Multiple entries with same key", + () -> new NameMapping(MappedFields.of(MappedField.of(1, "x"), MappedField.of(2, "x")))); + } + + @Test + public void testAllowsDuplicateNamesInSeparateContexts() { + new NameMapping(MappedFields.of( + MappedField.of(1, "x", MappedFields.of(MappedField.of(3, "x"))), + MappedField.of(2, "y", MappedFields.of(MappedField.of(4, "x"))) + )); + } + + @Test + public void testMappingFindById() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "map", Types.MapType.ofRequired(4, 5, + Types.DoubleType.get(), + Types.StructType.of( + required(6, "x", Types.DoubleType.get()), + required(7, "y", Types.DoubleType.get())))), + required(8, "list", Types.ListType.ofRequired(9, + Types.StringType.get())), + required(10, "location", Types.StructType.of( + required(11, "latitude", Types.FloatType.get()), + required(12, "longitude", Types.FloatType.get()) + ))); + + NameMapping mapping = MappingUtil.create(schema); + + Assert.assertNull("Should not return a field mapping for a missing ID", mapping.find(100)); + Assert.assertEquals(MappedField.of(2, "data"), mapping.find(2)); + Assert.assertEquals(MappedField.of(6, "x"), mapping.find(6)); + Assert.assertEquals(MappedField.of(9, "element"), mapping.find(9)); + Assert.assertEquals(MappedField.of(11, "latitude"), mapping.find(11)); + Assert.assertEquals( + MappedField.of(10, "location", MappedFields.of( + MappedField.of(11, "latitude"), + MappedField.of(12, "longitude"))), + mapping.find(10)); + } + + @Test + public void testMappingFindByName() { + Schema schema = new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "map", Types.MapType.ofRequired(4, 5, + Types.DoubleType.get(), + Types.StructType.of( + required(6, "x", Types.DoubleType.get()), + required(7, "y", Types.DoubleType.get())))), + required(8, "list", Types.ListType.ofRequired(9, + Types.StringType.get())), + required(10, "location", Types.StructType.of( + required(11, "latitude", Types.FloatType.get()), + required(12, "longitude", Types.FloatType.get()) + ))); + + NameMapping mapping = MappingUtil.create(schema); + + Assert.assertNull("Should not return a field mapping for a nested name", mapping.find("element")); + Assert.assertNull("Should not return a field mapping for a nested name", mapping.find("x")); + Assert.assertNull("Should not return a field mapping for a nested name", mapping.find("key")); + Assert.assertNull("Should not return a field mapping for a nested name", mapping.find("value")); + Assert.assertEquals(MappedField.of(2, "data"), mapping.find("data")); + Assert.assertEquals(MappedField.of(6, "x"), mapping.find("map", "value", "x")); + Assert.assertEquals(MappedField.of(9, "element"), mapping.find("list", "element")); + Assert.assertEquals(MappedField.of(11, "latitude"), mapping.find("location", "latitude")); + Assert.assertEquals( + MappedField.of(10, "location", MappedFields.of( + MappedField.of(11, "latitude"), + MappedField.of(12, "longitude"))), + mapping.find("location")); + } +}