diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index d305d96cff4..4ff325e36f9 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1528,7 +1528,6 @@ def _temp_path(): generate_duplicate_fieldnames_case() .skip_category('Go') - .skip_category('Java') .skip_category('JS') .skip_category('Rust'), diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index 3656a2cd1a3..f283f6cd255 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -37,7 +37,8 @@ class JavaTester(Tester): FLIGHT_SERVER = True FLIGHT_CLIENT = True - JAVA_OPTS = ['-Dio.netty.tryReflectionSetAccessible=true'] + JAVA_OPTS = ['-Dio.netty.tryReflectionSetAccessible=true', + '-Darrow.struct.conflict.policy=CONFLICT_APPEND'] _arrow_version = load_version_from_pom() ARROW_TOOLS_JAR = os.environ.get( diff --git a/java/README.md b/java/README.md index 773d2ba698d..9e6f657457e 100644 --- a/java/README.md +++ b/java/README.md @@ -80,8 +80,12 @@ variable are set, the system property takes precedence. ## Java Properties -For java 9 or later, should set "-Dio.netty.tryReflectionSetAccessible=true". + * For java 9 or later, should set "-Dio.netty.tryReflectionSetAccessible=true". This fixes `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available`. thrown by netty. + * To support duplicate fields in a `StructVector` enable "-Darrow.struct.conflict.policy=CONFLICT_APPEND". +Duplicate fields are ignored (`CONFLICT_REPLACE`) by default and overwritten. To support different policies for +conflicting or duplicate fields set this JVM flag or use the correct static constructor methods for `StructVector`s. + ## Java Code Style Guide Arrow Java follows the Google style guide [here][3] with the following diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index b083140e217..ab03cf45e81 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -24,7 +24,9 @@ import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.AbstractStructVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.types.Types; @@ -132,8 +134,13 @@ public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldT this.name = name; this.allocator = allocator; this.fieldType = fieldType; - this.internalStruct = new NonNullableStructVector("internal", allocator, INTERNAL_STRUCT_TYPE, - callBack); + this.internalStruct = new NonNullableStructVector( + "internal", + allocator, + INTERNAL_STRUCT_TYPE, + callBack, + AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, + false); this.validityBuffer = allocator.getEmpty(); this.validityBufferAllocationSizeInBytes = DataSizeRoundingUtil.divideBy8Ceil(BaseValueVector.INITIAL_VALUE_ALLOCATION); diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index 14d3b2ab7aa..845f5380e2b 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -22,6 +22,8 @@ import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.AbstractStructVector; +import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.types.UnionMode; @@ -117,8 +119,13 @@ public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, this.name = name; this.allocator = allocator; this.fieldType = fieldType; - this.internalStruct = new NonNullableStructVector("internal", allocator, INTERNAL_STRUCT_TYPE, - callBack); + this.internalStruct = new NonNullableStructVector( + "internal", + allocator, + INTERNAL_STRUCT_TYPE, + callBack, + AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE, + false); this.typeBuffer = allocator.getEmpty(); this.callBack = callBack; this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 98a0c5e167c..623c7731701 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -19,7 +19,7 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -54,7 +54,7 @@ public class VectorSchemaRoot implements AutoCloseable { private Schema schema; private int rowCount; private final List fieldVectors; - private final Map fieldVectorsMap = new HashMap<>(); + private final Map fieldVectorsMap = new LinkedHashMap<>(); /** @@ -113,7 +113,7 @@ public VectorSchemaRoot(Schema schema, List fieldVectors, int rowCo for (int i = 0; i < schema.getFields().size(); ++i) { Field field = schema.getFields().get(i); FieldVector vector = fieldVectors.get(i); - fieldVectorsMap.put(field.getName(), vector); + fieldVectorsMap.put(field, vector); } } @@ -163,8 +163,22 @@ public List getFieldVectors() { return fieldVectors.stream().collect(Collectors.toList()); } + /** + * gets a vector by name. + * + * if name occurs multiple times this returns the first inserted entry for name + */ public FieldVector getVector(String name) { - return fieldVectorsMap.get(name); + for (Map.Entry entry: fieldVectorsMap.entrySet()) { + if (entry.getKey().getName().equals(name)) { + return entry.getValue(); + } + } + return null; + } + + public FieldVector getVector(Field field) { + return fieldVectorsMap.get(field); } public FieldVector getVector(int index) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java index 828ab087a92..be6d9923389 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java @@ -31,7 +31,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; -import org.apache.arrow.vector.util.MapWithOrdinal; +import org.apache.arrow.vector.util.PromotableMultiMapWithOrdinal; import org.apache.arrow.vector.util.ValueVectorUtility; /** @@ -39,12 +39,70 @@ */ public abstract class AbstractStructVector extends AbstractContainerVector { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); - + private static final String STRUCT_CONFLICT_POLICY_ENV = "ARROW_STRUCT_CONFLICT_POLICY"; + private static final String STRUCT_CONFLICT_POLICY_JVM = "arrow.struct.conflict.policy"; + private static final ConflictPolicy DEFAULT_CONFLICT_POLICY; // Maintains a map with key as field name and value is the vector itself - private final MapWithOrdinal vectors = new MapWithOrdinal<>(); + private final PromotableMultiMapWithOrdinal vectors; + protected final boolean allowConflictPolicyChanges; + private ConflictPolicy conflictPolicy; + + + static { + String conflictPolicyStr = System.getProperty(STRUCT_CONFLICT_POLICY_JVM, + ConflictPolicy.CONFLICT_REPLACE.toString()); + if (conflictPolicyStr == null) { + conflictPolicyStr = System.getenv(STRUCT_CONFLICT_POLICY_ENV); + } + ConflictPolicy conflictPolicy; + try { + conflictPolicy = ConflictPolicy.valueOf(conflictPolicyStr.toUpperCase()); + } catch (Exception e) { + conflictPolicy = ConflictPolicy.CONFLICT_REPLACE; + } + DEFAULT_CONFLICT_POLICY = conflictPolicy; + } - protected AbstractStructVector(String name, BufferAllocator allocator, CallBack callBack) { + /** + * Policy to determine how to react when duplicate columns are encountered. + */ + public enum ConflictPolicy { + // Ignore the conflict and append the field. This is the default behaviour + CONFLICT_APPEND, + // Keep the existing field and ignore the newer one. + CONFLICT_IGNORE, + // Replace the existing field with the newer one. + CONFLICT_REPLACE, + // Refuse the new field and error out. + CONFLICT_ERROR + } + + /** + * Base coonstructor that sets default conflict policy to APPEND. + */ + protected AbstractStructVector(String name, + BufferAllocator allocator, + CallBack callBack, + ConflictPolicy conflictPolicy, + boolean allowConflictPolicyChanges) { super(name, allocator, callBack); + this.conflictPolicy = conflictPolicy == null ? DEFAULT_CONFLICT_POLICY : conflictPolicy; + this.vectors = new PromotableMultiMapWithOrdinal<>(allowConflictPolicyChanges, this.conflictPolicy); + this.allowConflictPolicyChanges = allowConflictPolicyChanges; + } + + /** + * Set conflict policy and return last conflict policy state. + */ + public ConflictPolicy setConflictPolicy(ConflictPolicy conflictPolicy) { + ConflictPolicy tmp = this.conflictPolicy; + this.conflictPolicy = conflictPolicy; + this.vectors.setConflictPolicy(conflictPolicy); + return tmp; + } + + public ConflictPolicy getConflictPolicy() { + return conflictPolicy; } @Override @@ -114,7 +172,6 @@ public void reAlloc() { * @return resultant {@link org.apache.arrow.vector.ValueVector} * @throws java.lang.IllegalStateException raised if there is a hard schema change */ - @Override public T addOrGet(String childName, FieldType fieldType, Class clazz) { final ValueVector existing = getChild(childName); boolean create = false; @@ -157,25 +214,22 @@ public ValueVector getChildByOrdinal(int id) { * Returns a {@link org.apache.arrow.vector.ValueVector} instance of subtype of T corresponding to the given * field name if exists or null. * + * If there is more than one element for name this will return the first inserted. + * * @param name the name of the child to return * @param clazz the expected type of the child * @return the child corresponding to this name */ @Override public T getChild(String name, Class clazz) { - final ValueVector v = vectors.get(name); - if (v == null) { + final FieldVector f = vectors.get(name); + if (f == null) { return null; } - return typeify(v, clazz); + return typeify(f, clazz); } protected ValueVector add(String childName, FieldType fieldType) { - final ValueVector existing = getChild(childName); - if (existing != null) { - throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", - existing.getClass().getSimpleName(), fieldType)); - } FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack); putChild(childName, vector); if (callBack != null) { @@ -196,21 +250,55 @@ protected void putChild(String name, FieldVector vector) { putVector(name, vector); } + private void put(String name, FieldVector vector, boolean overwrite) { + final boolean old = vectors.put( + Preconditions.checkNotNull(name, "field name cannot be null"), + Preconditions.checkNotNull(vector, "vector cannot be null"), + overwrite + ); + if (old) { + logger.debug("Field [{}] mutated to [{}] ", name, + vector.getClass().getSimpleName()); + } + } + /** - * Inserts the input vector into the map if it does not exist, replaces if it exists already. + * Inserts the input vector into the map if it does not exist. + * + *

+ * If the field name already exists the conflict is handled according to the currently set ConflictPolicy + *

* * @param name field name * @param vector vector to be inserted */ protected void putVector(String name, FieldVector vector) { - final ValueVector old = vectors.put( - Preconditions.checkNotNull(name, "field name cannot be null"), - Preconditions.checkNotNull(vector, "vector cannot be null") - ); - if (old != null && old != vector) { - logger.debug("Field [{}] mutated from [{}] to [{}]", name, old.getClass().getSimpleName(), - vector.getClass().getSimpleName()); + switch (conflictPolicy) { + case CONFLICT_APPEND: + put(name, vector, false); + break; + case CONFLICT_IGNORE: + if (!vectors.containsKey(name)) { + put(name, vector, false); + } + break; + case CONFLICT_REPLACE: + if (vectors.containsKey(name)) { + vectors.removeAll(name); + } + put(name, vector, true); + break; + case CONFLICT_ERROR: + if (vectors.containsKey(name)) { + throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ", + vector.getClass().getSimpleName(), vector.getField().getFieldType())); + } + put(name, vector, false); + break; + default: + throw new IllegalStateException(String.format("%s type not a valid conflict state", conflictPolicy)); } + } /** @@ -284,7 +372,7 @@ private List getPrimitiveVectors(FieldVector v) { } /** - * Get a child vector by name. + * Get a child vector by name. If duplicate names this returns the first inserted. * @param name the name of the child to return * @return a vector with its corresponding ordinal mapping if field exists or null. */ @@ -333,4 +421,5 @@ public int getBufferSize() { public String toString() { return ValueVectorUtility.getToString(this, 0 , getValueCount()); } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java index 92db160ee3f..436b4d170c3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java @@ -53,7 +53,12 @@ public class NonNullableStructVector extends AbstractStructVector { public static NonNullableStructVector empty(String name, BufferAllocator allocator) { FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - return new NonNullableStructVector(name, allocator, fieldType, null); + return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false); + } + + public static NonNullableStructVector emptyWithDuplicates(String name, BufferAllocator allocator) { + FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); + return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true); } private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this); @@ -63,11 +68,21 @@ public static NonNullableStructVector empty(String name, BufferAllocator allocat /** * Constructs a new instance. * - * @deprecated Use FieldType or static constructor instead. + * @param name The name of the instance. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param fieldType The type of this list. */ - @Deprecated - public NonNullableStructVector(String name, BufferAllocator allocator, CallBack callBack) { - this(name, allocator, new FieldType(false, ArrowType.Struct.INSTANCE, null, null), callBack); + public NonNullableStructVector(String name, + BufferAllocator allocator, + FieldType fieldType, + CallBack callBack) { + super(name, + allocator, + callBack, + null, + true); + this.fieldType = checkNotNull(fieldType); + this.valueCount = 0; } /** @@ -77,9 +92,15 @@ public NonNullableStructVector(String name, BufferAllocator allocator, CallBack * @param allocator The allocator to use to allocating/reallocating buffers. * @param fieldType The type of this list. * @param callBack A schema change callback. + * @param conflictPolicy How to handle duplicate field names in the struct. */ - public NonNullableStructVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, allocator, callBack); + public NonNullableStructVector(String name, + BufferAllocator allocator, + FieldType fieldType, + CallBack callBack, + ConflictPolicy conflictPolicy, + boolean allowConflictPolicyChanges) { + super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges); this.fieldType = checkNotNull(fieldType); this.valueCount = 0; } @@ -185,7 +206,12 @@ public TransferPair getTransferPair(BufferAllocator allocator) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new StructTransferPair(this, new NonNullableStructVector(name, allocator, fieldType, callBack), false); + return new StructTransferPair(this, new NonNullableStructVector(name, + allocator, + fieldType, + callBack, + getConflictPolicy(), + allowConflictPolicyChanges), false); } @Override @@ -195,7 +221,12 @@ public TransferPair makeTransferPair(ValueVector to) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new StructTransferPair(this, new NonNullableStructVector(ref, allocator, fieldType, callBack), false); + return new StructTransferPair(this, new NonNullableStructVector(ref, + allocator, + fieldType, + callBack, + getConflictPolicy(), + allowConflictPolicyChanges), false); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index 8357b323d9b..d89ba46e239 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -41,7 +41,6 @@ import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; import org.apache.arrow.vector.util.OversizedAllocationException; @@ -56,7 +55,12 @@ public class StructVector extends NonNullableStructVector implements FieldVector public static StructVector empty(String name, BufferAllocator allocator) { FieldType fieldType = FieldType.nullable(Struct.INSTANCE); - return new StructVector(name, allocator, fieldType, null); + return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false); + } + + public static StructVector emptyWithDuplicates(String name, BufferAllocator allocator) { + FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); + return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true); } private final NullableStructReaderImpl reader = new NullableStructReaderImpl(this); @@ -66,23 +70,24 @@ public static StructVector empty(String name, BufferAllocator allocator) { private int validityAllocationSizeInBytes; /** - * Creates a new instance. + * Constructs a new instance. * - * @deprecated Use FieldType or static constructor instead. - */ - @Deprecated - public StructVector(String name, BufferAllocator allocator, CallBack callBack) { - this(name, allocator, FieldType.nullable(ArrowType.Struct.INSTANCE), callBack); - } - - /** - * Creates a new instance. - * - * @deprecated Use FieldType or static constructor instead. + * @param name The name of the instance. + * @param allocator The allocator to use to allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. */ - @Deprecated - public StructVector(String name, BufferAllocator allocator, DictionaryEncoding dictionary, CallBack callBack) { - this(name, allocator, new FieldType(true, ArrowType.Struct.INSTANCE, dictionary, null), callBack); + public StructVector(String name, + BufferAllocator allocator, + FieldType fieldType, + CallBack callBack) { + super(name, + checkNotNull(allocator), + fieldType, + callBack); + this.validityBuffer = allocator.getEmpty(); + this.validityAllocationSizeInBytes = + BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); } /** @@ -92,9 +97,16 @@ public StructVector(String name, BufferAllocator allocator, DictionaryEncoding d * @param allocator The allocator to use to allocating/reallocating buffers. * @param fieldType The type of this list. * @param callBack A schema change callback. + * @param conflictPolicy policy to determine how duplicate names are handled. + * @param allowConflictPolicyChanges wether duplicate names are allowed at all. */ - public StructVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { - super(name, checkNotNull(allocator), fieldType, callBack); + public StructVector(String name, + BufferAllocator allocator, + FieldType fieldType, + CallBack callBack, + ConflictPolicy conflictPolicy, + boolean allowConflictPolicyChanges) { + super(name, checkNotNull(allocator), fieldType, callBack, conflictPolicy, allowConflictPolicyChanges); this.validityBuffer = allocator.getEmpty(); this.validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION); @@ -145,7 +157,12 @@ public NullableStructWriter getWriter() { @Override public TransferPair getTransferPair(BufferAllocator allocator) { - return new NullableStructTransferPair(this, new StructVector(name, allocator, fieldType, null), false); + return new NullableStructTransferPair(this, new StructVector(name, + allocator, + fieldType, + null, + getConflictPolicy(), + allowConflictPolicyChanges), false); } @Override @@ -155,12 +172,22 @@ public TransferPair makeTransferPair(ValueVector to) { @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - return new NullableStructTransferPair(this, new StructVector(ref, allocator, fieldType, null), false); + return new NullableStructTransferPair(this, new StructVector(ref, + allocator, + fieldType, + null, + getConflictPolicy(), + allowConflictPolicyChanges), false); } @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { - return new NullableStructTransferPair(this, new StructVector(ref, allocator, fieldType, callBack), false); + return new NullableStructTransferPair(this, new StructVector(ref, + allocator, + fieldType, + callBack, + getConflictPolicy(), + allowConflictPolicyChanges), false); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index 4809a26a324..2eb16dee018 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -172,7 +172,7 @@ public boolean read(VectorSchemaRoot root) throws IOException { readToken(START_ARRAY); { for (Field field : root.getSchema().getFields()) { - FieldVector vector = root.getVector(field.getName()); + FieldVector vector = root.getVector(field); readFromJsonIntoVector(field, vector); } } @@ -202,7 +202,7 @@ public VectorSchemaRoot read() throws IOException { readToken(START_ARRAY); { for (Field field : schema.getFields()) { - FieldVector vector = recordBatch.getVector(field.getName()); + FieldVector vector = recordBatch.getVector(field); readFromJsonIntoVector(field, vector); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index a6b8b5f2e55..d0f0f51e38e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -194,7 +194,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { generator.writeObjectField("count", recordBatch.getRowCount()); generator.writeArrayFieldStart("columns"); for (Field field : recordBatch.getSchema().getFields()) { - FieldVector vector = recordBatch.getVector(field.getName()); + FieldVector vector = recordBatch.getVector(field); writeFromVectorIntoJson(field, vector); } generator.writeEndArray(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java index 0e58b5b8cb1..cf157031b84 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java @@ -17,25 +17,12 @@ package org.apache.arrow.vector.util; -import java.util.AbstractMap; -import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; - -import org.apache.arrow.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import io.netty.util.collection.IntObjectHashMap; -import io.netty.util.collection.IntObjectMap; /** - * An implementation of map that supports constant time look-up by a generic key or an ordinal. + * An implementation of a map that supports constant time look-up by a generic key or an ordinal. * *

This class extends the functionality a regular {@link Map} with ordinal lookup support. * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple. @@ -45,212 +32,36 @@ *

For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However, * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are * responsible for explicitly checking the ordinal corresponding to a key via - * {@link org.apache.arrow.vector.util.MapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup + * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup * with an ordinal. * * @param key type * @param value type */ -public class MapWithOrdinal implements Map { - private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinal.class); - - private final Map> primary = new HashMap<>(); - private final IntObjectHashMap secondary = new IntObjectHashMap<>(); - - private final Map delegate = new Map() { - @Override - public boolean isEmpty() { - return size() == 0; - } - - @Override - public int size() { - return primary.size(); - } - - @Override - public boolean containsKey(Object key) { - return primary.containsKey(key); - } - - @Override - public boolean containsValue(Object value) { - return primary.containsValue(value); - } - - @Override - public V get(Object key) { - Entry pair = primary.get(key); - if (pair != null) { - return pair.getValue(); - } - return null; - } - - @Override - public V put(K key, V value) { - final Entry oldPair = primary.get(key); - // if key exists try replacing otherwise, assign a new ordinal identifier - final int ordinal = oldPair == null ? primary.size() : oldPair.getKey(); - primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value)); - secondary.put(ordinal, value); - return oldPair == null ? null : oldPair.getValue(); - } - - @Override - public V remove(Object key) { - final Entry oldPair = primary.remove(key); - if (oldPair != null) { - final int lastOrdinal = secondary.size(); - final V last = secondary.get(lastOrdinal); - // normalize mappings so that all numbers until primary.size() is assigned - // swap the last element with the deleted one - secondary.put(oldPair.getKey(), last); - primary.put((K) key, new AbstractMap.SimpleImmutableEntry<>(oldPair.getKey(), last)); - } - return oldPair == null ? null : oldPair.getValue(); - } - - @Override - public void putAll(Map m) { - throw new UnsupportedOperationException(); - } - - @Override - public void clear() { - primary.clear(); - secondary.clear(); - } - - @Override - public Set keySet() { - return primary.keySet(); - } - - @Override - public Collection values() { - return StreamSupport.stream(secondary.entries().spliterator(), false) - .map((IntObjectMap.PrimitiveEntry t) -> Preconditions.checkNotNull(t).value()) - .collect(Collectors.toList()); - } - - @Override - public Set> entrySet() { - return primary.entrySet().stream() - .map(entry -> new AbstractMap.SimpleImmutableEntry<>(entry.getKey(), entry.getValue().getValue())) - .collect(Collectors.toSet()); - } - }; - - /** - * Returns the value corresponding to the given ordinal. - * - * @param id ordinal value for lookup - * @return an instance of V - */ - public V getByOrdinal(int id) { - return secondary.get(id); - } - - /** - * Returns the ordinal corresponding to the given key. - * - * @param key key for ordinal lookup - * @return ordinal value corresponding to key if it exists or -1 - */ - public int getOrdinal(K key) { - Entry pair = primary.get(key); - if (pair != null) { - return pair.getKey(); - } - return -1; - } - - @Override - public int size() { - return delegate.size(); - } +public interface MapWithOrdinal { + V getByOrdinal(int id); - @Override - public boolean isEmpty() { - return delegate.isEmpty(); - } + int getOrdinal(K key); - @Override - public V get(Object key) { - return delegate.get(key); - } + int size(); - /** - * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with automatic ordinal - * assignment. A new ordinal is assigned if key does not exists. Otherwise the same ordinal is re-used but the value - * is replaced. - * - * @see java.util.Map#put - */ - @Override - public V put(K key, V value) { - return delegate.put(key, value); - } + boolean isEmpty(); - @Override - public Collection values() { - return delegate.values(); - } + V get(K key); - @Override - public boolean containsKey(Object key) { - return delegate.containsKey(key); - } + Collection getAll(K key); - @Override - public boolean containsValue(Object value) { - return delegate.containsValue(value); - } + boolean put(K key, V value, boolean overwrite); - /** - * Removes the element corresponding to the key if exists extending the semantics of {@link java.util.Map#remove} - * with ordinal re-cycling. The ordinal corresponding to the given key may be re-assigned to another tuple. It is - * important that consumer checks the ordinal value via - * {@link org.apache.arrow.vector.util.MapWithOrdinal#getOrdinal(Object)} before attempting to look-up by ordinal. - * - * @see java.util.Map#remove - */ - @Override - public V remove(Object key) { - return delegate.remove(key); - } + Collection values(); - @Override - public void putAll(Map m) { - delegate.putAll(m); - } + boolean containsKey(K key); - @Override - public void clear() { - delegate.clear(); - } + boolean remove(K key, V value); - @Override - public Set keySet() { - return delegate.keySet(); - } + boolean removeAll(K key); - /** - * Returns a list of keys in ordinal order. - */ - public List keyList() { - int size = size(); - Set keys = keySet(); - List children = new ArrayList<>(size); - for (K key : keys) { - children.add(getOrdinal(key), key); - } - return children; - } + void clear(); - @Override - public Set> entrySet() { - return delegate.entrySet(); - } + Set keys(); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java new file mode 100644 index 00000000000..41ce1fc0d10 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import org.apache.arrow.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.netty.util.collection.IntObjectHashMap; +import io.netty.util.collection.IntObjectMap; + +/** + * An implementation of map that supports constant time look-up by a generic key or an ordinal. + * + *

This class extends the functionality a regular {@link Map} with ordinal lookup support. + * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple. + * Upon update the same ordinal id is re-used while value is replaced. + * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item. + * + *

For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However, + * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are + * responsible for explicitly checking the ordinal corresponding to a key via + * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to execute a lookup + * with an ordinal. + * + * @param key type + * @param value type + */ +public class MapWithOrdinalImpl implements MapWithOrdinal { + private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class); + + private final Map> primary = new HashMap<>(); + private final IntObjectHashMap secondary = new IntObjectHashMap<>(); + + private final Map delegate = new Map() { + @Override + public boolean isEmpty() { + return size() == 0; + } + + @Override + public int size() { + return primary.size(); + } + + @Override + public boolean containsKey(Object key) { + return primary.containsKey(key); + } + + @Override + public boolean containsValue(Object value) { + return primary.containsValue(value); + } + + @Override + public V get(Object key) { + Entry pair = primary.get(key); + if (pair != null) { + return pair.getValue(); + } + return null; + } + + @Override + public V put(K key, V value) { + final Entry oldPair = primary.get(key); + // if key exists try replacing otherwise, assign a new ordinal identifier + final int ordinal = oldPair == null ? primary.size() : oldPair.getKey(); + primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value)); + secondary.put(ordinal, value); + return oldPair == null ? null : oldPair.getValue(); + } + + public boolean put(K key, V value, boolean override) { + return put(key, value) != null; + } + + @Override + public V remove(Object key) { + final Entry oldPair = primary.remove(key); + if (oldPair != null) { + final int lastOrdinal = secondary.size(); + final V last = secondary.get(lastOrdinal); + // normalize mappings so that all numbers until primary.size() is assigned + // swap the last element with the deleted one + secondary.put(oldPair.getKey(), last); + primary.put((K) key, new AbstractMap.SimpleImmutableEntry<>(oldPair.getKey(), last)); + } + return oldPair == null ? null : oldPair.getValue(); + } + + @Override + public void putAll(Map m) { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() { + primary.clear(); + secondary.clear(); + } + + @Override + public Set keySet() { + return primary.keySet(); + } + + @Override + public Collection values() { + return StreamSupport.stream(secondary.entries().spliterator(), false) + .map((IntObjectMap.PrimitiveEntry t) -> Preconditions.checkNotNull(t).value()) + .collect(Collectors.toList()); + } + + @Override + public Set> entrySet() { + return primary.entrySet().stream() + .map(entry -> new AbstractMap.SimpleImmutableEntry<>(entry.getKey(), entry.getValue().getValue())) + .collect(Collectors.toSet()); + } + }; + + /** + * Returns the value corresponding to the given ordinal. + * + * @param id ordinal value for lookup + * @return an instance of V + */ + public V getByOrdinal(int id) { + return secondary.get(id); + } + + /** + * Returns the ordinal corresponding to the given key. + * + * @param key key for ordinal lookup + * @return ordinal value corresponding to key if it exists or -1 + */ + public int getOrdinal(K key) { + Map.Entry pair = primary.get(key); + if (pair != null) { + return pair.getKey(); + } + return -1; + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public boolean isEmpty() { + return delegate.isEmpty(); + } + + @Override + public Collection getAll(K key) { + if (delegate.containsKey(key)) { + List list = new ArrayList<>(1); + list.add(get(key)); + return list; + } + return null; + } + + @Override + public V get(K key) { + return delegate.get(key); + } + + /** + * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with automatic ordinal + * assignment. A new ordinal is assigned if key does not exists. Otherwise the same ordinal is re-used but the value + * is replaced. + * + * @see java.util.Map#put + */ + @Override + public boolean put(K key, V value, boolean overwrite) { + return delegate.put(key, value) != null; + } + + @Override + public Collection values() { + return delegate.values(); + } + + @Override + public boolean remove(K key, V value) { + return false; + } + + @Override + public boolean containsKey(Object key) { + return delegate.containsKey(key); + } + + /** + * Removes the element corresponding to the key if exists extending the semantics of {@link java.util.Map#remove} + * with ordinal re-cycling. The ordinal corresponding to the given key may be re-assigned to another tuple. It is + * important that consumer checks the ordinal value via + * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to look-up by ordinal. + * + * @see java.util.Map#remove + */ + @Override + public boolean removeAll(K key) { + return delegate.remove(key) != null; + } + + @Override + public void clear() { + delegate.clear(); + } + + @Override + public Set keys() { + return delegate.keySet(); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java new file mode 100644 index 00000000000..5fbb45a7ac6 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +import io.netty.util.collection.IntObjectHashMap; + +/** + * An implementation of a multimap that supports constant time look-up by a generic key or an ordinal. + * + *

This class extends the functionality a regular {@link Map} with ordinal lookup support. + * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple. + * Upon update the same ordinal id is re-used while value is replaced. + * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item. + * + *

For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However, + * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are + * responsible for explicitly checking the ordinal corresponding to a key via + * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup + * with an ordinal. + * + * @param key type + * @param value type + */ +public class MultiMapWithOrdinal implements MapWithOrdinal { + + private final Map> keyToOrdinal = new LinkedHashMap<>(); + private final IntObjectHashMap ordinalToValue = new IntObjectHashMap<>(); + + /** + * Returns the value corresponding to the given ordinal. + * + * @param id ordinal value for lookup + * @return an instance of V + */ + @Override + public V getByOrdinal(int id) { + return ordinalToValue.get(id); + } + + /** + * Returns the ordinal corresponding to the given key. + * + * @param key key for ordinal lookup + * @return ordinal value corresponding to key if it exists or -1 + */ + @Override + public int getOrdinal(K key) { + Set pair = getOrdinals(key); + if (!pair.isEmpty()) { + return pair.iterator().next(); + } + return -1; + } + + private Set getOrdinals(K key) { + return keyToOrdinal.getOrDefault(key, new HashSet<>()); + } + + @Override + public int size() { + return ordinalToValue.size(); + } + + @Override + public boolean isEmpty() { + return ordinalToValue.isEmpty(); + } + + /** + * get set of values for key. + */ + @Override + public V get(K key) { + Set ordinals = keyToOrdinal.get(key); + if (ordinals == null) { + return null; + } + return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()).get(0); + } + + /** + * get set of values for key. + */ + @Override + public Collection getAll(K key) { + Set ordinals = keyToOrdinal.get(key); + if (ordinals == null) { + return null; + } + return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()); + } + + /** + * Inserts the tuple (key, value) into the multimap with automatic ordinal assignment. + * + * A new ordinal is assigned if key/value pair does not exists. + * + * If overwrite is true the existing key will be overwritten with value else value will be appended to the multimap. + */ + @Override + public boolean put(K key, V value, boolean overwrite) { + if (overwrite) { + removeAll(key); + } + Set ordinalSet = getOrdinals(key); + int nextOrdinal = ordinalToValue.size(); + ordinalToValue.put(nextOrdinal, value); + boolean changed = ordinalSet.add(nextOrdinal); + keyToOrdinal.put(key, ordinalSet); + return changed; + } + + @Override + public Collection values() { + return ordinalToValue.values(); + } + + @Override + public boolean containsKey(K key) { + return keyToOrdinal.containsKey(key); + } + + /** + * Removes the element corresponding to the key/value if exists with ordinal re-cycling. + * + * The ordinal corresponding to the given key may be re-assigned to another tuple. It is + * important that consumer checks the ordinal value via + * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to look-up by ordinal. + * + * If the multimap is changed return true. + */ + @Override + public synchronized boolean remove(K key, V value) { + Set removalSet = getOrdinals(key); + if (removalSet.isEmpty()) { + return false; + } + Optional removeValue = removalSet.stream().map(ordinalToValue::get).filter(value::equals).findFirst(); + if (!removeValue.isPresent()) { + return false; + } + int removalOrdinal = removeKv(removalSet, key, value); + int lastOrdinal = ordinalToValue.size(); + if (lastOrdinal != removalOrdinal) { //we didn't remove the last ordinal + swapOrdinal(lastOrdinal, removalOrdinal); + } + return true; + } + + private void swapOrdinal(int lastOrdinal, int removalOrdinal) { + V swapOrdinalValue = ordinalToValue.remove(lastOrdinal); + ordinalToValue.put(removalOrdinal, swapOrdinalValue); + K swapOrdinalKey = keyToOrdinal.entrySet() + .stream() + .filter(kv -> kv.getValue().stream().anyMatch(o -> o == lastOrdinal)) + .map(Map.Entry::getKey) + .findFirst() + .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state")); + ordinalToValue.put(removalOrdinal, swapOrdinalValue); + Set swapSet = getOrdinals(swapOrdinalKey); + swapSet.remove(lastOrdinal); + swapSet.add(removalOrdinal); + keyToOrdinal.put(swapOrdinalKey, swapSet); + } + + private int removeKv(Set removalSet, K key, V value) { + Integer removalOrdinal = removalSet.stream() + .filter(i -> ordinalToValue.get(i).equals(value)) + .findFirst() + .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state")); + ordinalToValue.remove(removalOrdinal); + removalSet.remove(removalOrdinal); + if (removalSet.isEmpty()) { + keyToOrdinal.remove(key); + } else { + keyToOrdinal.put(key, removalSet); + } + return removalOrdinal; + } + + /** + * remove all entries of key. + */ + @Override + public synchronized boolean removeAll(K key) { + Collection values = this.getAll(key); + if (values == null) { + return false; + } + for (V v: values) { + this.remove(key, v); + } + return true; + } + + @Override + public void clear() { + ordinalToValue.clear(); + keyToOrdinal.clear(); + } + + @Override + public Set keys() { + return keyToOrdinal.keySet(); + } + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java b/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java new file mode 100644 index 00000000000..f2f838af904 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import java.util.Collection; +import java.util.Set; + +import org.apache.arrow.vector.complex.AbstractStructVector; + +/** + * Implementation of MapWithOrdinal that allows for promotion to multimap when duplicate fields exist. + * @param key type + * @param value type + */ +public class PromotableMultiMapWithOrdinal implements MapWithOrdinal { + private final MapWithOrdinalImpl mapWithOrdinal = new MapWithOrdinalImpl<>(); + private final MultiMapWithOrdinal multiMapWithOrdinal = new MultiMapWithOrdinal<>(); + private final boolean promotable; + private AbstractStructVector.ConflictPolicy conflictPolicy; + private MapWithOrdinal delegate; + + /** + * Create promotable map. + * @param promotable if promotion is allowed, otherwise delegate to MapWithOrdinal. + * @param conflictPolicy how to handle name conflicts. + */ + public PromotableMultiMapWithOrdinal(boolean promotable, AbstractStructVector.ConflictPolicy conflictPolicy) { + this.promotable = promotable; + this.conflictPolicy = conflictPolicy; + delegate = mapWithOrdinal; + } + + private void promote() { + if (delegate == multiMapWithOrdinal || + !promotable || + conflictPolicy.equals(AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE)) { + return; + } + for (K key : mapWithOrdinal.keys()) { + V value = mapWithOrdinal.get(key); + multiMapWithOrdinal.put(key, value, false); + } + mapWithOrdinal.clear(); + delegate = multiMapWithOrdinal; + } + + @Override + public V getByOrdinal(int id) { + return delegate.getByOrdinal(id); + } + + @Override + public int getOrdinal(K key) { + return delegate.getOrdinal(key); + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public boolean isEmpty() { + return delegate.isEmpty(); + } + + @Override + public V get(K key) { + return delegate.get(key); + } + + @Override + public Collection getAll(K key) { + return delegate.getAll(key); + } + + @Override + public boolean put(K key, V value, boolean overwrite) { + if (delegate.containsKey(key)) { + promote(); + } + return delegate.put(key, value, overwrite); + } + + @Override + public Collection values() { + return delegate.values(); + } + + @Override + public boolean containsKey(K key) { + return delegate.containsKey(key); + } + + @Override + public boolean remove(K key, V value) { + return delegate.remove(key, value); + } + + @Override + public boolean removeAll(K key) { + return delegate.removeAll(key); + } + + @Override + public void clear() { + delegate.clear(); + } + + @Override + public Set keys() { + return delegate.keys(); + } + + public void setConflictPolicy(AbstractStructVector.ConflictPolicy conflictPolicy) { + this.conflictPolicy = conflictPolicy; + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java new file mode 100644 index 00000000000..ea829060d1c --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.util; + +import org.junit.Assert; +import org.junit.Test; + +public class TestMultiMapWithOrdinal { + + @Test + public void test() { + MultiMapWithOrdinal map = new MultiMapWithOrdinal<>(); + + map.put("x", "1", false); + Assert.assertEquals(1, map.size()); + map.remove("x", "1"); + Assert.assertTrue(map.isEmpty()); + map.put("x", "1", false); + map.put("x", "2", false); + map.put("y", "0", false); + Assert.assertEquals(3, map.size()); + Assert.assertEquals(2, map.getAll("x").size()); + Assert.assertEquals("1", map.getAll("x").stream().findFirst().get()); + Assert.assertEquals("1", map.getByOrdinal(0)); + Assert.assertEquals("2", map.getByOrdinal(1)); + Assert.assertEquals("0", map.getByOrdinal(2)); + Assert.assertTrue(map.remove("x", "1")); + Assert.assertFalse(map.remove("x", "1")); + Assert.assertEquals("0", map.getByOrdinal(0)); + Assert.assertEquals(2, map.size()); + map.put("x", "3", true); + Assert.assertEquals(1, map.getAll("x").size()); + Assert.assertEquals("3", map.getAll("x").stream().findFirst().get()); + map.put("z", "4", false); + Assert.assertEquals(3, map.size()); + map.put("z", "5", false); + map.put("z", "6", false); + Assert.assertEquals(5, map.size()); + map.removeAll("z"); + Assert.assertEquals(2, map.size()); + Assert.assertFalse(map.containsKey("z")); + + + } +}