diff --git a/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMapBenchmarks.java b/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMapBenchmarks.java deleted file mode 100644 index e97bff2e1dd..00000000000 --- a/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMapBenchmarks.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.dictionary; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Random; -import java.util.concurrent.TimeUnit; - -import org.junit.Test; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.runner.Runner; -import org.openjdk.jmh.runner.RunnerException; -import org.openjdk.jmh.runner.options.Options; -import org.openjdk.jmh.runner.options.OptionsBuilder; - -/** - * Benchmarks for {@link DictionaryEncodeHashMap}. - */ -@State(Scope.Benchmark) -public class DictionaryEncodeHashMapBenchmarks { - private static final int SIZE = 1000; - - private static final int KEY_LENGTH = 10; - - private List testData = new ArrayList<>(); - - private HashMap hashMap = new HashMap(); - private DictionaryEncodeHashMap dictionaryEncodeHashMap = new DictionaryEncodeHashMap(); - - /** - * Setup benchmarks. - */ - @Setup - public void prepare() { - for (int i = 0; i < SIZE; i++) { - testData.add(getRandomString(KEY_LENGTH)); - } - } - - private String getRandomString(int length) { - String str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - Random random = new Random(); - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < length; i++) { - int number = random.nextInt(62); - sb.append(str.charAt(number)); - } - return sb.toString(); - } - - /** - * Test set/get int values for {@link HashMap}. - * @return useless. To avoid DCE by JIT. - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int testHashMap() { - for (int i = 0; i < SIZE; i++) { - hashMap.put(testData.get(i), i); - } - for (int i = 0; i < SIZE; i++) { - hashMap.get(testData.get(i)); - } - return 0; - } - - /** - * Test set/get int values for {@link DictionaryEncodeHashMap}. - * @return useless. To avoid DCE by JIT. - */ - @Benchmark - @BenchmarkMode(Mode.AverageTime) - @OutputTimeUnit(TimeUnit.NANOSECONDS) - public int testDictionaryEncodeHashMap() { - for (int i = 0; i < SIZE; i++) { - dictionaryEncodeHashMap.put(testData.get(i), i); - } - for (int i = 0; i < SIZE; i++) { - dictionaryEncodeHashMap.get(testData.get(i)); - } - return 0; - } - - @Test - public void evaluate() throws RunnerException { - Options opt = new OptionsBuilder() - .include(DictionaryEncodeHashMapBenchmarks.class.getSimpleName()) - .forks(1) - .build(); - - new Runner(opt).run(); - } -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseBinaryVector.java deleted file mode 100644 index 9ddbd10970f..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseBinaryVector.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector; - -import org.apache.arrow.vector.dictionary.ByteArrayWrapper; - -/** - * Interface for VarBinaryVector and FixedSizeBinaryVector. - */ -public interface BaseBinaryVector { - - /** - * Get the {@link ByteArrayWrapper} which holds the byte array at specific index. - * @param index index of object to get - */ - ByteArrayWrapper getByteArrayWrapper(int index); -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java index 862336f90ac..61bd57c1356 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.dictionary.ByteArrayWrapper; import org.apache.arrow.vector.holders.FixedSizeBinaryHolder; import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; import org.apache.arrow.vector.types.Types.MinorType; @@ -38,7 +37,7 @@ * binary values which could be null. A validity buffer (bit vector) is * maintained to track which elements in the vector are null. */ -public class FixedSizeBinaryVector extends BaseFixedWidthVector implements BaseBinaryVector { +public class FixedSizeBinaryVector extends BaseFixedWidthVector { private final int byteWidth; private final FieldReader reader; @@ -364,15 +363,6 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((FixedSizeBinaryVector) to); } - @Override - public ByteArrayWrapper getByteArrayWrapper(int index) { - if (isNull(index)) { - return null; - } else { - return new ByteArrayWrapper(getObject(index)); - } - } - private class TransferImpl implements TransferPair { FixedSizeBinaryVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java index c8232cfe4df..bd76f3cc03f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java @@ -22,7 +22,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.dictionary.ByteArrayWrapper; import org.apache.arrow.vector.holders.NullableVarBinaryHolder; import org.apache.arrow.vector.holders.VarBinaryHolder; import org.apache.arrow.vector.types.Types.MinorType; @@ -35,7 +34,7 @@ * values which could be NULL. A validity buffer (bit vector) is maintained * to track which elements in the vector are null. */ -public class VarBinaryVector extends BaseVariableWidthVector implements BaseBinaryVector { +public class VarBinaryVector extends BaseVariableWidthVector { private final FieldReader reader; /** @@ -280,15 +279,6 @@ public TransferPair makeTransferPair(ValueVector to) { return new TransferImpl((VarBinaryVector) to); } - @Override - public ByteArrayWrapper getByteArrayWrapper(int index) { - if (isNull(index)) { - return null; - } else { - return new ByteArrayWrapper(getObject(index)); - } - } - private class TransferImpl implements TransferPair { VarBinaryVector to; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMap.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMap.java deleted file mode 100644 index 659a8d6b634..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncodeHashMap.java +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.dictionary; - -import java.util.Objects; - -/** - * Implementation of the {@link ObjectIntMap} interface, used for DictionaryEncoder. - * Note that value in this map is always not less than 0, and -1 represents a null value. - * @param key type. - */ -public class DictionaryEncodeHashMap implements ObjectIntMap { - - /** - * Represents a null value in map. - */ - static final int NULL_VALUE = -1; - - /** - * The default initial capacity - MUST be a power of two. - */ - static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; - - /** - * The maximum capacity, used if a higher value is implicitly specified - * by either of the constructors with arguments. - */ - static final int MAXIMUM_CAPACITY = 1 << 30; - - /** - * The load factor used when none specified in constructor. - */ - static final float DEFAULT_LOAD_FACTOR = 0.75f; - - static final Entry[] EMPTY_TABLE = {}; - - /** - * The table, initialized on first use, and resized as - * necessary. When allocated, length is always a power of two. - */ - transient Entry[] table = (Entry[]) EMPTY_TABLE; - - /** - * The number of key-value mappings contained in this map. - */ - transient int size; - - /** - * The next size value at which to resize (capacity * load factor). - */ - int threshold; - - /** - * The load factor for the hash table. - */ - final float loadFactor; - - /** - * Constructs an empty map with the specified initial capacity and load factor. - */ - public DictionaryEncodeHashMap(int initialCapacity, float loadFactor) { - if (initialCapacity < 0) { - throw new IllegalArgumentException("Illegal initial capacity: " + - initialCapacity); - } - if (initialCapacity > MAXIMUM_CAPACITY) { - initialCapacity = MAXIMUM_CAPACITY; - } - if (loadFactor <= 0 || Float.isNaN(loadFactor)) { - throw new IllegalArgumentException("Illegal load factor: " + - loadFactor); - } - this.loadFactor = loadFactor; - this.threshold = initialCapacity; - } - - public DictionaryEncodeHashMap(int initialCapacity) { - this(initialCapacity, DEFAULT_LOAD_FACTOR); - } - - public DictionaryEncodeHashMap() { - this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR); - } - - /** - * Compute the capacity with given threshold and create init table. - */ - private void inflateTable(int threshold) { - int capacity = roundUpToPowerOf2(threshold); - this.threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1); - table = new Entry[capacity]; - } - - /** - * Computes key.hashCode() and spreads (XORs) higher bits of hash to lower. - */ - static final int hash(Object key) { - int h; - return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); - } - - /** - * Computes the storage location in an array for the given hashCode. - */ - static int indexFor(int h, int length) { - return h & (length - 1); - } - - /** - * Returns a power of two size for the given size. - */ - static final int roundUpToPowerOf2(int size) { - int n = size - 1; - n |= n >>> 1; - n |= n >>> 2; - n |= n >>> 4; - n |= n >>> 8; - n |= n >>> 16; - return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1; - } - - /** - * Returns the value to which the specified key is mapped, - * or -1 if this map contains no mapping for the key. - */ - @Override - public int get(K key) { - int hash = hash(key); - int index = indexFor(hash, table.length); - for (Entry e = table[index] ; e != null ; e = e.next) { - if ((e.hash == hash) && e.key.equals(key)) { - return e.value; - } - } - return NULL_VALUE; - } - - /** - * Associates the specified value with the specified key in this map. - * If the map previously contained a mapping for the key, the old - * value is replaced. - */ - @Override - public int put(K key, int value) { - if (table == EMPTY_TABLE) { - inflateTable(threshold); - } - - if (key == null) { - return putForNullKey(value); - } - - int hash = hash(key); - int i = indexFor(hash, table.length); - for (Entry e = table[i]; e != null; e = e.next) { - Object k; - if (e.hash == hash && ((k = e.key) == key || key.equals(k))) { - int oldValue = e.value; - e.value = value; - return oldValue; - } - } - - addEntry(hash, key, value, i); - return NULL_VALUE; - } - - /** - * Removes the mapping for the specified key from this map if present. - * @param key key whose mapping is to be removed from the map - * @return the previous value associated with key, or - * -1 if there was no mapping for key. - */ - @Override - public int remove(K key) { - Entry e = removeEntryForKey(key); - return (e == null ? NULL_VALUE : e.value); - } - - /** - * Create a new Entry at the specific position of table. - */ - void createEntry(int hash, K key, int value, int bucketIndex) { - Entry e = table[bucketIndex]; - table[bucketIndex] = new Entry<>(hash, key, value, e); - size++; - } - - /** - * Put value when key is null. - */ - private int putForNullKey(int value) { - for (Entry e = table[0]; e != null; e = e.next) { - if (e.key == null) { - int oldValue = e.value; - e.value = value; - return oldValue; - } - } - addEntry(0, null, value, 0); - return NULL_VALUE; - } - - /** - * Add Entry at the specified location of the table. - */ - void addEntry(int hash, K key, int value, int bucketIndex) { - if ((size >= threshold) && (null != table[bucketIndex])) { - resize(2 * table.length); - hash = (null != key) ? hash(key) : 0; - bucketIndex = indexFor(hash, table.length); - } - - createEntry(hash, key, value, bucketIndex); - } - - /** - * Resize table with given new capacity. - */ - void resize(int newCapacity) { - Entry[] oldTable = table; - int oldCapacity = oldTable.length; - if (oldCapacity == MAXIMUM_CAPACITY) { - threshold = Integer.MAX_VALUE; - return; - } - - Entry[] newTable = new Entry[newCapacity]; - transfer(newTable); - table = newTable; - threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1); - } - - /** - * Transfer entries into new table from old table. - * @param newTable new table - */ - void transfer(Entry[] newTable) { - int newCapacity = newTable.length; - for (Entry e : table) { - while (null != e) { - Entry next = e.next; - int i = indexFor(e.hash, newCapacity); - e.next = newTable[i]; - newTable[i] = e; - e = next; - } - } - } - - /** - * Remove entry associated with the given key. - */ - final Entry removeEntryForKey(Object key) { - if (size == 0) { - return null; - } - int hash = (key == null) ? 0 : hash(key); - int i = indexFor(hash, table.length); - Entry prev = table[i]; - Entry e = prev; - - while (e != null) { - Entry next = e.next; - Object k; - if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) { - size--; - if (prev == e) { - table[i] = next; - } else { - prev.next = next; - } - - return e; - } - prev = e; - e = next; - } - - return e; - } - - /** - * Returns the number of mappings in this Map. - */ - public int size() { - return size; - } - - /** - * Removes all elements from this map, leaving it empty. - */ - public void clear() { - size = 0; - for (int i = 0; i < table.length; i++) { - table[i] = null; - } - } - - /** - * Class to keep key-value data within hash map. - * @param key type. - */ - static class Entry { - final K key; - int value; - Entry next; - int hash; - - Entry(int hash, K key, int value, Entry next) { - this.key = key; - this.value = value; - this.hash = hash; - this.next = next; - } - - public final K getKey() { - return key; - } - - public final int getValue() { - return value; - } - - public final int setValue(int newValue) { - int oldValue = value; - value = newValue; - return oldValue; - } - - public final boolean equals(Object o) { - if (!(o instanceof DictionaryEncodeHashMap.Entry)) { - return false; - } - Entry e = (Entry) o; - if (Objects.equals(key, e.getKey())) { - if (value == e.getValue()) { - return true; - } - } - return false; - } - - public final int hashCode() { - return Objects.hashCode(key) ^ Objects.hashCode(value); - } - - public final String toString() { - return getKey() + "=" + getValue(); - } - } - -} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java index bf0c788d5b2..725783591d6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java @@ -255,7 +255,7 @@ public final int getIndex() { } public final boolean equals(Object o) { - if (!(o instanceof DictionaryEncodeHashMap.Entry)) { + if (!(o instanceof DictionaryHashTable.Entry)) { return false; } DictionaryHashTable.Entry e = (DictionaryHashTable.Entry) o; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ObjectIntMap.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ObjectIntMap.java deleted file mode 100644 index 582bb561cb7..00000000000 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ObjectIntMap.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector.dictionary; - -/** - * Specific hash map for int type value, reducing boxing/unboxing operations. - * @param key type. - */ -public interface ObjectIntMap { - - /** - * Associates the specified value with the specified key in this map. - * If the map previously contained a mapping for the key, the old - * value is replaced. - * @param key key with which the specified value is to be associated - * @param value value to be associated with the specified key - * @return the previous value associated with key, or - * -1 if there was no mapping for key. - */ - int put(K key, int value); - - /** - * Returns the value to which the specified key is mapped, - * or -1 if this map contains no mapping for the key. - */ - int get(K key); - - /** - * Removes the mapping for the specified key from this map if present. - * @param key key whose mapping is to be removed from the map - * @return the previous value associated with key, or - * -1 if there was no mapping for key. - */ - int remove(K key); -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryEncodeHashMap.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryEncodeHashMap.java deleted file mode 100644 index 5f4e710b8af..00000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryEncodeHashMap.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector; - -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -import org.apache.arrow.vector.dictionary.DictionaryEncodeHashMap; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - - - -public class TestDictionaryEncodeHashMap { - - private List testData = new ArrayList<>(); - - private static final int SIZE = 100; - - private static final int KEY_LENGTH = 5; - - private DictionaryEncodeHashMap map = new DictionaryEncodeHashMap(); - - @Before - public void init() { - for (int i = 0; i < SIZE; i++) { - testData.add(generateUniqueKey(KEY_LENGTH)); - } - } - - @After - public void terminate() throws Exception { - testData.clear(); - } - - private String generateUniqueKey(int length) { - String str = "abcdefghijklmnopqrstuvwxyz"; - Random random = new Random(); - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < length; i++) { - int number = random.nextInt(26); - sb.append(str.charAt(number)); - } - if (testData.contains(sb.toString())) { - return generateUniqueKey(length); - } - return sb.toString(); - } - - @Test - public void testPutAndGet() { - for (int i = 0; i < SIZE; i++) { - map.put(testData.get(i), i); - } - - for (int i = 0; i < SIZE; i++) { - assertEquals(i, map.get(testData.get(i))); - } - } - - @Test - public void testPutExistKey() { - for (int i = 0; i < SIZE; i++) { - map.put(testData.get(i), i); - } - map.put("test_key", 101); - assertEquals(101, map.get("test_key")); - map.put("test_key", 102); - assertEquals(102, map.get("test_key")); - } - - @Test - public void testGetNonExistKey() { - for (int i = 0; i < SIZE; i++) { - map.put(testData.get(i), i); - } - //remove if exists. - map.remove("test_key"); - assertEquals(-1, map.get("test_key")); - } - - @Test - public void testRemove() { - for (int i = 0; i < SIZE; i++) { - map.put(testData.get(i), i); - } - map.put("test_key", 10000); - assertEquals(SIZE + 1, map.size()); - assertEquals(10000, map.get("test_key")); - map.remove("test_key"); - assertEquals(SIZE, map.size()); - assertEquals(-1, map.get("test_key")); - } - - @Test - public void testSize() { - assertEquals(0, map.size()); - for (int i = 0; i < SIZE; i++) { - map.put(testData.get(i), i); - } - assertEquals(SIZE, map.size()); - } -}