From dcd5e293698f7ec5fd6a122f96af9a7a237ceb08 Mon Sep 17 00:00:00 2001 From: Yuliya Feldman Date: Sat, 30 Sep 2017 01:14:32 -0700 Subject: [PATCH] =?UTF-8?q?ARROW-1627:=20New=20class=20to=20handle=20colle?= =?UTF-8?q?ction=20of=20BufferLedger(s)=20within=20AllocationManager=20Low?= =?UTF-8?q?CostIdentityHashMap=20uses=20array=20only=20to=20store=20object?= =?UTF-8?q?s=20since=20key=20(BaseAllocator)=20is=20already=20included=20i?= =?UTF-8?q?n=20the=20value=20(BufferLedger)=20To=20make=20this=20class=20a?= =?UTF-8?q?=20bit=20more=20generic=20it=20uses=20values=20that=20implement?= =?UTF-8?q?=20ValueWithKeyIncluded=20interface=20that=20provides=20API=20:?= =?UTF-8?q?=20getKey()=20BufferLedger=20provides=20implementation=20for=20?= =?UTF-8?q?it.=20LowCostIdentityHashMap=20is=20not=20general=20purpose=20m?= =?UTF-8?q?ap=20it=20just=20provides=20some=20=E2=80=9Cmap=20like=E2=80=9D?= =?UTF-8?q?=20APIs=20to=20simplify=20switch=20over=20from=20IdentityHashMa?= =?UTF-8?q?p.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ARROW-1627: Small improvement in hashing function. ARROW-1627: Addressing review comments --- .../arrow/memory/AllocationManager.java | 17 +- .../arrow/memory/LowCostIdentityHasMap.java | 334 ++++++++++++++++++ .../arrow/memory/ValueWithKeyIncluded.java | 26 ++ .../memory/TestLowCostIdentityHasMap.java | 169 +++++++++ 4 files changed, 541 insertions(+), 5 deletions(-) create mode 100644 java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java create mode 100644 java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java create mode 100644 java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java diff --git a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java index 6877c18f624..14687b54634 100644 --- a/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java +++ b/java/memory/src/main/java/org/apache/arrow/memory/AllocationManager.java @@ -74,7 +74,9 @@ public class AllocationManager { private final long allocatorManagerId = MANAGER_ID_GENERATOR.incrementAndGet(); private final int size; private final UnsafeDirectLittleEndian underlying; - private final IdentityHashMap map = new IdentityHashMap<>(); + // ARROW-1627 Trying to minimize memory overhead caused by previously used IdentityHashMap + // see JIRA for details + private final LowCostIdentityHasMap map = new LowCostIdentityHasMap<>(); private final ReadWriteLock lock = new ReentrantReadWriteLock(); private final AutoCloseableLock readLock = new AutoCloseableLock(lock.readLock()); private final AutoCloseableLock writeLock = new AutoCloseableLock(lock.writeLock()); @@ -144,7 +146,7 @@ private BufferLedger associate(final BaseAllocator allocator, final boolean reta if (retain) { ledger.inc(); } - BufferLedger oldLedger = map.put(allocator, ledger); + BufferLedger oldLedger = map.put(ledger); Preconditions.checkArgument(oldLedger == null); allocator.associateLedger(ledger); return ledger; @@ -174,7 +176,7 @@ private void release(final BufferLedger ledger) { } else { // we need to change the owning allocator. we've been removed so we'll get whatever is // top of list - BufferLedger newLedger = map.values().iterator().next(); + BufferLedger newLedger = map.getNextValue(); // we'll forcefully transfer the ownership and not worry about whether we exceeded the // limit @@ -196,7 +198,7 @@ private void release(final BufferLedger ledger) { * As with AllocationManager, the only reason this is public is due to ArrowBuf being in io * .netty.buffer package. */ - public class BufferLedger { + public class BufferLedger implements ValueWithKeyIncluded { private final IdentityHashMap buffers = BaseAllocator.DEBUG ? new IdentityHashMap() : null; @@ -226,6 +228,11 @@ private BaseAllocator getAllocator() { return allocator; } + @Override + public BaseAllocator getKey() { + return allocator; + } + /** * Transfer any balance the current ledger has to the target ledger. In the case that the * current ledger holds no @@ -457,4 +464,4 @@ boolean isOwningLedger() { } -} \ No newline at end of file +} diff --git a/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java b/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java new file mode 100644 index 00000000000..1153fb59364 --- /dev/null +++ b/java/memory/src/main/java/org/apache/arrow/memory/LowCostIdentityHasMap.java @@ -0,0 +1,334 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.memory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +/** + * Highly specialized IdentityHashMap that implements only partial + * Map APIs + * It incurs low initial cost (just two elements by default) + * It assumes Value includes the Key - Implements @ValueWithKeyIncluded iface + * that provides "getKey" method + * @param + */ +public class LowCostIdentityHasMap> { + + /* + * The internal data structure to hold values. + */ + private Object[] elementData; + + /* Actual number of values. */ + private int size; + + /* + * maximum number of elements that can be put in this map before having to + * rehash. + */ + private int threshold; + + private static final int DEFAULT_MIN_SIZE = 1; + + /* Default load factor of 0.75; */ + private static final int loadFactor = 7500; + + /** + * Creates an Map with default expected maximum size. + */ + public LowCostIdentityHasMap() { + this(DEFAULT_MIN_SIZE); + } + + /** + * Creates an Map with the specified maximum size parameter. + * + * @param maxSize + * The estimated maximum number of entries that will be put in + * this map. + */ + public LowCostIdentityHasMap(int maxSize) { + if (maxSize >= 0) { + this.size = 0; + threshold = getThreshold(maxSize); + elementData = newElementArray(computeElementArraySize()); + } else { + throw new IllegalArgumentException(); + } + } + + private int getThreshold(int maxSize) { + // assign the threshold to maxSize initially, this will change to a + // higher value if rehashing occurs. + return maxSize > 2 ? maxSize : 2; + } + + private int computeElementArraySize() { + int arraySize = (int) (((long) threshold * 10000) / loadFactor); + // ensure arraySize is positive, the above cast from long to int type + // leads to overflow and negative arraySize if threshold is too big + return arraySize < 0 ? -arraySize : arraySize; + } + + /** + * Create a new element array + * + * @param s + * the number of elements + * @return Reference to the element array + */ + private Object[] newElementArray(int s) { + return new Object[s]; + } + + /** + * Removes all elements from this map, leaving it empty. + * + * @see #isEmpty() + * @see #size() + */ + public void clear() { + size = 0; + for (int i = 0; i < elementData.length; i++) { + elementData[i] = null; + } + } + + /** + * Returns whether this map contains the specified key. + * + * @param key + * the key to search for. + * @return {@code true} if this map contains the specified key, + * {@code false} otherwise. + */ + public boolean containsKey(K key) { + Preconditions.checkNotNull(key); + + int index = findIndex(key, elementData); + return (elementData[index] == null) ? false : ((V)elementData[index]).getKey() == key; + } + + /** + * Returns whether this map contains the specified value. + * + * @param value + * the value to search for. + * @return {@code true} if this map contains the specified value, + * {@code false} otherwise. + */ + public boolean containsValue(V value) { + Preconditions.checkNotNull(value); + + for (int i = 0; i < elementData.length; i++) { + if (elementData[i] == value) { + return true; + } + } + return false; + } + + /** + * Returns the value of the mapping with the specified key. + * + * @param key + * the key. + * @return the value of the mapping with the specified key. + */ + public V get(K key) { + Preconditions.checkNotNull(key); + + int index = findIndex(key, elementData); + + return (elementData[index] == null) ? null : + (((V)elementData[index]).getKey() == key) ? (V)elementData[index] : null; + } + + /** + * Returns the index where the key is found at, or the index of the next + * empty spot if the key is not found in this table. + */ + @VisibleForTesting + int findIndex(Object key, Object[] array) { + int length = array.length; + int index = getModuloHash(key, length); + int last = (index + length - 1) % length; + while (index != last) { + if ((array[index] == null) || ((V)array[index]).getKey() == key) { + /* + * Found the key, or the next empty spot (which means key is not + * in the table) + */ + break; + } + index = (index+1) % length; + } + return index; + } + + @VisibleForTesting + static int getModuloHash(Object key, int length) { + return ((System.identityHashCode(key) & 0x7FFFFFFF) % length); + } + + /** + * Maps the specified key to the specified value. + * + * @param value + * the value. + * @return the value of any previous mapping with the specified key or + * {@code null} if there was no such mapping. + */ + public V put(V value) { + Preconditions.checkNotNull(value); + K _key = value.getKey(); + Preconditions.checkNotNull(_key); + V _value = value; + + int index = findIndex(_key, elementData); + + // if the key doesn't exist in the table + if (elementData[index] == null || ((V)elementData[index]).getKey() != _key) { + if (++size > threshold) { + rehash(); + index = findIndex(_key, elementData); + } + + // insert the key and assign the value to null initially + elementData[index] = null; + } + + // insert value to where it needs to go, return the old value + Object result = elementData[index]; + elementData[index] = _value; + + return (V) result; + } + + @VisibleForTesting + void rehash() { + int newlength = elementData.length * 15 / 10; + if (newlength == 0) { + newlength = 1; + } + Object[] newData = newElementArray(newlength); + for (int i = 0; i < elementData.length; i++) { + Object key = (elementData[i] == null) ? null : ((V)elementData[i]).getKey(); + if (key != null) { + // if not empty + int index = findIndex(key, newData); + newData[index] = elementData[i]; + } + } + elementData = newData; + computeMaxSize(); + } + + private void computeMaxSize() { + threshold = (int) ((long) (elementData.length) * loadFactor / 10000); + } + + /** + * Removes the mapping with the specified key from this map. + * + * @param key + * the key of the mapping to remove. + * @return the value of the removed mapping, or {@code null} if no mapping + * for the specified key was found. + */ + public V remove(K key) { + Preconditions.checkNotNull(key); + + boolean hashedOk; + int index, next, hash; + Object result, object; + index = next = findIndex(key, elementData); + + if (elementData[index] == null || ((V)elementData[index]).getKey() != key) { + return null; + } + + // store the value for this key + result = elementData[index]; + // clear value to allow movement of the rest of the elements + elementData[index] = null; + size--; + + // shift the following elements up if needed + // until we reach an empty spot + int length = elementData.length; + while (true) { + next = (next + 1) % length; + object = elementData[next]; + if (object == null) { + break; + } + + hash = getModuloHash(((V)object).getKey(), length); + hashedOk = hash > index; + if (next < index) { + hashedOk = hashedOk || (hash <= next); + } else { + hashedOk = hashedOk && (hash <= next); + } + if (!hashedOk) { + elementData[index] = object; + index = next; + elementData[index] = null; + } + } + + return (V) result; + } + + + + /** + * Returns whether this Map has no elements. + * + * @return {@code true} if this Map has no elements, + * {@code false} otherwise. + * @see #size() + */ + public boolean isEmpty() { + return size == 0; + } + + /** + * Returns the number of mappings in this Map. + * + * @return the number of mappings in this Map. + */ + public int size() { + return size; + } + + /** + * Special API to return next value - substitute of regular Map.values.iterator().next() + * @return next available value or null if none available + */ + public V getNextValue() { + for (int i = 0; i < elementData.length; i++) { + if (elementData[i] != null) { + return (V)elementData[i]; + } + } + return null; + } +} \ No newline at end of file diff --git a/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java new file mode 100644 index 00000000000..7bd9cecf970 --- /dev/null +++ b/java/memory/src/main/java/org/apache/arrow/memory/ValueWithKeyIncluded.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.memory; + +/** + * Helper Iface to generify a value to be included in the map where + * key is part of the value + */ +public interface ValueWithKeyIncluded { + K getKey(); +} diff --git a/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java b/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java new file mode 100644 index 00000000000..c1196147b49 --- /dev/null +++ b/java/memory/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHasMap.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.memory; + +import static junit.framework.TestCase.assertNotNull; +import static junit.framework.TestCase.assertTrue; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Test; + +/** + * To test simplified implementation of IdentityHashMap + */ +public class TestLowCostIdentityHasMap { + + @Test + public void testIdentityHashMap() throws Exception { + LowCostIdentityHasMap hashMap = new LowCostIdentityHasMap<>(); + + StringWithKey obj1 = new StringWithKey("s1key", "s1value"); + StringWithKey obj2 = new StringWithKey("s2key", "s2value"); + StringWithKey obj3 = new StringWithKey("s3key", "s3value"); + StringWithKey obj4 = new StringWithKey("s1key", "s4value"); + StringWithKey obj5 = new StringWithKey("s5key", "s5value"); + + assertNull(hashMap.put(obj1)); + assertNull(hashMap.put(obj2)); + assertNull(hashMap.put(obj3)); + assertEquals(obj1, hashMap.put(obj4)); + assertNull(hashMap.put(obj5)); + + assertEquals(4, hashMap.size()); + + assertEquals(obj4,hashMap.get("s1key")); + + assertNull(hashMap.remove("abc")); + + assertEquals(obj3,hashMap.remove("s3key")); + + assertEquals(3, hashMap.size()); + + assertTrue(!hashMap.isEmpty()); + + StringWithKey nextValue = hashMap.getNextValue(); + + assertNotNull(nextValue); + + assertTrue((hashMap.get("s1key") == nextValue || hashMap.get("s2key") == nextValue || + hashMap.get("s5key") == nextValue)); + + assertTrue(hashMap.containsValue(obj4)); + assertTrue(hashMap.containsValue(obj2)); + assertTrue(hashMap.containsValue(obj5)); + + assertEquals(obj4,hashMap.remove("s1key")); + + nextValue = hashMap.getNextValue(); + + assertNotNull(nextValue); + + assertTrue(hashMap.get("s2key") == nextValue || hashMap.get("s5key") == nextValue); + + assertEquals(2, hashMap.size()); + + assertEquals(obj2,hashMap.remove("s2key")); + assertEquals(obj5,hashMap.remove("s5key")); + + assertEquals(0, hashMap.size()); + + assertTrue(hashMap.isEmpty()); + } + + @Test + public void testLargeMap() throws Exception { + LowCostIdentityHasMap hashMap = new LowCostIdentityHasMap<>(); + + String [] keys = new String[200]; + for (int i = 0; i < 200; i++) { + keys[i] = "s"+i+"key"; + } + + for (int i = 0; i < 100; i++) { + if (i % 5 == 0 && i != 0) { + StringWithKey obj = new StringWithKey(keys[i-5], "s" + i + "value"); + StringWithKey retObj = hashMap.put(obj); + assertNotNull(retObj); + StringWithKey obj1 = new StringWithKey(keys[i], "s" + 2*i + "value"); + StringWithKey retObj1 = hashMap.put(obj1); + assertNull(retObj1); + } else { + StringWithKey obj = new StringWithKey(keys[i], "s" + i + "value"); + StringWithKey retObj = hashMap.put(obj); + assertNull(retObj); + } + } + assertEquals(100, hashMap.size()); + for (int i = 0; i < 100; i++) { + StringWithKey returnObj = hashMap.get(keys[i]); + assertNotNull(returnObj); + if (i == 95) { + assertEquals("s190value", returnObj.getValue()); + continue; + } + if (i % 5 == 0) { + assertEquals("s" + (i+5) + "value", returnObj.getValue()); + } else { + assertEquals("s" + i + "value", returnObj.getValue()); + } + } + + for (int i = 0; i < 100; i++) { + if (i % 4 == 0) { + StringWithKey returnObj = hashMap.remove(keys[i]); + assertNotNull(returnObj); + assertTrue(!hashMap.containsKey(keys[i])); + } + StringWithKey obj = new StringWithKey(keys[100+i], "s" + (100+i) + "value"); + StringWithKey retObj = hashMap.put(obj); + assertNull(retObj); + assertTrue(hashMap.containsKey(keys[100+i])); + } + assertEquals(175, hashMap.size()); + for (int i = 0; i < 100; i++) { + StringWithKey retObj = hashMap.getNextValue(); + assertNotNull(retObj); + hashMap.remove(retObj.getKey()); + } + assertTrue(!hashMap.isEmpty()); + assertEquals(75, hashMap.size()); + hashMap.clear(); + assertTrue(hashMap.isEmpty()); + } + + private class StringWithKey implements ValueWithKeyIncluded { + + private String myValue; + private String myKey; + + StringWithKey(String myKey, String myValue) { + this.myKey = myKey; + this.myValue = myValue; + } + + @Override + public String getKey() { + return myKey; + } + + String getValue() { + return myValue; + } + } +}