From 314d697ceead61acea7fae27c7212b262a8ecb34 Mon Sep 17 00:00:00 2001 From: tianchen Date: Wed, 10 Jul 2019 17:12:24 +0800 Subject: [PATCH 1/8] ARROW-1184: [Java] Dictionary.equals is not working correctly --- .../arrow/vector/dictionary/Dictionary.java | 32 ++- .../apache/arrow/vector/TestDictionary.java | 192 ++++++++++++++++++ 2 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index ccbdc9c19f4..c37e2404e72 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -17,6 +17,7 @@ package org.apache.arrow.vector.dictionary; +import java.util.Arrays; import java.util.Objects; import org.apache.arrow.vector.FieldVector; @@ -63,11 +64,40 @@ public boolean equals(Object o) { return false; } Dictionary that = (Dictionary) o; - return Objects.equals(encoding, that.encoding) && Objects.equals(dictionary, that.dictionary); + return Objects.equals(encoding, that.encoding) && equals(dictionary, that.dictionary); } @Override public int hashCode() { return Objects.hash(encoding, dictionary); } + + private boolean equals(FieldVector vector1, FieldVector vector2) { + + if (vector1.getClass() != vector2.getClass()) { + return false; + } + int valueCount = vector1.getValueCount(); + if (valueCount != vector2.getValueCount()) { + return false; + } + ArrowType fieldType = vector1.getField().getType(); + for (int j = 0; j < valueCount; j++) { + Object obj1 = vector1.getObject(j); + Object obj2 = vector2.getObject(j); + if (!equals(fieldType, obj1, obj2)) { + return false; + } + } + return true; + } + + private boolean equals(ArrowType type, final Object o1, final Object o2) { + if (type instanceof ArrowType.Binary || type instanceof ArrowType.FixedSizeBinary) { + //TODO use ByteArrayWrapper to compare, see ARROW-5835 + return Arrays.equals((byte[]) o1, (byte[]) o2); + } + + return Objects.equals(o1, o2); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java new file mode 100644 index 00000000000..6c0e721c29b --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.complex.impl.NullableStructWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.holders.NullableIntHolder; +import org.apache.arrow.vector.holders.NullableUInt4Holder; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestDictionary { + + private BufferAllocator allocator; + + @Before + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @After + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testEquals() { + + //test Int + try (final IntVector vector1 = new IntVector("v1", allocator); + final IntVector vector2 = new IntVector("v2", allocator)) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + vector1.allocateNew(3); + vector1.setValueCount(3); + vector2.allocateNew(3); + vector2.setValueCount(3); + + vector1.setSafe(0, 1); + vector1.setSafe(1, 2); + vector1.setSafe(2, 3); + + vector2.setSafe(0, 1); + vector2.setSafe(1, 2); + vector2.setSafe(2, 0); + + + assertFalse(dict1.equals(dict2)); + + vector2.setSafe(2, 3); + assertTrue(dict1.equals(dict2)); + + } + + //test List + try (final ListVector vector1 = ListVector.empty("v1", allocator); + final ListVector vector2 = ListVector.empty("v2", allocator);) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + UnionListWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + //set some values + writeListVector(writer1, new int[]{1, 2}); + writeListVector(writer1, new int[]{3, 4}); + writeListVector(writer1, new int[]{5, 6}); + writer1.setValueCount(3); + + UnionListWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + //set some values + writeListVector(writer2, new int[]{1, 2}); + writeListVector(writer2, new int[]{3, 4}); + writeListVector(writer2, new int[]{5, 6}); + writer2.setValueCount(3); + + assertTrue(dict1.equals(dict2)); + + } + + //test Struct + try (final StructVector vector1 = StructVector.empty("v1", allocator); + final StructVector vector2 = StructVector.empty("v2", allocator);) { + vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + NullableStructWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + writeStructVector(writer1, 1, 10L); + writeStructVector(writer1, 2, 20L); + writer1.setValueCount(2); + + NullableStructWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + writeStructVector(writer2, 1, 10L); + writeStructVector(writer2, 2, 20L); + writer2.setValueCount(2); + + assertTrue(dict1.equals(dict2)); + + } + + //test Union + try (final UnionVector vector1 = new UnionVector("v1", allocator, null); + final UnionVector vector2 = new UnionVector("v1", allocator, null);) { + + final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); + uInt4Holder.value = 10; + uInt4Holder.isSet = 1; + + final NullableIntHolder intHolder = new NullableIntHolder(); + uInt4Holder.value = 20; + uInt4Holder.isSet = 1; + + vector1.setType(0, Types.MinorType.UINT4); + vector1.setSafe(0, uInt4Holder); + + vector1.setType(2, Types.MinorType.INT); + vector1.setSafe(2, intHolder); + vector1.setValueCount(3); + + vector2.setType(0, Types.MinorType.UINT4); + vector2.setSafe(0, uInt4Holder); + + vector2.setType(2, Types.MinorType.INT); + vector2.setSafe(2, intHolder); + vector2.setValueCount(3); + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + assertTrue(dict1.equals(dict2)); + + } + + } + + private void writeStructVector(NullableStructWriter writer, int value1, long value2) { + writer.start(); + writer.integer("f0").writeInt(value1); + writer.bigInt("f1").writeBigInt(value2); + writer.end(); + } + + private void writeListVector(UnionListWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } +} From be74915615352cc2918d8035db09f9b866213ef1 Mon Sep 17 00:00:00 2001 From: tianchen Date: Fri, 12 Jul 2019 10:11:09 +0800 Subject: [PATCH 2/8] move UT --- .../arrow/vector/dictionary/Dictionary.java | 2 +- .../apache/arrow/vector/TestDictionary.java | 192 ------------------ .../arrow/vector/TestDictionaryVector.java | 170 ++++++++++++++-- 3 files changed, 149 insertions(+), 215 deletions(-) delete mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index c37e2404e72..96bc8ac18ef 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -94,7 +94,7 @@ private boolean equals(FieldVector vector1, FieldVector vector2) { private boolean equals(ArrowType type, final Object o1, final Object o2) { if (type instanceof ArrowType.Binary || type instanceof ArrowType.FixedSizeBinary) { - //TODO use ByteArrayWrapper to compare, see ARROW-5835 + //TODO use ByteArrayWrapper to compare and add UT, see ARROW-5835 return Arrays.equals((byte[]) o1, (byte[]) o2); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java deleted file mode 100644 index 6c0e721c29b..00000000000 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionary.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.arrow.vector; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import org.apache.arrow.memory.BufferAllocator; -import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.StructVector; -import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.complex.impl.NullableStructWriter; -import org.apache.arrow.vector.complex.impl.UnionListWriter; -import org.apache.arrow.vector.dictionary.Dictionary; -import org.apache.arrow.vector.holders.NullableIntHolder; -import org.apache.arrow.vector.holders.NullableUInt4Holder; -import org.apache.arrow.vector.types.Types; -import org.apache.arrow.vector.types.pojo.ArrowType; -import org.apache.arrow.vector.types.pojo.DictionaryEncoding; -import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class TestDictionary { - - private BufferAllocator allocator; - - @Before - public void init() { - allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); - } - - @After - public void terminate() throws Exception { - allocator.close(); - } - - @Test - public void testEquals() { - - //test Int - try (final IntVector vector1 = new IntVector("v1", allocator); - final IntVector vector2 = new IntVector("v2", allocator)) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - vector1.allocateNew(3); - vector1.setValueCount(3); - vector2.allocateNew(3); - vector2.setValueCount(3); - - vector1.setSafe(0, 1); - vector1.setSafe(1, 2); - vector1.setSafe(2, 3); - - vector2.setSafe(0, 1); - vector2.setSafe(1, 2); - vector2.setSafe(2, 0); - - - assertFalse(dict1.equals(dict2)); - - vector2.setSafe(2, 3); - assertTrue(dict1.equals(dict2)); - - } - - //test List - try (final ListVector vector1 = ListVector.empty("v1", allocator); - final ListVector vector2 = ListVector.empty("v2", allocator);) { - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - UnionListWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - //set some values - writeListVector(writer1, new int[]{1, 2}); - writeListVector(writer1, new int[]{3, 4}); - writeListVector(writer1, new int[]{5, 6}); - writer1.setValueCount(3); - - UnionListWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - //set some values - writeListVector(writer2, new int[]{1, 2}); - writeListVector(writer2, new int[]{3, 4}); - writeListVector(writer2, new int[]{5, 6}); - writer2.setValueCount(3); - - assertTrue(dict1.equals(dict2)); - - } - - //test Struct - try (final StructVector vector1 = StructVector.empty("v1", allocator); - final StructVector vector2 = StructVector.empty("v2", allocator);) { - vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); - vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - NullableStructWriter writer1 = vector1.getWriter(); - writer1.allocate(); - - writeStructVector(writer1, 1, 10L); - writeStructVector(writer1, 2, 20L); - writer1.setValueCount(2); - - NullableStructWriter writer2 = vector2.getWriter(); - writer2.allocate(); - - writeStructVector(writer2, 1, 10L); - writeStructVector(writer2, 2, 20L); - writer2.setValueCount(2); - - assertTrue(dict1.equals(dict2)); - - } - - //test Union - try (final UnionVector vector1 = new UnionVector("v1", allocator, null); - final UnionVector vector2 = new UnionVector("v1", allocator, null);) { - - final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); - uInt4Holder.value = 10; - uInt4Holder.isSet = 1; - - final NullableIntHolder intHolder = new NullableIntHolder(); - uInt4Holder.value = 20; - uInt4Holder.isSet = 1; - - vector1.setType(0, Types.MinorType.UINT4); - vector1.setSafe(0, uInt4Holder); - - vector1.setType(2, Types.MinorType.INT); - vector1.setSafe(2, intHolder); - vector1.setValueCount(3); - - vector2.setType(0, Types.MinorType.UINT4); - vector2.setSafe(0, uInt4Holder); - - vector2.setType(2, Types.MinorType.INT); - vector2.setSafe(2, intHolder); - vector2.setValueCount(3); - - Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); - Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); - - assertTrue(dict1.equals(dict2)); - - } - - } - - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } - - private void writeListVector(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v: values) { - writer.integer().writeInt(v); - } - writer.endList(); - } -} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index e0bd218d47b..906db710d78 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -20,6 +20,7 @@ import static org.apache.arrow.vector.TestUtils.newVarBinaryVector; import static org.apache.arrow.vector.TestUtils.newVarCharVector; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.nio.charset.StandardCharsets; @@ -67,7 +68,7 @@ public void terminate() throws Exception { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = newVarCharVector("foo", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) { + final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) { vector.allocateNew(512, 5); // set some values @@ -85,13 +86,14 @@ public void testEncodeStrings() { dictionaryVector.setSafe(2, two, 0, two.length); dictionaryVector.setValueCount(3); - Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + Dictionary dictionary = + new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); try (final ValueVector encoded = (FieldVector) DictionaryEncoder.encode(vector, dictionary)) { // verify indices assertEquals(IntVector.class, encoded.getClass()); - IntVector index = ((IntVector)encoded); + IntVector index = ((IntVector) encoded); assertEquals(5, index.getValueCount()); assertEquals(0, index.get(0)); assertEquals(1, index.get(1)); @@ -102,9 +104,9 @@ public void testEncodeStrings() { // now run through the decoder and verify we get the original back try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) { assertEquals(vector.getClass(), decoded.getClass()); - assertEquals(vector.getValueCount(), ((VarCharVector)decoded).getValueCount()); + assertEquals(vector.getValueCount(), ((VarCharVector) decoded).getValueCount()); for (int i = 0; i < 5; i++) { - assertEquals(vector.getObject(i), ((VarCharVector)decoded).getObject(i)); + assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i)); } } } @@ -115,7 +117,7 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = newVarCharVector("foo", allocator); - final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) { + final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) { vector.allocateNew(); int count = 10000; @@ -131,7 +133,8 @@ public void testEncodeLargeVector() { dictionaryVector.setSafe(2, two, 0, two.length); dictionaryVector.setValueCount(3); - Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + Dictionary dictionary = + new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); try (final ValueVector encoded = (FieldVector) DictionaryEncoder.encode(vector, dictionary)) { @@ -156,14 +159,6 @@ public void testEncodeLargeVector() { } } - private void writeListVector(UnionListWriter writer, int[] values) { - writer.startList(); - for (int v: values) { - writer.integer().writeInt(v); - } - writer.endList(); - } - @Test public void testEncodeList() { // Create a new value vector @@ -218,13 +213,6 @@ public void testEncodeList() { } } - private void writeStructVector(NullableStructWriter writer, int value1, long value2) { - writer.start(); - writer.integer("f0").writeInt(value1); - writer.bigInt("f1").writeBigInt(value2); - writer.end(); - } - @Test public void testEncodeStruct() { // Create a new value vector @@ -406,4 +394,142 @@ public void testEncodeUnion() { } } } + + @Test + public void testIntEquals() { + //test Int + try (final IntVector vector1 = new IntVector("v1", allocator); + final IntVector vector2 = new IntVector("v2", allocator)) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + vector1.allocateNew(3); + vector1.setValueCount(3); + vector2.allocateNew(3); + vector2.setValueCount(3); + + vector1.setSafe(0, 1); + vector1.setSafe(1, 2); + vector1.setSafe(2, 3); + + vector2.setSafe(0, 1); + vector2.setSafe(1, 2); + vector2.setSafe(2, 0); + + assertFalse(dict1.equals(dict2)); + + vector2.setSafe(2, 3); + assertTrue(dict1.equals(dict2)); + } + } + + @Test + public void testListEquals() { + try (final ListVector vector1 = ListVector.empty("v1", allocator); + final ListVector vector2 = ListVector.empty("v2", allocator);) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + UnionListWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + //set some values + writeListVector(writer1, new int[] {1, 2}); + writeListVector(writer1, new int[] {3, 4}); + writeListVector(writer1, new int[] {5, 6}); + writer1.setValueCount(3); + + UnionListWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + //set some values + writeListVector(writer2, new int[] {1, 2}); + writeListVector(writer2, new int[] {3, 4}); + writeListVector(writer2, new int[] {5, 6}); + writer2.setValueCount(3); + + assertTrue(dict1.equals(dict2)); + } + } + + @Test + public void testStructEquals() { + try (final StructVector vector1 = StructVector.empty("v1", allocator); + final StructVector vector2 = StructVector.empty("v2", allocator);) { + vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + NullableStructWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + writeStructVector(writer1, 1, 10L); + writeStructVector(writer1, 2, 20L); + writer1.setValueCount(2); + + NullableStructWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + writeStructVector(writer2, 1, 10L); + writeStructVector(writer2, 2, 20L); + writer2.setValueCount(2); + + assertTrue(dict1.equals(dict2)); + } + } + + @Test + public void testUnionEquals() { + try (final UnionVector vector1 = new UnionVector("v1", allocator, null); + final UnionVector vector2 = new UnionVector("v1", allocator, null);) { + + final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); + uInt4Holder.value = 10; + uInt4Holder.isSet = 1; + + final NullableIntHolder intHolder = new NullableIntHolder(); + uInt4Holder.value = 20; + uInt4Holder.isSet = 1; + + vector1.setType(0, Types.MinorType.UINT4); + vector1.setSafe(0, uInt4Holder); + + vector1.setType(2, Types.MinorType.INT); + vector1.setSafe(2, intHolder); + vector1.setValueCount(3); + + vector2.setType(0, Types.MinorType.UINT4); + vector2.setSafe(0, uInt4Holder); + + vector2.setType(2, Types.MinorType.INT); + vector2.setSafe(2, intHolder); + vector2.setValueCount(3); + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + assertTrue(dict1.equals(dict2)); + } + } + + private void writeStructVector(NullableStructWriter writer, int value1, long value2) { + writer.start(); + writer.integer("f0").writeInt(value1); + writer.bigInt("f1").writeBigInt(value2); + writer.end(); + } + + private void writeListVector(UnionListWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } } From 3fe9389cc659e1f4eb2230ca83a2d2bce21675e0 Mon Sep 17 00:00:00 2001 From: tianchen Date: Fri, 19 Jul 2019 11:43:32 +0800 Subject: [PATCH 3/8] use Validator logic in Dictionary#equals and add UT --- .../arrow/vector/dictionary/Dictionary.java | 28 ++------- .../apache/arrow/vector/util/Validator.java | 2 +- .../arrow/vector/TestDictionaryVector.java | 58 +++++++++++++++++++ 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index 96bc8ac18ef..5024cdfc756 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -17,12 +17,12 @@ package org.apache.arrow.vector.dictionary; -import java.util.Arrays; import java.util.Objects; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.util.Validator; /** * A dictionary (integer to Value mapping) that is used to facilitate @@ -73,31 +73,11 @@ public int hashCode() { } private boolean equals(FieldVector vector1, FieldVector vector2) { - - if (vector1.getClass() != vector2.getClass()) { - return false; - } - int valueCount = vector1.getValueCount(); - if (valueCount != vector2.getValueCount()) { + try { + Validator.compareFieldVectors(vector1, vector2); + } catch (IllegalArgumentException e) { return false; } - ArrowType fieldType = vector1.getField().getType(); - for (int j = 0; j < valueCount; j++) { - Object obj1 = vector1.getObject(j); - Object obj2 = vector2.getObject(j); - if (!equals(fieldType, obj1, obj2)) { - return false; - } - } return true; } - - private boolean equals(ArrowType type, final Object o1, final Object o2) { - if (type instanceof ArrowType.Binary || type instanceof ArrowType.FixedSizeBinary) { - //TODO use ByteArrayWrapper to compare and add UT, see ARROW-5835 - return Arrays.equals((byte[]) o1, (byte[]) o2); - } - - return Objects.equals(o1, o2); - } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java index 241f569c990..2646e046509 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java @@ -117,7 +117,7 @@ public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaR */ public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) { Field field1 = vector1.getField(); - if (!field1.equals(vector2.getField())) { + if (vector1.getClass() != vector2.getClass()) { throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" + vector2.getField()); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index 906db710d78..b5b04016bb1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -424,6 +424,64 @@ public void testIntEquals() { } } + @Test + public void testVarcharEquals() { + try (final VarCharVector vector1 = new VarCharVector("v1", allocator); + final VarCharVector vector2 = new VarCharVector("v2", allocator)) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + vector1.allocateNew(); + vector1.setValueCount(3); + vector2.allocateNew(); + vector2.setValueCount(3); + + // set some values + vector1.setSafe(0, zero, 0, zero.length); + vector1.setSafe(1, one, 0, one.length); + vector1.setSafe(2, two, 0, two.length); + + vector2.setSafe(0, zero, 0, zero.length); + vector2.setSafe(1, one, 0, one.length); + vector2.setSafe(2, one, 0, one.length); + + assertFalse(dict1.equals(dict2)); + + vector2.setSafe(2, two, 0, two.length); + assertTrue(dict1.equals(dict2)); + } + } + + @Test + public void testVarBinaryEquals() { + try (final VarBinaryVector vector1 = new VarBinaryVector("v1", allocator); + final VarBinaryVector vector2 = new VarBinaryVector("v2", allocator)) { + + Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); + Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); + + vector1.allocateNew(); + vector1.setValueCount(3); + vector2.allocateNew(); + vector2.setValueCount(3); + + // set some values + vector1.setSafe(0, zero, 0, zero.length); + vector1.setSafe(1, one, 0, one.length); + vector1.setSafe(2, two, 0, two.length); + + vector2.setSafe(0, zero, 0, zero.length); + vector2.setSafe(1, one, 0, one.length); + vector2.setSafe(2, one, 0, one.length); + + assertFalse(dict1.equals(dict2)); + + vector2.setSafe(2, two, 0, two.length); + assertTrue(dict1.equals(dict2)); + } + } + @Test public void testListEquals() { try (final ListVector vector1 = ListVector.empty("v1", allocator); From 87b7f66ea777073a32cff9c2dfddc37e739ebdd1 Mon Sep 17 00:00:00 2001 From: tianchen Date: Fri, 19 Jul 2019 11:47:04 +0800 Subject: [PATCH 4/8] fix --- .../java/org/apache/arrow/vector/dictionary/Dictionary.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index 5024cdfc756..794830d0e90 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -64,7 +64,7 @@ public boolean equals(Object o) { return false; } Dictionary that = (Dictionary) o; - return Objects.equals(encoding, that.encoding) && equals(dictionary, that.dictionary); + return encoding.equals(that.encoding) && compareFieldVector(dictionary, that.dictionary); } @Override @@ -72,7 +72,7 @@ public int hashCode() { return Objects.hash(encoding, dictionary); } - private boolean equals(FieldVector vector1, FieldVector vector2) { + private boolean compareFieldVector(FieldVector vector1, FieldVector vector2) { try { Validator.compareFieldVectors(vector1, vector2); } catch (IllegalArgumentException e) { From 1f41366d6e968fd7403073ae59a9ee5bd74e38a9 Mon Sep 17 00:00:00 2001 From: tianchen Date: Fri, 19 Jul 2019 14:22:53 +0800 Subject: [PATCH 5/8] fix build --- java/vector/src/main/codegen/templates/UnionVector.java | 6 +++++- .../java/org/apache/arrow/vector/TestDictionaryVector.java | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index b05005dad6a..c79dfd00863 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -518,7 +518,11 @@ private ValueVector getVector(int index) { } public Object getObject(int index) { - return getVector(index).getObject(index); + ValueVector vector = getVector(index); + if (vector != null) { + return vector.getObject(index); + } + return null; } public byte[] get(int index) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index b5b04016bb1..83041cf1d34 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -545,7 +545,7 @@ public void testStructEquals() { @Test public void testUnionEquals() { try (final UnionVector vector1 = new UnionVector("v1", allocator, null); - final UnionVector vector2 = new UnionVector("v1", allocator, null);) { + final UnionVector vector2 = new UnionVector("v2", allocator, null);) { final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); uInt4Holder.value = 10; From 0c69588545ef91ed01e0705ccfaa5a06fd665a44 Mon Sep 17 00:00:00 2001 From: tianchen Date: Tue, 23 Jul 2019 10:21:42 +0800 Subject: [PATCH 6/8] fix equals --- .../java/org/apache/arrow/vector/dictionary/Dictionary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index 794830d0e90..1944625f26c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -64,7 +64,7 @@ public boolean equals(Object o) { return false; } Dictionary that = (Dictionary) o; - return encoding.equals(that.encoding) && compareFieldVector(dictionary, that.dictionary); + return Objects.equals(encoding, that.encoding) && compareFieldVector(dictionary, that.dictionary); } @Override From 527a9d7469d04796739408de774715a4b1436889 Mon Sep 17 00:00:00 2001 From: tianchen Date: Wed, 24 Jul 2019 10:17:50 +0800 Subject: [PATCH 7/8] add TODO --- .../main/java/org/apache/arrow/vector/dictionary/Dictionary.java | 1 + 1 file changed, 1 insertion(+) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java index 1944625f26c..082d2ba1744 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java @@ -72,6 +72,7 @@ public int hashCode() { return Objects.hash(encoding, dictionary); } + //TODO after vector api support compare two vectors, this should be cleaned up private boolean compareFieldVector(FieldVector vector1, FieldVector vector2) { try { Validator.compareFieldVectors(vector1, vector2); From 3511857796706a4671e1167913ccaf709eb30a49 Mon Sep 17 00:00:00 2001 From: tianchen Date: Thu, 25 Jul 2019 11:26:36 +0800 Subject: [PATCH 8/8] revert --- .../apache/arrow/vector/util/Validator.java | 2 +- .../arrow/vector/TestDictionaryVector.java | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java index 2646e046509..241f569c990 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java @@ -117,7 +117,7 @@ public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaR */ public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) { Field field1 = vector1.getField(); - if (vector1.getClass() != vector2.getClass()) { + if (!field1.equals(vector2.getField())) { throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" + vector2.getField()); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index 83041cf1d34..2d6391b33c1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -398,8 +398,8 @@ public void testEncodeUnion() { @Test public void testIntEquals() { //test Int - try (final IntVector vector1 = new IntVector("v1", allocator); - final IntVector vector2 = new IntVector("v2", allocator)) { + try (final IntVector vector1 = new IntVector("", allocator); + final IntVector vector2 = new IntVector("", allocator)) { Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); @@ -426,8 +426,8 @@ public void testIntEquals() { @Test public void testVarcharEquals() { - try (final VarCharVector vector1 = new VarCharVector("v1", allocator); - final VarCharVector vector2 = new VarCharVector("v2", allocator)) { + try (final VarCharVector vector1 = new VarCharVector("", allocator); + final VarCharVector vector2 = new VarCharVector("", allocator)) { Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); @@ -455,8 +455,8 @@ public void testVarcharEquals() { @Test public void testVarBinaryEquals() { - try (final VarBinaryVector vector1 = new VarBinaryVector("v1", allocator); - final VarBinaryVector vector2 = new VarBinaryVector("v2", allocator)) { + try (final VarBinaryVector vector1 = new VarBinaryVector("", allocator); + final VarBinaryVector vector2 = new VarBinaryVector("", allocator)) { Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); @@ -484,8 +484,8 @@ public void testVarBinaryEquals() { @Test public void testListEquals() { - try (final ListVector vector1 = ListVector.empty("v1", allocator); - final ListVector vector2 = ListVector.empty("v2", allocator);) { + try (final ListVector vector1 = ListVector.empty("", allocator); + final ListVector vector2 = ListVector.empty("", allocator);) { Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null)); Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null)); @@ -514,8 +514,8 @@ public void testListEquals() { @Test public void testStructEquals() { - try (final StructVector vector1 = StructVector.empty("v1", allocator); - final StructVector vector2 = StructVector.empty("v2", allocator);) { + try (final StructVector vector1 = StructVector.empty("", allocator); + final StructVector vector2 = StructVector.empty("", allocator);) { vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class); vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); @@ -544,8 +544,8 @@ public void testStructEquals() { @Test public void testUnionEquals() { - try (final UnionVector vector1 = new UnionVector("v1", allocator, null); - final UnionVector vector2 = new UnionVector("v2", allocator, null);) { + try (final UnionVector vector1 = new UnionVector("", allocator, null); + final UnionVector vector2 = new UnionVector("", allocator, null);) { final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); uInt4Holder.value = 10;