From 4d8b570df092d3485dff2eaa087ca3d7f00d4eb2 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Thu, 17 Oct 2019 18:04:07 +0800 Subject: [PATCH 1/6] [ARROW-6738][Java] Fix problems with current union comparison logic --- .../main/codegen/templates/UnionVector.java | 2 +- .../vector/compare/RangeEqualsVisitor.java | 34 +++++++++------ .../apache/arrow/vector/TestValueVector.java | 2 +- .../compare/TestRangeEqualsVisitor.java | 42 +++++++++++++++++++ 4 files changed, 66 insertions(+), 14 deletions(-) diff --git a/java/vector/src/main/codegen/templates/UnionVector.java b/java/vector/src/main/codegen/templates/UnionVector.java index db3c8a89f5e..2c28c66beb3 100644 --- a/java/vector/src/main/codegen/templates/UnionVector.java +++ b/java/vector/src/main/codegen/templates/UnionVector.java @@ -550,7 +550,7 @@ public Iterator iterator() { return vectors.iterator(); } - private ValueVector getVector(int index) { + public ValueVector getVector(int index) { int type = typeBuffer.getByte(index * TYPE_WIDTH); switch (MinorType.values()[type]) { case NULL: diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 49d6125a82c..0cd9d10b14f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -24,7 +24,6 @@ import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; @@ -189,9 +188,15 @@ public Boolean visit(NullVector left, Range range) { /** * Creates a visitor to visit child vectors. * It is used for complex vector types. - * @return the visitor for child vecors. + * @return the visitor for child vectors. */ protected RangeEqualsVisitor createInnerVisitor(ValueVector leftInner, ValueVector rightInner) { + return this.createInnerVisitor(leftInner, rightInner, this.typeComparator); + } + + protected RangeEqualsVisitor createInnerVisitor( + ValueVector leftInner, ValueVector rightInner, + BiFunction typeComparator) { return new RangeEqualsVisitor(leftInner, rightInner, typeComparator); } @@ -199,16 +204,21 @@ protected boolean compareUnionVectors(Range range) { UnionVector leftVector = (UnionVector) left; UnionVector rightVector = (UnionVector) right; - List leftChildren = leftVector.getChildrenFromFields(); - List rightChildren = rightVector.getChildrenFromFields(); - - if (leftChildren.size() != rightChildren.size()) { - return false; - } - - for (int k = 0; k < leftChildren.size(); k++) { - RangeEqualsVisitor visitor = createInnerVisitor(leftChildren.get(k), rightChildren.get(k)); - if (!visitor.rangeEquals(range)) { + Range subRange = new Range(0, 0, 1); + for (int i = 0; i < range.getLength(); i++) { + subRange.setLeftStart(range.getLeftStart() + i).setRightStart(range.getRightStart() + i); + ValueVector leftSubVector = leftVector.getVector(range.getLeftStart() + i); + ValueVector rightSubVector = rightVector.getVector(range.getRightStart() + i); + + if (leftSubVector == null || rightSubVector == null) { + if (leftSubVector == rightSubVector) { + continue; + } else { + return false; + } + } + RangeEqualsVisitor visitor = createInnerVisitor(leftSubVector, rightSubVector, true); + if (!visitor.rangeEquals(subRange)) { return false; } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 65bc963acb5..308c88ac62d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -2320,7 +2320,7 @@ public void testDecimalVectorEquals() { VectorEqualsVisitor visitor2 = new VectorEqualsVisitor(); assertTrue(visitor1.vectorEquals(vector1, vector2)); - assertFalse(visitor2.vectorEquals(vector1, vector3)); + assertTrue(visitor2.vectorEquals(vector1, vector3)); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index 183c8abac18..45fc7958f10 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -282,6 +282,48 @@ public void testUnionVectorRangeEquals() { } } + /** + * Test comparing two union vectors. + * The two vectors are different in total, but have a range with equal values. + */ + @Test + public void testUnionVectorSubRangeEquals() { + try (final UnionVector vector1 = new UnionVector("union", allocator, null, null); + final UnionVector vector2 = new UnionVector("union", allocator, null, null);) { + + final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder(); + uInt4Holder.value = 10; + uInt4Holder.isSet = 1; + + final NullableIntHolder intHolder = new NullableIntHolder(); + uInt4Holder.value = 20; + uInt4Holder.isSet = 1; + + vector1.setType(0, Types.MinorType.UINT4); + vector1.setSafe(0, uInt4Holder); + + vector1.setType(1, Types.MinorType.INT); + vector1.setSafe(1, intHolder); + + vector1.setType(2, Types.MinorType.INT); + vector1.setSafe(2, intHolder); + vector1.setValueCount(3); + + vector2.setType(0, Types.MinorType.SMALLINT); + vector2.setSafe(0, intHolder); + + vector2.setType(1, Types.MinorType.INT); + vector2.setSafe(1, intHolder); + + vector2.setType(2, Types.MinorType.INT); + vector2.setSafe(2, intHolder); + vector2.setValueCount(3); + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + assertTrue(visitor.rangeEquals(new Range(1, 1, 2))); + } + } + @Ignore @Test public void testEqualsWithOutTypeCheck() { From 5b2225e3c7408f4e61c370e80de1de61b11a80db Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Fri, 18 Oct 2019 14:00:35 +0800 Subject: [PATCH 2/6] [ARROW-6738][Java] Fix the bug with decimal vector --- .../vector/compare/RangeEqualsVisitor.java | 18 ++++++++++++++++++ .../apache/arrow/vector/TestValueVector.java | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 0cd9d10b14f..a6d0e6193c3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -24,6 +24,7 @@ import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; @@ -31,6 +32,7 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.types.pojo.ArrowType; /** * Visitor to compare a range of values for vectors. @@ -244,7 +246,23 @@ protected boolean compareStructVectors(Range range) { return true; } + private boolean compareDecimalVectorTypes() { + ArrowType.Decimal leftType = (ArrowType.Decimal) left.getField().getType(); + ArrowType.Decimal rightType = (ArrowType.Decimal) right.getField().getType(); + + if (leftType.getPrecision() != rightType.getPrecision() || leftType.getScale() != rightType.getScale()) { + return false; + } + return true; + } + protected boolean compareBaseFixedWidthVectors(Range range) { + if (left instanceof DecimalVector) { + if (!compareDecimalVectorTypes()) { + return false; + } + } + BaseFixedWidthVector leftVector = (BaseFixedWidthVector) left; BaseFixedWidthVector rightVector = (BaseFixedWidthVector) right; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 308c88ac62d..65bc963acb5 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -2320,7 +2320,7 @@ public void testDecimalVectorEquals() { VectorEqualsVisitor visitor2 = new VectorEqualsVisitor(); assertTrue(visitor1.vectorEquals(vector1, vector2)); - assertTrue(visitor2.vectorEquals(vector1, vector3)); + assertFalse(visitor2.vectorEquals(vector1, vector3)); } } From bab74028fcef9a5161359f05558521cdfeaf5e84 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Sat, 19 Oct 2019 17:09:36 +0800 Subject: [PATCH 3/6] [ARROW-6738][Java] Compare fields for all vectors except union vectors --- .../vector/compare/RangeEqualsVisitor.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index a6d0e6193c3..0cd9d10b14f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -24,7 +24,6 @@ import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; -import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; @@ -32,7 +31,6 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.NonNullableStructVector; import org.apache.arrow.vector.complex.UnionVector; -import org.apache.arrow.vector.types.pojo.ArrowType; /** * Visitor to compare a range of values for vectors. @@ -246,23 +244,7 @@ protected boolean compareStructVectors(Range range) { return true; } - private boolean compareDecimalVectorTypes() { - ArrowType.Decimal leftType = (ArrowType.Decimal) left.getField().getType(); - ArrowType.Decimal rightType = (ArrowType.Decimal) right.getField().getType(); - - if (leftType.getPrecision() != rightType.getPrecision() || leftType.getScale() != rightType.getScale()) { - return false; - } - return true; - } - protected boolean compareBaseFixedWidthVectors(Range range) { - if (left instanceof DecimalVector) { - if (!compareDecimalVectorTypes()) { - return false; - } - } - BaseFixedWidthVector leftVector = (BaseFixedWidthVector) left; BaseFixedWidthVector rightVector = (BaseFixedWidthVector) right; From c5153931ebe7d4a8dff86f1995a18bcd8f37743b Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Mon, 13 Jan 2020 12:12:45 +0800 Subject: [PATCH 4/6] [ARROW-6738][Java] Rule out the change for union type comparison --- .../org/apache/arrow/vector/compare/RangeEqualsVisitor.java | 4 +++- .../apache/arrow/vector/compare/TestRangeEqualsVisitor.java | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 0cd9d10b14f..53351b63e8b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -217,7 +217,9 @@ protected boolean compareUnionVectors(Range range) { return false; } } - RangeEqualsVisitor visitor = createInnerVisitor(leftSubVector, rightSubVector, true); + TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector); + RangeEqualsVisitor visitor = + createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left)); if (!visitor.rangeEquals(subRange)) { return false; } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index 45fc7958f10..a297228a759 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -309,8 +309,8 @@ public void testUnionVectorSubRangeEquals() { vector1.setSafe(2, intHolder); vector1.setValueCount(3); - vector2.setType(0, Types.MinorType.SMALLINT); - vector2.setSafe(0, intHolder); + vector2.setType(0, Types.MinorType.UINT4); + vector2.setSafe(0, uInt4Holder); vector2.setType(1, Types.MinorType.INT); vector2.setSafe(1, intHolder); From c008289ec994b13323532cc580d8421e945dbf15 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 21 Jan 2020 17:24:19 +0800 Subject: [PATCH 5/6] [ARROW-6738][Java] Resolve test failure after rebasing --- .../arrow/vector/compare/ApproxEqualsVisitor.java | 6 ++++-- .../arrow/vector/compare/RangeEqualsVisitor.java | 15 +++------------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java index ab5ce94e7e3..bcf8c64e0ce 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java @@ -109,8 +109,10 @@ public Boolean visit(BaseFixedWidthVector left, Range range) { } @Override - protected ApproxEqualsVisitor createInnerVisitor(ValueVector left, ValueVector right) { - return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone()); + protected ApproxEqualsVisitor createInnerVisitor( + ValueVector left, ValueVector right, + BiFunction typeComparator) { + return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone(), typeComparator); } private boolean float4ApproxEquals(Range range) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 53351b63e8b..688ac4eaf95 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -185,15 +185,6 @@ public Boolean visit(NullVector left, Range range) { return true; } - /** - * Creates a visitor to visit child vectors. - * It is used for complex vector types. - * @return the visitor for child vectors. - */ - protected RangeEqualsVisitor createInnerVisitor(ValueVector leftInner, ValueVector rightInner) { - return this.createInnerVisitor(leftInner, rightInner, this.typeComparator); - } - protected RangeEqualsVisitor createInnerVisitor( ValueVector leftInner, ValueVector rightInner, BiFunction typeComparator) { @@ -237,7 +228,7 @@ protected boolean compareStructVectors(Range range) { } for (String name : leftChildNames) { - RangeEqualsVisitor visitor = createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name)); + RangeEqualsVisitor visitor = createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), null); if (!visitor.rangeEquals(range)) { return false; } @@ -316,7 +307,7 @@ protected boolean compareListVectors(Range range) { ListVector leftVector = (ListVector) left; ListVector rightVector = (ListVector) right; - RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector()); + RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), null); Range innerRange = new Range(); for (int i = 0; i < range.getLength(); i++) { @@ -362,7 +353,7 @@ protected boolean compareFixedSizeListVectors(Range range) { } int listSize = leftVector.getListSize(); - RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector()); + RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), null); Range innerRange = new Range(0, 0, listSize); for (int i = 0; i < range.getLength(); i++) { From d6ef3d220b9738e895f73e7735acc02532ee44ce Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Tue, 28 Jan 2020 17:05:17 +0800 Subject: [PATCH 6/6] [ARROW-6738][Java] Refine test case --- .../vector/compare/RangeEqualsVisitor.java | 9 ++++++--- .../vector/compare/TestRangeEqualsVisitor.java | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 688ac4eaf95..f658af79ed0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -228,7 +228,8 @@ protected boolean compareStructVectors(Range range) { } for (String name : leftChildNames) { - RangeEqualsVisitor visitor = createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), null); + RangeEqualsVisitor visitor = + createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), /*type comparator*/ null); if (!visitor.rangeEquals(range)) { return false; } @@ -307,7 +308,8 @@ protected boolean compareListVectors(Range range) { ListVector leftVector = (ListVector) left; ListVector rightVector = (ListVector) right; - RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), null); + RangeEqualsVisitor innerVisitor = + createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); Range innerRange = new Range(); for (int i = 0; i < range.getLength(); i++) { @@ -353,7 +355,8 @@ protected boolean compareFixedSizeListVectors(Range range) { } int listSize = leftVector.getListSize(); - RangeEqualsVisitor innerVisitor = createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), null); + RangeEqualsVisitor innerVisitor = + createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); Range innerRange = new Range(0, 0, listSize); for (int i = 0; i < range.getLength(); i++) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index a297228a759..c35fd4107a9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -296,8 +296,8 @@ public void testUnionVectorSubRangeEquals() { uInt4Holder.isSet = 1; final NullableIntHolder intHolder = new NullableIntHolder(); - uInt4Holder.value = 20; - uInt4Holder.isSet = 1; + intHolder.value = 20; + intHolder.isSet = 1; vector1.setType(0, Types.MinorType.UINT4); vector1.setSafe(0, uInt4Holder); @@ -307,7 +307,11 @@ public void testUnionVectorSubRangeEquals() { vector1.setType(2, Types.MinorType.INT); vector1.setSafe(2, intHolder); - vector1.setValueCount(3); + + vector1.setType(3, Types.MinorType.INT); + vector1.setSafe(3, intHolder); + + vector1.setValueCount(4); vector2.setType(0, Types.MinorType.UINT4); vector2.setSafe(0, uInt4Holder); @@ -317,9 +321,14 @@ public void testUnionVectorSubRangeEquals() { vector2.setType(2, Types.MinorType.INT); vector2.setSafe(2, intHolder); - vector2.setValueCount(3); + + vector2.setType(3, Types.MinorType.UINT4); + vector2.setSafe(3, uInt4Holder); + + vector2.setValueCount(4); RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + assertFalse(visitor.rangeEquals(new Range(0, 0, 4))); assertTrue(visitor.rangeEquals(new Range(1, 1, 2))); } }