From 46d2c1171ee50a2265f12c46ad93c986c3a17512 Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Fri, 12 Jul 2019 18:17:18 +0800 Subject: [PATCH 1/2] [ARROW-5920][Java] Support sort & compare for all variable width vectors --- .../sort/DefaultVectorComparators.java | 27 ++++++++++--------- .../algorithm/search/TestVectorSearcher.java | 7 +++-- ...stVariableWidthOutOfPlaceVectorSorter.java | 3 ++- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 2dfa0aaa7cc..535940e2bcc 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -17,14 +17,15 @@ package org.apache.arrow.algorithm.sort; +import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH; + +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.TinyIntVector; -import org.apache.arrow.vector.VarCharVector; -import org.apache.arrow.vector.holders.NullableVarCharHolder; /** * Default comparator implementations for different types of vectors. @@ -169,26 +170,26 @@ public int compareNotNull(int index1, int index2) { } /** - * Default comparator for varchars. + * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}. * The comparison is in lexicographic order, with null comes first. */ - public static class VarCharComparator extends VectorValueComparator { - - private NullableVarCharHolder holder1 = new NullableVarCharHolder(); - private NullableVarCharHolder holder2 = new NullableVarCharHolder(); + public static class VariableWidthComparator extends VectorValueComparator { @Override public int compareNotNull(int index1, int index2) { - vector1.get(index1, holder1); - vector2.get(index2, holder2); + int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH); + int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH); + + int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH); + int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH); - int length1 = holder1.end - holder1.start; - int length2 = holder2.end - holder2.start; + int length1 = end1 - start1; + int length2 = end2 - start2; int minLength = length1 < length2 ? length1 : length2; for (int i = 0; i < minLength; i++) { - byte b1 = holder1.buffer.getByte(holder1.start + i); - byte b2 = holder2.buffer.getByte(holder2.start + i); + byte b1 = vector1.getDataBuffer().getByte(start1 + i); + byte b2 = vector2.getDataBuffer().getByte(start2 + i); if (b1 != b2) { return b1 - b2; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index f5c29124765..41269cd5713 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -23,6 +23,7 @@ import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; import org.junit.After; @@ -137,7 +138,8 @@ public void testBinarySearchVarChar() { negVector.set(0, "abcd".getBytes()); // do search - VectorValueComparator comparator = new DefaultVectorComparators.VarCharComparator(); + VectorValueComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -175,7 +177,8 @@ public void testLinearSearchVarChar() { negVector.set(0, "abcd".getBytes()); // do search - VectorValueComparator comparator = new DefaultVectorComparators.VarCharComparator(); + VectorValueComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 68be2549de4..7ce4e3d7507 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -65,7 +65,8 @@ public void testSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator(); + DefaultVectorComparators.VariableWidthComparator comparator = + new DefaultVectorComparators.VariableWidthComparator(); VarCharVector sortedVec = (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); From cbd8c3fbbf45728f0c004135aef4d3ab234ec93c Mon Sep 17 00:00:00 2001 From: liyafan82 Date: Fri, 12 Jul 2019 19:33:10 +0800 Subject: [PATCH 2/2] [ARROW-5920] [Java] Provide a utility to create the default comparator --- .../sort/DefaultVectorComparators.java | 30 +++++++++++++++++++ .../algorithm/search/TestVectorSearcher.java | 10 ++++--- .../TestFixedWidthInPlaceVectorSorter.java | 2 +- .../TestFixedWidthOutOfPlaceVectorSorter.java | 12 ++++---- ...stVariableWidthOutOfPlaceVectorSorter.java | 5 ++-- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 535940e2bcc..a2d2f786603 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -19,6 +19,7 @@ import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH; +import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; @@ -26,12 +27,41 @@ import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.ValueVector; /** * Default comparator implementations for different types of vectors. */ public class DefaultVectorComparators { + /** + * Create the default comparator for the vector. + * @param vector the vector. + * @param the vector type. + * @return the default comparator. + */ + public static VectorValueComparator createDefaultComparator(T vector) { + if (vector instanceof BaseFixedWidthVector) { + if (vector instanceof TinyIntVector) { + return (VectorValueComparator) new ByteComparator(); + } else if (vector instanceof SmallIntVector) { + return (VectorValueComparator) new ShortComparator(); + } else if (vector instanceof IntVector) { + return (VectorValueComparator) new IntComparator(); + } else if (vector instanceof BigIntVector) { + return (VectorValueComparator) new LongComparator(); + } else if (vector instanceof Float4Vector) { + return (VectorValueComparator) new Float4Comparator(); + } else if (vector instanceof Float8Vector) { + return (VectorValueComparator) new Float8Comparator(); + } + } else if (vector instanceof BaseVariableWidthVector) { + return (VectorValueComparator) new VariableWidthComparator(); + } + + throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + } + /** * Default comparator for bytes. * The comparison is based on values, with null comes first. diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index 41269cd5713..02e2b20cc06 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -69,7 +69,8 @@ public void testBinarySearchInt() { negVector.set(0, -333); // do search - VectorValueComparator comparator = new DefaultVectorComparators.IntComparator(); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -100,7 +101,8 @@ public void testLinearSearchInt() { negVector.set(0, -333); // do search - VectorValueComparator comparator = new DefaultVectorComparators.IntComparator(); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -139,7 +141,7 @@ public void testBinarySearchVarChar() { // do search VectorValueComparator comparator = - new DefaultVectorComparators.VariableWidthComparator(); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -178,7 +180,7 @@ public void testLinearSearchVarChar() { // do search VectorValueComparator comparator = - new DefaultVectorComparators.VariableWidthComparator(); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java index 1a71a534516..ecbf9faf0b2 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -64,7 +64,7 @@ public void testSortInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index 4fc4a7ac5ea..1dfe946017e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -70,7 +70,7 @@ public void testSortByte() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.ByteComparator comparator = new DefaultVectorComparators.ByteComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); TinyIntVector sortedVec = (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); @@ -117,7 +117,7 @@ public void testSortShort() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.ShortComparator comparator = new DefaultVectorComparators.ShortComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); SmallIntVector sortedVec = (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); @@ -164,7 +164,7 @@ public void testSortInt() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); @@ -210,7 +210,7 @@ public void testSortLong() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.LongComparator comparator = new DefaultVectorComparators.LongComparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); @@ -256,7 +256,7 @@ public void testSortFloat() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.Float4Comparator comparator = new DefaultVectorComparators.Float4Comparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); @@ -302,7 +302,7 @@ public void testSortDobule() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.Float8Comparator comparator = new DefaultVectorComparators.Float8Comparator(); + VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 7ce4e3d7507..46b30602177 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -22,6 +22,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BaseVariableWidthVector; import org.apache.arrow.vector.VarCharVector; import org.junit.After; import org.junit.Assert; @@ -65,8 +66,8 @@ public void testSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - DefaultVectorComparators.VariableWidthComparator comparator = - new DefaultVectorComparators.VariableWidthComparator(); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);