Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,51 @@

package org.apache.arrow.algorithm.sort;

import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH;

import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.holders.NullableVarCharHolder;
import org.apache.arrow.vector.ValueVector;

/**
* Default comparator implementations for different types of vectors.
*/
public class DefaultVectorComparators {

/**
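* Create the default comparator for the given vector, selected by the vector's runtime type.
* @param vector the vector to create a comparator for.
* @param <T> the vector type.
* @return the default comparator.
* @throws IllegalArgumentException if there is no default comparator for the vector's type.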
*/
public static <T extends ValueVector> VectorValueComparator<T> createDefaultComparator(T vector) {
if (vector instanceof BaseFixedWidthVector) {
if (vector instanceof TinyIntVector) {
return (VectorValueComparator<T>) new ByteComparator();
} else if (vector instanceof SmallIntVector) {
return (VectorValueComparator<T>) new ShortComparator();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the casts needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. It seems the Java compiler cannot infer the generic type based on the `instanceof` operator.

} else if (vector instanceof IntVector) {
return (VectorValueComparator<T>) new IntComparator();
} else if (vector instanceof BigIntVector) {
return (VectorValueComparator<T>) new LongComparator();
} else if (vector instanceof Float4Vector) {
return (VectorValueComparator<T>) new Float4Comparator();
} else if (vector instanceof Float8Vector) {
return (VectorValueComparator<T>) new Float8Comparator();
}
} else if (vector instanceof BaseVariableWidthVector) {
return (VectorValueComparator<T>) new VariableWidthComparator();
}

throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
}

/**
* Default comparator for bytes.
* The comparison is based on values, with nulls coming first.
Expand Down Expand Up @@ -169,26 +200,26 @@ public int compareNotNull(int index1, int index2) {
}

/**
* Default comparator for varchars.
* Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
* The comparison is in lexicographic order, with nulls coming first.
*/
public static class VarCharComparator extends VectorValueComparator<VarCharVector> {

private NullableVarCharHolder holder1 = new NullableVarCharHolder();
private NullableVarCharHolder holder2 = new NullableVarCharHolder();
public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {

@Override
public int compareNotNull(int index1, int index2) {
vector1.get(index1, holder1);
vector2.get(index2, holder2);
int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);

int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);

int length1 = holder1.end - holder1.start;
int length2 = holder2.end - holder2.start;
int length1 = end1 - start1;
int length2 = end2 - start2;

int minLength = length1 < length2 ? length1 : length2;
for (int i = 0; i < minLength; i++) {
byte b1 = holder1.buffer.getByte(holder1.start + i);
byte b2 = holder2.buffer.getByte(holder2.start + i);
byte b1 = vector1.getDataBuffer().getByte(start1 + i);
byte b2 = vector2.getDataBuffer().getByte(start2 + i);

if (b1 != b2) {
return b1 - b2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.junit.After;
Expand Down Expand Up @@ -68,7 +69,8 @@ public void testBinarySearchInt() {
negVector.set(0, -333);

// do search
VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
VectorValueComparator<IntVector> comparator =
DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down Expand Up @@ -99,7 +101,8 @@ public void testLinearSearchInt() {
negVector.set(0, -333);

// do search
VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
VectorValueComparator<IntVector> comparator =
DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down Expand Up @@ -137,7 +140,8 @@ public void testBinarySearchVarChar() {
negVector.set(0, "abcd".getBytes());

// do search
VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
VectorValueComparator<BaseVariableWidthVector> comparator =
DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down Expand Up @@ -175,7 +179,8 @@ public void testLinearSearchVarChar() {
negVector.set(0, "abcd".getBytes());

// do search
VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
VectorValueComparator<BaseVariableWidthVector> comparator =
DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void testSortInt() {

// sort the vector
FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

sorter.sortInPlace(vec, comparator);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public void testSortByte() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.ByteComparator comparator = new DefaultVectorComparators.ByteComparator();
VectorValueComparator<TinyIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

TinyIntVector sortedVec =
(TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
Expand Down Expand Up @@ -117,7 +117,7 @@ public void testSortShort() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.ShortComparator comparator = new DefaultVectorComparators.ShortComparator();
VectorValueComparator<SmallIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

SmallIntVector sortedVec =
(SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
Expand Down Expand Up @@ -164,7 +164,7 @@ public void testSortInt() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
Expand Down Expand Up @@ -210,7 +210,7 @@ public void testSortLong() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.LongComparator comparator = new DefaultVectorComparators.LongComparator();
VectorValueComparator<BigIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
Expand Down Expand Up @@ -256,7 +256,7 @@ public void testSortFloat() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.Float4Comparator comparator = new DefaultVectorComparators.Float4Comparator();
VectorValueComparator<Float4Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
Expand Down Expand Up @@ -302,7 +302,7 @@ public void testSortDobule() {

// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.Float8Comparator comparator = new DefaultVectorComparators.Float8Comparator();
VectorValueComparator<Float8Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);

Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.VarCharVector;
import org.junit.After;
import org.junit.Assert;
Expand Down Expand Up @@ -65,7 +66,8 @@ public void testSortString() {

// sort the vector
VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator();
VectorValueComparator<BaseVariableWidthVector> comparator =
DefaultVectorComparators.createDefaultComparator(vec);

VarCharVector sortedVec =
(VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
Expand Down