Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion integration/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,46 @@ def generate_column(self, size, name=None):
return self.column_class(name, size, is_valid, values)


class FixedSizeBinaryType(PrimitiveType):
    """Field type describing binary values that all share one byte width.

    Parameters
    ----------
    name : field name, forwarded to the base-class constructor
    byte_width : number of bytes in every generated value
    nullable : forwarded to the base-class constructor
    """

    def __init__(self, name, byte_width, nullable=True):
        super(FixedSizeBinaryType, self).__init__(name, nullable=nullable)
        self.byte_width = byte_width

    @property
    def numpy_type(self):
        # Values are raw byte strings, so there is no numeric dtype to report.
        return object

    @property
    def column_class(self):
        return FixedSizeBinaryColumn

    def _get_type(self):
        """Return the JSON type descriptor, including the byte width."""
        return OrderedDict([
            ('name', 'fixedsizebinary'),
            ('byteWidth', self.byte_width),
        ])

    def _get_type_layout(self):
        """Return the vector layout: a validity vector and a data vector."""
        # NOTE(review): the DATA entry reports byte_width unchanged under the
        # 'typeBitWidth' key (not byte_width * 8) -- confirm that this is what
        # the consumers of the layout expect.
        return OrderedDict([
            ('vectors',
             [OrderedDict([('type', 'VALIDITY'),
                           ('typeBitWidth', 1)]),
              OrderedDict([('type', 'DATA'),
                           ('typeBitWidth', self.byte_width)])])])

    def generate_column(self, size, name=None):
        """Build a column of ``size`` random values of ``byte_width`` bytes.

        ``name`` defaults to the field's own name when omitted.
        """
        # Presumably inherited from the base type; called before drawing any
        # values, exactly as before.
        is_valid = self._make_is_valid(size)

        # randint's upper bound is exclusive, so 256 is required for 0xFF to
        # be reachable.  tobytes() replaces the deprecated tostring() alias.
        values = [
            np.random.randint(0, 256, size=self.byte_width)
            .astype(np.uint8)
            .tobytes()
            for _ in range(size)
        ]

        if name is None:
            name = self.name
        return self.column_class(name, size, is_valid, values)


class StringType(BinaryType):

@property
Expand Down Expand Up @@ -525,6 +565,22 @@ def _get_buffers(self):
]


class FixedSizeBinaryColumn(PrimitiveColumn):
    """Column of fixed-width binary values, emitted as upper-case hex."""

    def _encode_value(self, x):
        """Return ``x`` (a byte string) as an upper-case hex string.

        Each byte becomes exactly two hex digits.
        """
        # bytearray() yields integers on both Python 2 and Python 3; iterating
        # a Python 2 str directly would yield 1-char strings, which the 'X'
        # format code rejects.
        return ''.join('{:02X}'.format(byte) for byte in bytearray(x))

    def _get_buffers(self):
        """Return the (buffer name, contents) pairs for this column."""
        # self.is_valid and self.values are presumably set by the base-class
        # constructor -- not visible from here.
        return [
            ('VALIDITY', [int(flag) for flag in self.is_valid]),
            ('DATA', [self._encode_value(value) for value in self.values])
        ]


class StringColumn(BinaryColumn):

def _encode_value(self, x):
Expand Down Expand Up @@ -719,6 +775,9 @@ def get_field(name, type_, nullable=True):
return BinaryType(name, nullable=nullable)
elif type_ == 'utf8':
return StringType(name, nullable=nullable)
elif type_.startswith('fixedsizebinary_'):
byte_width = int(type_.split('_')[1])
return FixedSizeBinaryType(name, byte_width=byte_width, nullable=nullable)

dtype = np.dtype(type_)

Expand Down Expand Up @@ -751,7 +810,8 @@ def _generate_file(name, fields, batch_sizes, dictionaries=None):
def generate_primitive_case(batch_sizes, name='primitive'):
types = ['bool', 'int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64', 'binary', 'utf8']
'float32', 'float64', 'binary', 'utf8',
'fixedsizebinary_19', 'fixedsizebinary_120']

fields = []

Expand Down
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
fields: [],
complex: false
},
{
name: "FixedSizeBinary",
fields: [{name: "byteWidth", type: int}],
complex: false
},
{
name: "Bool",
fields: [],
Expand Down
15 changes: 15 additions & 0 deletions java/vector/src/main/codegen/data/ValueVectorTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,21 @@
}
]
},
{
major: "Fixed",
width: -1,
javaType: "byte[]",
boxedType: "ArrowBuf",
minor: [
{
class: "FixedSizeBinary",
typeParams: [ {name: "byteWidth", type: "int"} ],
arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary",
friendlyType: "byte[]",
fields: [{name: "buffer", type: "ArrowBuf"}],
}
]
},
{
major: "VarLen",
width: 4,
Expand Down
4 changes: 4 additions & 0 deletions java/vector/src/main/codegen/templates/HolderReaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ public void read(Nullable${name}Holder h) {
holder.buffer.getBytes(holder.start, bytes, 0, ${type.width});
${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale);
return value;
<#elseif minor.class == "FixedSizeBinary">
byte[] value = new byte [holder.byteWidth];
holder.buffer.getBytes(0, value, 0, holder.byteWidth);
return value;
<#else>
${friendlyType} value = new ${friendlyType}(this.holder.value);
return value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
*/
public abstract class BaseFixedWidthVector extends BaseValueVector
implements FixedWidthVector, FieldVector, VectorDefinitionSetter {
private final byte typeWidth;
private final int typeWidth;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not changed the derived vector classes to use integer type. Should I?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it matters.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TYPE_WIDTH const in the subclass is a static variable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the other types it can be static; for fixed-size binary it has to be non-static.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you could also update BitVector, since it currently casts down to a byte.


protected int valueAllocationSizeInBytes;
protected int validityAllocationSizeInBytes;
Expand All @@ -54,7 +54,7 @@ public abstract class BaseFixedWidthVector extends BaseValueVector
protected int valueCount;

public BaseFixedWidthVector(final String name, final BufferAllocator allocator,
FieldType fieldType, final byte typeWidth) {
FieldType fieldType, final int typeWidth) {
super(name, allocator);
this.typeWidth = typeWidth;
field = new Field(name, fieldType, null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public BitVector(String name, BufferAllocator allocator) {
* @param allocator allocator for memory management.
*/
public BitVector(String name, FieldType fieldType, BufferAllocator allocator) {
super(name, allocator, fieldType, (byte) 0);
super(name, allocator, fieldType, 0);
reader = new BitReaderImpl(BitVector.this);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public static BufferLayout dataBuffer(int typeBitWidth) {
case 128:
return VALUES_128;
default:
throw new IllegalArgumentException("only 8, 16, 32, or 64 bits supported");
throw new IllegalArgumentException("only 8, 16, 32, 64, or 128 bits supported");
}
}

Expand All @@ -90,7 +90,7 @@ public static BufferLayout byteVector() {

private final BufferType type;

private BufferLayout(BufferType type, int typeBitWidth) {
BufferLayout(BufferType type, int typeBitWidth) {
super();
this.type = Preconditions.checkNotNull(type);
this.typeBitWidth = (short) typeBitWidth;
Expand Down
Loading