Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 71 additions & 5 deletions integration/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,49 @@ def generate_column(self, size, name=None):
return self.column_class(name, size, is_valid, values)


class FixedSizeBinaryType(PrimitiveType):

def __init__(self, name, byte_width, nullable=True):
PrimitiveType.__init__(self, name, nullable)
self.byte_width = byte_width

@property
def numpy_type(self):
return object

@property
def column_class(self):
return FixedSizeBinaryColumn

def _get_type(self):
return OrderedDict([('name', 'fixedsizebinary'), ('byteWidth', self.byte_width)])

def _get_type_layout(self):
return OrderedDict([
('vectors',
[OrderedDict([('type', 'VALIDITY'),
('typeBitWidth', 1)]),
OrderedDict([('type', 'DATA'),
('typeBitWidth', self.byte_width)])])])

def generate_column(self, size, name=None):
is_valid = self._make_is_valid(size)
values = []

for i in range(size):
if is_valid[i]:
draw = (np.random.randint(0, 255, size=self.byte_width)
.astype(np.uint8)
.tostring())
values.append(draw)
else:
values.append("")

if name is None:
name = self.name
return self.column_class(name, size, is_valid, values)


class StringType(BinaryType):

@property
Expand Down Expand Up @@ -436,6 +479,22 @@ def _get_buffers(self):
]


class FixedSizeBinaryColumn(PrimitiveColumn):

def _encode_value(self, x):
return ''.join('{:02x}'.format(c).upper() for c in x)

def _get_buffers(self):
data = []
for i, v in enumerate(self.values):
data.append(self._encode_value(v if self.is_valid[i] else ""))

return [
('VALIDITY', [int(x) for x in self.is_valid]),
('DATA', data)
]


class StringColumn(BinaryColumn):

def _encode_value(self, x):
Expand Down Expand Up @@ -648,6 +707,9 @@ def get_field(name, type_, nullable=True):
return BinaryType(name, nullable=nullable)
elif type_ == 'utf8':
return StringType(name, nullable=nullable)
elif type_.startswith('fixedsizebinary_'):
byte_width = int(type_.split('_')[1])
return FixedSizeBinaryType(name, byte_width=byte_width, nullable=nullable)

dtype = np.dtype(type_)

Expand Down Expand Up @@ -677,18 +739,22 @@ def _generate_file(name, fields, batch_sizes, dictionaries=None):
return JsonFile(name, schema, batches, dictionaries)


def generate_primitive_case(batch_sizes):
def generate_primitive_case(batch_sizes, name_suffix=None):
types = ['bool', 'int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64', 'binary', 'utf8']
'float32', 'float64', 'binary', 'utf8',
'fixedsizebinary_19', 'fixedsizebinary_120']

fields = []

for type_ in types:
fields.append(get_field(type_ + "_nullable", type_, True))
fields.append(get_field(type_ + "_nonnullable", type_, False))

return _generate_file("primitive", fields, batch_sizes)
name = "primitive"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added name suffix for primitive case such that primitive test case at line 823/824 become both effective. Previously the first one was overwritten by the second one and hence ignored.

if name_suffix is not None:
name += name_suffix
return _generate_file(name, fields, batch_sizes)


def generate_datetime_case():
Expand Down Expand Up @@ -754,8 +820,8 @@ def _temp_path():
return

file_objs = [
generate_primitive_case([7, 10]),
generate_primitive_case([0, 0, 0]),
generate_primitive_case([7, 10], "_0"),
generate_primitive_case([0, 0, 0], "_1"),
generate_datetime_case(),
generate_nested_case(),
generate_dictionary_case()
Expand Down
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
fields: [],
complex: false
},
{
name: "FixedSizeBinary",
fields: [{name: "byteWidth", type: int}],
complex: false
}
{
name: "Bool",
fields: [],
Expand Down
15 changes: 15 additions & 0 deletions java/vector/src/main/codegen/data/ValueVectorTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,21 @@
}
]
},
{
major: "Fixed",
width: -1,
javaType: "byte[]",
boxedType: "ArrowBuf",
minor: [
{
class: "FixedSizeBinary",
typeParams: [ {name: "byteWidth", type: "int"} ],
arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary",
friendlyType: "byte[]",
fields: [{name: "index", type: "int", include: false}, {name: "buffer", type: "ArrowBuf"}],
}
]
},
{
major: "VarLen",
width: 4,
Expand Down
Loading