diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 29b203ae130..2bbc843836a 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -927,6 +927,83 @@ class LargeListColumn(_BaseListColumn, _LargeOffsetsMixin): pass +class ListViewField(Field): + + def __init__(self, name, value_field, *, nullable=True, + metadata=None): + super().__init__(name, nullable=nullable, + metadata=metadata) + self.value_field = value_field + + @property + def column_class(self): + return ListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'listview') + ]) + + def _get_children(self): + return [self.value_field.get_json()] + + def generate_column(self, size, name=None): + MAX_LIST_SIZE = 4 + VALUES_SIZE = size * MAX_LIST_SIZE + + is_valid = self._make_is_valid(size) + + MAX_OFFSET = VALUES_SIZE - MAX_LIST_SIZE + offsets = np.random.randint(0, MAX_OFFSET + 1, size=size) + sizes = np.random.randint(0, MAX_LIST_SIZE + 1, size=size) + + values = self.value_field.generate_column(VALUES_SIZE) + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, offsets, sizes, values) + + +class LargeListViewField(ListViewField): + + @property + def column_class(self): + return LargeListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'largelistview') + ]) + + +class _BaseListViewColumn(Column): + + def __init__(self, name, count, is_valid, offsets, sizes, values): + super().__init__(name, count) + self.is_valid = is_valid + self.offsets = offsets + self.sizes = sizes + self.values = values + + def _get_buffers(self): + return [ + ('VALIDITY', [int(v) for v in self.is_valid]), + ('OFFSET', self._encode_offsets(self.offsets)), + ('SIZE', self._encode_offsets(self.sizes)), + ] + + def _get_children(self): + return [self.values.get_json()] + + +class ListViewColumn(_BaseListViewColumn, _NarrowOffsetsMixin): + pass + + +class 
LargeListViewColumn(_BaseListViewColumn, _LargeOffsetsMixin): + pass + + class MapField(Field): def __init__(self, name, key_field, item_field, *, nullable=True, @@ -1663,6 +1740,15 @@ def generate_binary_view_case(): return _generate_file("binary_view", fields, batch_sizes) +def generate_list_view_case(): + fields = [ + ListViewField('lv', get_field('item', 'float32')), + LargeListViewField('llv', get_field('item', 'float32')), + ] + batch_sizes = [0, 7, 256] + return _generate_file("list_view", fields, batch_sizes) + + def generate_nested_large_offsets_case(): fields = [ LargeListField('large_list_nullable', get_field('item', 'int32')), @@ -1847,7 +1933,12 @@ def _temp_path(): generate_binary_view_case() .skip_tester('C#') - .skip_tester('Go') + .skip_tester('Java') + .skip_tester('JS') + .skip_tester('Rust'), + + generate_list_view_case() + .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index bab00e6d70d..7fadb7e47cf 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -193,6 +193,8 @@ def _run_test_cases(self, ``case_runner`` ran against ``test_cases`` """ def case_wrapper(test_case): + if serial: + return case_runner(test_case) with printer.cork(): return case_runner(test_case) diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index e1160b287e7..1a9b1b97f07 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -223,7 +223,7 @@ considered equivalent to ``[]`` (no metadata). 
Duplicated keys are not forbidden **Type**: :: { - "name" : "null|struct|list|largelist|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map" + "name" : "null|struct|list|largelist|listview|largelistview|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|utf8view|binaryview|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map|runendencoded" } A ``Type`` will have other fields as defined in @@ -446,12 +446,22 @@ or ``DATA``. ``BufferData`` is encoded based on the type of buffer: -* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable +* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable ``Field`` still has a ``VALIDITY`` array, even though all values are 1. * ``OFFSET``: a JSON array of integers for 32-bit offsets or - string-formatted integers for 64-bit offsets -* ``TYPE_ID``: a JSON array of integers -* ``DATA``: a JSON array of encoded values + string-formatted integers for 64-bit offsets. +* ``TYPE_ID``: a JSON array of integers. +* ``DATA``: a JSON array of encoded values. +* ``VARIADIC_DATA_BUFFERS``: a JSON array of data buffers represented as + hex-encoded strings. +* ``VIEWS``: a JSON array of encoded views, which are JSON objects with: + * ``SIZE``: an integer indicating the size of the view, + * ``INLINED``: an encoded value (this field will be present if ``SIZE`` + is at most 12, otherwise the next three fields will be present), + * ``PREFIX_HEX``: the first four bytes of the view encoded as hex, + * ``BUFFER_INDEX``: the index in ``VARIADIC_DATA_BUFFERS`` of the buffer + viewed, + * ``OFFSET``: the offset in the buffer viewed.
The value encoding for ``DATA`` is different depending on the logical type: @@ -527,6 +537,9 @@ in ``datagen.py``): - Signed indices - Unsigned indices - Nested dictionaries +* Run end encoded +* Binary view and string view +* List view and large list view * Extension Types diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index fa5fa7addf3..8ca1416b92a 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -150,19 +150,19 @@ func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { case *Int16: for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { v -= int16(r.data.offset) - v = int16(utils.MinInt(int(v), r.data.length)) + v = int16(utils.Min(int(v), r.data.length)) bldr.(*Int16Builder).Append(v) } case *Int32: for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { v -= int32(r.data.offset) - v = int32(utils.MinInt(int(v), r.data.length)) + v = int32(utils.Min(int(v), r.data.length)) bldr.(*Int32Builder).Append(v) } case *Int64: for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { v -= int64(r.data.offset) - v = int64(utils.MinInt(int(v), r.data.length)) + v = int64(utils.Min(int(v), r.data.length)) bldr.(*Int64Builder).Append(v) } } diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 4b627341167..9d959b5e43b 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -19,7 +19,6 @@ package array import ( "bytes" "fmt" - "math" "strings" "sync/atomic" @@ -1411,118 +1410,19 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { return b.Unmarshal(dec) } -// Pre-conditions: -// -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func minListViewOffset32(input arrow.ArrayData) int32 { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := 
arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. - i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } - - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue - } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { - minOffset = offset - } - } - return minOffset -} - -// Find the maximum offset+size in a LIST_VIEW array. +// Find the minimum offset in a LIST_VIEW/LARGE_LIST_VIEW array. // // Pre-conditions: // -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func maxListViewOffset32(input arrow.ArrayData) int { - inputOffset := input.Offset() - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) - } - - i := input.Len() - 1 // safe because input.Len() > 0 - for i != 0 && (isNull(i) || sizes[i] == 0) { - i -= 1 - } - offset := offsets[i] - size := sizes[i] - if i == 0 { - if isNull(i) || sizes[i] == 0 { - return 0 - } else { - return int(offset + size) - } - } - - values := input.Children()[0] - maxEnd := int(offsets[i] + sizes[i]) - if maxEnd == values.Len() { - // Early-exit: maximum possible view-end found already. - return maxEnd - } - - // Slow path: scan the buffers entirely.
- for ; i >= 0; i -= 1 { - offset := offsets[i] - size := sizes[i] - if size > 0 && !isNull(i) { - if int(offset+size) > maxEnd { - maxEnd = int(offset + size) - if maxEnd == values.Len() { - return maxEnd - } - } - } - } - return maxEnd -} - -// Pre-conditions: -// -// input.DataType() is LargeListViewType +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 // input.Len() > 0 && input.NullN() != input.Len() -func minLargeListViewOffset64(input arrow.ArrayData) int64 { +func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { var bitmap []byte if input.Buffers()[0] != nil { bitmap = input.Buffers()[0].Bytes() } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():] isNull := func(i int) bool { return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) @@ -1557,27 +1457,25 @@ func minLargeListViewOffset64(input arrow.ArrayData) int64 { return minOffset } -// Find the maximum offset+size in a LARGE_LIST_VIEW array. +// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
// // Pre-conditions: // -// input.DataType() is LargeListViewType +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 // input.Len() > 0 && input.NullN() != input.Len() -func maxLargeListViewOffset64(input arrow.ArrayData) int64 { +func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() var bitmap []byte if input.Buffers()[0] != nil { bitmap = input.Buffers()[0].Bytes() } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:] isNull := func(i int) bool { return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) } - // It's very likely that the first non-null non-empty list-view starts at - // offset zero, so we check that first and potentially early-return a 0. i := input.Len() - 1 // safe because input.Len() > 0 for i != 0 && (isNull(i) || sizes[i] == 0) { i -= 1 @@ -1592,15 +1490,9 @@ func maxLargeListViewOffset64(input arrow.ArrayData) int64 { } } - if offset > math.MaxInt64-size { - // Early-exit: 64-bit overflow detected. This is not possible on a - // valid list-view, but we return the maximum possible value to - // avoid undefined behavior. - return math.MaxInt64 - } values := input.Children()[0] maxEnd := offsets[i] + sizes[i] - if maxEnd == int64(values.Len()) { + if maxEnd == Offset(values.Len()) { // Early-exit: maximum possible view-end found already. return maxEnd } @@ -1611,14 +1503,8 @@ func maxLargeListViewOffset64(input arrow.ArrayData) int64 { size := sizes[i] if size > 0 && !isNull(i) { if offset+size > maxEnd { - if offset > math.MaxInt64-size { - // 64-bit overflow detected. 
This is not possible on a valid list-view, - // but we saturate maxEnd to the maximum possible value to avoid - // undefined behavior. - return math.MaxInt64 - } maxEnd = offset + size - if maxEnd == int64(values.Len()) { + if maxEnd == Offset(values.Len()) { return maxEnd } } @@ -1634,11 +1520,11 @@ func rangeOfValuesUsed(input arrow.ArrayData) (int, int) { var minOffset, maxEnd int switch input.DataType().(type) { case *arrow.ListViewType: - minOffset = int(minListViewOffset32(input)) - maxEnd = maxListViewOffset32(input) + minOffset = int(minListViewOffset[int32](input)) + maxEnd = int(maxListViewEnd[int32](input)) case *arrow.LargeListViewType: - minOffset = int(minLargeListViewOffset64(input)) - maxEnd = int(maxLargeListViewOffset64(input)) + minOffset = int(minListViewOffset[int64](input)) + maxEnd = int(maxListViewEnd[int64](input)) case *arrow.ListType: offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] minOffset = int(offsets[0]) diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go index 5658c6e587d..974fea1f14e 100644 --- a/go/arrow/avro/reader_types.go +++ b/go/arrow/avro/reader_types.go @@ -22,7 +22,7 @@ import ( "errors" "fmt" "math/big" - + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/decimal128" diff --git a/go/arrow/bitutil/endian_default.go b/go/arrow/bitutil/endian_default.go index 9f5d3cdc7d2..ecbbaa70d04 100644 --- a/go/arrow/bitutil/endian_default.go +++ b/go/arrow/bitutil/endian_default.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !s390x // +build !s390x package bitutil diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go index a9bba439128..e99605f5848 100644 --- a/go/arrow/bitutil/endian_s390x.go +++ b/go/arrow/bitutil/endian_s390x.go @@ -18,7 +18,7 @@ package bitutil import ( "math/bits" - "unsafe" + "unsafe" ) var toFromLEFunc = bits.ReverseBytes64 diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index f9693851d74..64cc8456e81 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -82,6 +82,8 @@ var formatToSimpleType = map[string]arrow.DataType{ "Z": arrow.BinaryTypes.LargeBinary, "u": arrow.BinaryTypes.String, "U": arrow.BinaryTypes.LargeString, + "vz": arrow.BinaryTypes.BinaryView, + "vu": arrow.BinaryTypes.StringView, "tdD": arrow.FixedWidthTypes.Date32, "tdm": arrow.FixedWidthTypes.Date64, "tts": arrow.FixedWidthTypes.Time32s, @@ -263,6 +265,12 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { dt = arrow.ListOfField(childFields[0]) case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) + case 'v': // list view/large list view + if f[2] == 'l' { + dt = arrow.ListViewOfField(childFields[0]) + } else if f[2] == 'L' { + dt = arrow.LargeListViewOfField(childFields[0]) + } case 'w': // fixed size list is w:# where # is the list size. 
listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { @@ -364,6 +372,16 @@ func (imp *cimporter) doImportChildren() error { if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } + case arrow.LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.LARGE_LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } case arrow.FIXED_SIZE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { @@ -485,10 +503,18 @@ func (imp *cimporter) doImport() error { return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.LargeBinaryType: return imp.importStringLike(int64(arrow.Int64SizeBytes)) + case *arrow.StringViewType: + return imp.importBinaryViewLike() + case *arrow.BinaryViewType: + return imp.importBinaryViewLike() case *arrow.ListType: return imp.importListLike() case *arrow.LargeListType: return imp.importListLike() + case *arrow.ListViewType: + return imp.importListViewLike() + case *arrow.LargeListViewType: + return imp.importListViewLike() case *arrow.MapType: return imp.importListLike() case *arrow.FixedSizeListType: @@ -654,6 +680,33 @@ func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { return } +func (imp *cimporter) importBinaryViewLike() (err error) { + if err = imp.checkNoChildren(); err != nil { + return + } + + buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) + defer memory.ReleaseBuffers(buffers) + + if buffers[0], err = imp.importNullBitmap(0); err != nil { + return + } + + if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { + return + } + + 
dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) + for i, size := range dataBufferSizes { + if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { + return + } + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return +} + func (imp *cimporter) importListLike() (err error) { if err = imp.checkNumChildren(1); err != nil { return err @@ -683,6 +736,43 @@ func (imp *cimporter) importListLike() (err error) { return } +func (imp *cimporter) importListViewLike() (err error) { + offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) + + if err = imp.checkNumChildren(1); err != nil { + return err + } + + if err = imp.checkNumBuffers(3); err != nil { + return err + } + + var nulls, offsets, sizes *memory.Buffer + if nulls, err = imp.importNullBitmap(0); err != nil { + return + } + if nulls != nil { + defer nulls.Release() + } + + if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { + return + } + if offsets != nil { + defer offsets.Release() + } + + if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { + return + } + if sizes != nil { + defer sizes.Release() + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + return +} + func (imp *cimporter) importFixedSizePrimitive() error { if err := imp.checkNoChildren(); err != nil { return err diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index d5fdc0dac15..9c7c238ffb7 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -167,6 +167,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "u" case *arrow.LargeStringType: return "U" + case *arrow.BinaryViewType: + return "vz" + case *arrow.StringViewType: + return "vu" 
case *arrow.Date32Type: return "tdD" case *arrow.Date64Type: @@ -228,6 +232,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "+l" case *arrow.LargeListType: return "+L" + case *arrow.ListViewType: + return "+vl" + case *arrow.LargeListViewType: + return "+vL" case *arrow.FixedSizeListType: return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: @@ -328,6 +336,15 @@ func allocateBufferPtrArr(n int) (out []*C.void) { return } +func allocateBufferSizeArr(n int) (out []C.int64_t) { + s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof(int64(0))))) + s.Len = n + s.Cap = n + + return +} + func (exp *schemaExporter) finish(out *CArrowSchema) { out.dictionary = nil if exp.dict != nil { @@ -368,15 +385,19 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportField(arrow.Field{Type: arr.DataType()}, outSchema) } - nbuffers := len(arr.Data().Buffers()) - buf_offset := 0 + buffers := arr.Data().Buffers() // Some types don't have validity bitmaps, but we keep them shifted // to make processing easier in other contexts. This means that // we have to adjust when exporting. 
has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if nbuffers > 0 && !has_validity_bitmap { - nbuffers-- - buf_offset++ + if len(buffers) > 0 && !has_validity_bitmap { + buffers = buffers[1:] + } + nbuffers := len(buffers) + + has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) + if has_buffer_sizes_buffer { + nbuffers++ } out.dictionary = nil @@ -387,25 +408,34 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { out.buffers = nil if nbuffers > 0 { - bufs := arr.Data().Buffers() - buffers := allocateBufferPtrArr(nbuffers) - for i, buf := range bufs[buf_offset:] { + cBufs := allocateBufferPtrArr(nbuffers) + for i, buf := range buffers { if buf == nil || buf.Len() == 0 { if i > 0 || !has_validity_bitmap { // apache/arrow#33936: export a dummy buffer to be friendly to // implementations that don't import NULL properly - buffers[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) + cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) } else { // null pointer permitted for the validity bitmap // (assuming null count is 0) - buffers[i] = nil + cBufs[i] = nil } continue } - buffers[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + } + + if has_buffer_sizes_buffer { + sizes := allocateBufferSizeArr(len(buffers[2:])) + for i, buf := range buffers[2:] { + sizes[i] = C.int64_t(buf.Len()) + } + if len(sizes) > 0 { + cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) + } } - out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&buffers[0])) + out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) } arr.Data().Retain() diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go index 1ee1959b2dd..052d79610bc 100644 --- a/go/arrow/compute/arithmetic.go +++ b/go/arrow/compute/arithmetic.go @@ -678,8 +678,8 @@ func RegisterScalarArithmetic(reg FunctionRegistry) { // the allocated space is for duration (an int64) 
but we // wrote the time32 - time32 as if the output was time32 // so a quick copy in reverse expands the int32s to int64. - rawData := exec.GetData[int32](out.Buffers[1].Buf) - outData := exec.GetData[int64](out.Buffers[1].Buf) + rawData := arrow.GetData[int32](out.Buffers[1].Buf) + outData := arrow.GetData[int64](out.Buffers[1].Buf) for i := out.Len - 1; i >= 0; i-- { outData[i] = int64(rawData[i]) diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go index c9c3f1ceb03..34c1bc6d98d 100644 --- a/go/arrow/compute/arithmetic_test.go +++ b/go/arrow/compute/arithmetic_test.go @@ -195,7 +195,7 @@ func (b *Float16BinaryFuncTestSuite) TestSub() { } } -type BinaryArithmeticSuite[T exec.NumericTypes] struct { +type BinaryArithmeticSuite[T arrow.NumericType] struct { BinaryFuncTestSuite opts compute.ArithmeticOptions @@ -205,7 +205,7 @@ type BinaryArithmeticSuite[T exec.NumericTypes] struct { } func (BinaryArithmeticSuite[T]) DataType() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) { @@ -564,7 +564,7 @@ func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() { bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number") } -type BinaryIntegralArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct { +type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryArithmeticSuite[T] } @@ -2412,7 +2412,7 @@ func TestUnaryArithmeticNull(t *testing.T) { } } -type UnaryArithmeticSuite[T exec.NumericTypes, O fnOpts] struct { +type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct { suite.Suite mem *memory.CheckedAllocator @@ -2433,7 +2433,7 @@ func (us *UnaryArithmeticSuite[T, O]) TearDownTest() { } func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar { @@ -2532,7 
+2532,7 @@ func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O] us.ErrorContains(err, msg) } -type UnaryArithmeticIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct { UnaryArithmeticSuite[T, compute.ArithmeticOptions] } @@ -2598,7 +2598,7 @@ func (us *UnaryArithmeticIntegral[T]) TestLog() { } } -type UnaryArithmeticSigned[T exec.IntTypes] struct { +type UnaryArithmeticSigned[T arrow.IntType] struct { UnaryArithmeticIntegral[T] } @@ -2678,7 +2678,7 @@ func (us *UnaryArithmeticSigned[T]) TestNegate() { }) } -type UnaryArithmeticUnsigned[T exec.UintTypes] struct { +type UnaryArithmeticUnsigned[T arrow.UintType] struct { UnaryArithmeticIntegral[T] } @@ -2965,12 +2965,12 @@ func TestUnaryArithmetic(t *testing.T) { suite.Run(t, new(DecimalUnaryArithmeticSuite)) } -type BitwiseArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct { +type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryFuncTestSuite } func (bs *BitwiseArithmeticSuite[T]) datatype() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } // to make it easier to test different widths, tests give bytes which @@ -3061,7 +3061,7 @@ var roundModes = []compute.RoundMode{ compute.RoundHalfToOdd, } -type UnaryRoundSuite[T exec.NumericTypes] struct { +type UnaryRoundSuite[T arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundOptions] } @@ -3073,7 +3073,7 @@ func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) { us.opts.NDigits = v } -type UnaryRoundToMultipleSuite[T exec.NumericTypes] struct { +type UnaryRoundToMultipleSuite[T arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundToMultipleOptions] } @@ -3085,15 +3085,15 @@ func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) { us.opts.Multiple = scalar.NewFloat64Scalar(val) } -type UnaryRoundIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryRoundIntegral[T 
arrow.IntType | arrow.UintType] struct { UnaryRoundSuite[T] } -type UnaryRoundToMultipleIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct { UnaryRoundToMultipleSuite[T] } -type UnaryRoundSigned[T exec.IntTypes] struct { +type UnaryRoundSigned[T arrow.IntType] struct { UnaryRoundIntegral[T] } @@ -3130,7 +3130,7 @@ func (us *UnaryRoundSigned[T]) TestRound() { } } -type UnaryRoundToMultipleSigned[T exec.IntTypes] struct { +type UnaryRoundToMultipleSigned[T arrow.IntType] struct { UnaryRoundToMultipleIntegral[T] } @@ -3164,7 +3164,7 @@ func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() { } } -type UnaryRoundUnsigned[T exec.UintTypes] struct { +type UnaryRoundUnsigned[T arrow.UintType] struct { UnaryRoundIntegral[T] } @@ -3201,7 +3201,7 @@ func (us *UnaryRoundUnsigned[T]) TestRound() { } } -type UnaryRoundToMultipleUnsigned[T exec.UintTypes] struct { +type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct { UnaryRoundToMultipleIntegral[T] } diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go index 276e4570ca9..1b5e69a502c 100644 --- a/go/arrow/compute/exec/utils.go +++ b/go/arrow/compute/exec/utils.go @@ -21,96 +21,21 @@ package exec import ( "fmt" "math" - "reflect" "sync/atomic" "unsafe" "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/bitutil" - "github.com/apache/arrow/go/v15/arrow/decimal128" - "github.com/apache/arrow/go/v15/arrow/decimal256" - "github.com/apache/arrow/go/v15/arrow/float16" "github.com/apache/arrow/go/v15/arrow/memory" "golang.org/x/exp/constraints" "golang.org/x/exp/slices" ) -// IntTypes is a type constraint for raw values represented as signed -// integer types by Arrow. 
We aren't just using constraints.Signed -// because we don't want to include the raw `int` type here whose size -// changes based on the architecture (int32 on 32-bit architectures and -// int64 on 64-bit architectures). -// -// This will also cover types like MonthInterval or the time types -// as their underlying types are int32 and int64 which will get covered -// by using the ~ -type IntTypes interface { - ~int8 | ~int16 | ~int32 | ~int64 -} - -// UintTypes is a type constraint for raw values represented as unsigned -// integer types by Arrow. We aren't just using constraints.Unsigned -// because we don't want to include the raw `uint` type here whose size -// changes based on the architecture (uint32 on 32-bit architectures and -// uint64 on 64-bit architectures). We also don't want to include uintptr -type UintTypes interface { - ~uint8 | ~uint16 | ~uint32 | ~uint64 -} - -// FloatTypes is a type constraint for raw values for representing -// floating point values in Arrow. This consists of constraints.Float and -// float16.Num -type FloatTypes interface { - float16.Num | constraints.Float -} - -// NumericTypes is a type constraint for just signed/unsigned integers -// and float32/float64. -type NumericTypes interface { - IntTypes | UintTypes | constraints.Float -} - -// DecimalTypes is a type constraint for raw values representing larger -// decimal type values in Arrow, specifically decimal128 and decimal256. -type DecimalTypes interface { - decimal128.Num | decimal256.Num -} - -// FixedWidthTypes is a type constraint for raw values in Arrow that -// can be represented as FixedWidth byte slices. Specifically this is for -// using Go generics to easily re-type a byte slice to a properly-typed -// slice. 
Booleans are excluded here since they are represented by Arrow -// as a bitmap and thus the buffer can't be just reinterpreted as a []bool -type FixedWidthTypes interface { - IntTypes | UintTypes | - FloatTypes | DecimalTypes | - arrow.DayTimeInterval | arrow.MonthDayNanoInterval -} - -type TemporalTypes interface { - arrow.Date32 | arrow.Date64 | arrow.Time32 | arrow.Time64 | - arrow.Timestamp | arrow.Duration | arrow.DayTimeInterval | - arrow.MonthInterval | arrow.MonthDayNanoInterval -} - -func GetValues[T FixedWidthTypes](data arrow.ArrayData, i int) []T { - if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { - return nil - } - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()) - return ret[data.Offset():] -} - -func GetOffsets[T int32 | int64](data arrow.ArrayData, i int) []T { - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()+1) - return ret[data.Offset():] -} - // GetSpanValues returns a properly typed slice by reinterpreting // the buffer at index i using unsafe.Slice. This will take into account // the offset of the given ArraySpan. 
-func GetSpanValues[T FixedWidthTypes](span *ArraySpan, i int) []T { +func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T { if len(span.Buffers[i].Buf) == 0 { return nil } @@ -126,16 +51,6 @@ func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T { return ret[span.Offset:] } -func GetBytes[T FixedWidthTypes](in []T) []byte { - var z T - return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*int(unsafe.Sizeof(z))) -} - -func GetData[T FixedWidthTypes](in []byte) []T { - var z T - return unsafe.Slice((*T)(unsafe.Pointer(&in[0])), len(in)/int(unsafe.Sizeof(z))) -} - func Min[T constraints.Ordered](a, b T) T { if a < b { return a @@ -165,59 +80,22 @@ func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) arrow.ErrInvalid) } -var typMap = map[reflect.Type]arrow.DataType{ - reflect.TypeOf(false): arrow.FixedWidthTypes.Boolean, - reflect.TypeOf(int8(0)): arrow.PrimitiveTypes.Int8, - reflect.TypeOf(int16(0)): arrow.PrimitiveTypes.Int16, - reflect.TypeOf(int32(0)): arrow.PrimitiveTypes.Int32, - reflect.TypeOf(int64(0)): arrow.PrimitiveTypes.Int64, - reflect.TypeOf(uint8(0)): arrow.PrimitiveTypes.Uint8, - reflect.TypeOf(uint16(0)): arrow.PrimitiveTypes.Uint16, - reflect.TypeOf(uint32(0)): arrow.PrimitiveTypes.Uint32, - reflect.TypeOf(uint64(0)): arrow.PrimitiveTypes.Uint64, - reflect.TypeOf(float32(0)): arrow.PrimitiveTypes.Float32, - reflect.TypeOf(float64(0)): arrow.PrimitiveTypes.Float64, - reflect.TypeOf(string("")): arrow.BinaryTypes.String, - reflect.TypeOf(arrow.Date32(0)): arrow.FixedWidthTypes.Date32, - reflect.TypeOf(arrow.Date64(0)): arrow.FixedWidthTypes.Date64, - reflect.TypeOf(true): arrow.FixedWidthTypes.Boolean, - reflect.TypeOf(float16.Num{}): arrow.FixedWidthTypes.Float16, - reflect.TypeOf([]byte{}): arrow.BinaryTypes.Binary, -} - -// GetDataType returns the appropriate arrow.DataType for the given type T -// only for non-parametric types. 
This uses a map and reflection internally -// so don't call this in a tight loop, instead call this once and then use -// a closure with the result. -func GetDataType[T NumericTypes | bool | string | []byte | float16.Num]() arrow.DataType { - var z T - return typMap[reflect.TypeOf(z)] -} - -// GetType returns the appropriate arrow.Type type T, only for non-parametric -// types. This uses a map and reflection internally so don't call this in -// a tight loop, instead call it once and then use a closure with the result. -func GetType[T NumericTypes | bool | string]() arrow.Type { - var z T - return typMap[reflect.TypeOf(z)].ID() -} - -type arrayBuilder[T NumericTypes | bool] interface { +type arrayBuilder[T arrow.NumericType | bool] interface { array.Builder Append(T) AppendValues([]T, []bool) } -func ArrayFromSlice[T NumericTypes | bool](mem memory.Allocator, data []T) arrow.Array { - bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T]) +func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array { + bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, nil) return bldr.NewArray() } -func ArrayFromSliceWithValid[T NumericTypes | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { - bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T]) +func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { + bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, valid) @@ -323,7 +201,7 @@ func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) { } type arrayTypes interface { - FixedWidthTypes | TemporalTypes | bool | string | []byte + arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte } type ArrayIter[T arrayTypes] interface { @@ -345,11 +223,11 @@ func (b *BoolIter) 
Next() (out bool) { return } -type PrimitiveIter[T FixedWidthTypes] struct { +type PrimitiveIter[T arrow.FixedWidthType] struct { Values []T } -func NewPrimitiveIter[T FixedWidthTypes](arr *ArraySpan) ArrayIter[T] { +func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) ArrayIter[T] { return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)} } diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go index b26e4ff41e7..345d6dcf3b4 100644 --- a/go/arrow/compute/exec/utils_test.go +++ b/go/arrow/compute/exec/utils_test.go @@ -53,7 +53,7 @@ func TestRechunkConsistentArraysTrivial(t *testing.T) { } } -func assertEqual[T exec.NumericTypes](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { +func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { exp := exec.ArrayFromSlice(mem, data) defer exp.Release() assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr) diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go index 565ae3bfadb..036e1e355ed 100644 --- a/go/arrow/compute/fieldref.go +++ b/go/arrow/compute/fieldref.go @@ -282,31 +282,31 @@ type refImpl interface { // // Nested fields can be referenced as well, given the schema: // -// arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, -// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, -// }) +// arrow.NewSchema([]arrow.Field{ +// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, +// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, +// }) // // the following all indicate the nested field named "n": // -// FieldRefPath(FieldPath{0, 0}) -// FieldRefList("a", 0) -// FieldRefList("a", "n") -// FieldRefList(0, "n") -// NewFieldRefFromDotPath(".a[0]") +// FieldRefPath(FieldPath{0, 0}) +// FieldRefList("a", 0) +// FieldRefList("a", "n") +// FieldRefList(0, "n") +// NewFieldRefFromDotPath(".a[0]") // // 
FieldPaths matching a FieldRef are retrieved with the FindAll* functions // Multiple matches are possible because field names may be duplicated within // a schema. For example: // -// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, -// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, -// }) -// matches := FieldRefName("a").FindAll(aIsAmbiguous) -// assert.Len(matches, 2) -// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) -// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) +// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ +// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, +// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, +// }) +// matches := FieldRefName("a").FindAll(aIsAmbiguous) +// assert.Len(matches, 2) +// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) +// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) type FieldRef struct { impl refImpl } @@ -346,17 +346,18 @@ func FieldRefList(elems ...interface{}) FieldRef { // NewFieldRefFromDotPath parses a dot path into a field ref. // // dot_path = '.' name -// | '[' digit+ ']' -// | dot_path+ +// +// | '[' digit+ ']' +// | dot_path+ // // Examples // -// ".alpha" => FieldRefName("alpha") -// "[2]" => FieldRefIndex(2) -// ".beta[3]" => FieldRefList("beta", 3) -// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) -// ".hello world" => FieldRefName("hello world") -// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) +// ".alpha" => FieldRefName("alpha") +// "[2]" => FieldRefIndex(2) +// ".beta[3]" => FieldRefList("beta", 3) +// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) +// ".hello world" => FieldRefName("hello world") +// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) // // Note: when parsing a name, a '\' preceding any other character will be // dropped from the resulting name. 
therefore if a name must contain the characters diff --git a/go/arrow/compute/internal/kernels/base_arithmetic.go b/go/arrow/compute/internal/kernels/base_arithmetic.go index 4ef0031f314..b795c04c39e 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic.go @@ -81,7 +81,7 @@ const ( OpLogbChecked ) -func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) { +func mulWithOverflow[T arrow.IntType | arrow.UintType](a, b T) (T, error) { min, max := MinOf[T](), MaxOf[T]() switch { case a > 0: @@ -107,7 +107,7 @@ func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) { return a * b, nil } -func getGoArithmeticBinary[OutT, Arg0T, Arg1T exec.NumericTypes](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] { +func getGoArithmeticBinary[OutT, Arg0T, Arg1T arrow.NumericType](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] { return binaryOps[OutT, Arg0T, Arg1T]{ arrArr: func(_ *exec.KernelCtx, left []Arg0T, right []Arg1T, out []OutT) error { var err error @@ -143,7 +143,7 @@ var ( errLogNeg = fmt.Errorf("%w: logarithm of negative number", arrow.ErrInvalid) ) -func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getGoArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { switch op { case OpAdd: return ScalarBinary(getGoArithmeticBinary(func(a, b InT, _ *error) OutT { return OutT(a + b) })) @@ -178,7 +178,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arit if SizeOf[InT]() == SizeOf[OutT]() { return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - in, output := exec.GetBytes(arg), exec.GetBytes(out) + in, output := arrow.GetBytes(arg), arrow.GetBytes(out) copy(output, in) return nil }) @@ -314,7 +314,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | 
exec.IntTypes](op Arit } if SizeOf[InT]() == SizeOf[OutT]() { return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - in, output := exec.GetBytes(arg), exec.GetBytes(out) + in, output := arrow.GetBytes(arg), arrow.GetBytes(out) copy(output, in) return nil }) @@ -837,7 +837,7 @@ func ArithmeticExecSameType(ty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec return nil } -func arithmeticExec[InT exec.IntTypes | exec.UintTypes](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec { +func arithmeticExec[InT arrow.IntType | arrow.UintType](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec { switch oty { case arrow.INT8: return getArithmeticOpIntegral[InT, int8](op) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go index 942b8e4ff56..51b1866fb68 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go @@ -21,63 +21,64 @@ package kernels import ( "unsafe" + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/compute/exec" "github.com/apache/arrow/go/v15/arrow/internal/debug" "golang.org/x/exp/constraints" "golang.org/x/sys/cpu" ) -func getAvx2ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] { - typ := exec.GetType[T]() +func getAvx2ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] { + typ := arrow.GetType[T]() return binaryOps[T, T, T]{ arrArr: func(_ *exec.KernelCtx, Arg0, Arg1, Out []T) error { - arithmeticAvx2(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticAvx2(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error { - arithmeticArrScalarAvx2(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0)) + 
arithmeticArrScalarAvx2(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error { - arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1)) + arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1)) return nil }, } } -func getSSE4ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] { - typ := exec.GetType[T]() +func getSSE4ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] { + typ := arrow.GetType[T]() return binaryOps[T, T, T]{ arrArr: func(_ *exec.KernelCtx, Arg0, Arg1, Out []T) error { - arithmeticSSE4(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticSSE4(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error { - arithmeticArrScalarSSE4(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticArrScalarSSE4(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error { - arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1)) + arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1)) return nil }, } } -func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { if cpu.X86.HasAVX2 { switch op { case OpAdd, OpSub, OpMul: return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpNegate: - typ := exec.GetType[InT]() + 
typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) case OpSign: - inType, outType := exec.GetType[InT](), exec.GetType[OutT]() + inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]() return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - arithmeticUnaryDiffTypesAvx2(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryDiffTypesAvx2(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } @@ -86,15 +87,15 @@ func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arithm case OpAdd, OpSub, OpMul: return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpNegate: - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(ctx *exec.KernelCtx, arg, out []InT) error { - arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) case OpSign: - inType, outType := exec.GetType[InT](), exec.GetType[OutT]() + inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]() return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - arithmeticUnaryDiffTypesSSE4(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryDiffTypesSSE4(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } @@ -109,38 +110,38 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec. 
if cpu.X86.HasAVX2 { switch op { case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } } else if cpu.X86.HasSSE42 { switch op { case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go index 8e46ca030c8..2c1559fe0f0 100644 --- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go +++ 
b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go @@ -19,6 +19,7 @@ package kernels import ( + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/exp/constraints" ) @@ -27,6 +28,6 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec. return getGoArithmeticOpFloating[InT, OutT](op) } -func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { return getGoArithmeticOpIntegral[InT, OutT](op) } diff --git a/go/arrow/compute/internal/kernels/boolean_cast.go b/go/arrow/compute/internal/kernels/boolean_cast.go index 923c5b3f545..6109d257909 100644 --- a/go/arrow/compute/internal/kernels/boolean_cast.go +++ b/go/arrow/compute/internal/kernels/boolean_cast.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/compute/exec" ) -func isNonZero[T exec.FixedWidthTypes](ctx *exec.KernelCtx, in []T, out []byte) error { +func isNonZero[T arrow.FixedWidthType](ctx *exec.KernelCtx, in []T, out []byte) error { var zero T for i, v := range in { bitutil.SetBitTo(out, i, v != zero) diff --git a/go/arrow/compute/internal/kernels/helpers.go b/go/arrow/compute/internal/kernels/helpers.go index 686c4b3e0c2..1ac09ba43bf 100644 --- a/go/arrow/compute/internal/kernels/helpers.go +++ b/go/arrow/compute/internal/kernels/helpers.go @@ -37,9 +37,9 @@ import ( // which will receive a slice containing the raw input data along with // a slice to populate for the output data. // -// Note that bool is not included in exec.FixedWidthTypes since it is +// Note that bool is not included in arrow.FixedWidthType since it is // represented as a bitmap, not as a slice of bool. 
-func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec { +func ScalarUnary[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { arg0 := in.Values[0].Array inData := exec.GetSpanValues[Arg0T](&arg0, 1) @@ -51,7 +51,7 @@ func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Ar // ScalarUnaryNotNull is for generating a kernel to operate only on the // non-null values in the input array. The zerovalue of the output type // is used for any null input values. -func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec { +func ScalarUnaryNotNull[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { var ( arg0 = &in.Values[0].Array @@ -78,7 +78,7 @@ func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCt // ScalarUnaryBoolOutput is like ScalarUnary only it is for cases of boolean // output. The function should take in a slice of the input type and a slice // of bytes to fill with the output boolean bitmap. -func ScalarUnaryBoolOutput[Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec { +func ScalarUnaryBoolOutput[Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { arg0 := in.Values[0].Array inData := exec.GetSpanValues[Arg0T](&arg0, 1) @@ -127,7 +127,7 @@ func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](defVal bool, op f // It implements the handling to iterate the offsets and values calling // the provided function on each byte slice. 
The zero value of the OutT // will be used as the output for elements of the input that are null. -func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { +func ScalarUnaryNotNullBinaryArg[OutT arrow.FixedWidthType, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { var ( arg0 = &in.Values[0].Array @@ -156,14 +156,14 @@ func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int6 // ScalarUnaryBoolArg is like ScalarUnary except it specifically expects a // function that takes a byte slice since booleans arrays are represented // as a bitmap. -func ScalarUnaryBoolArg[OutT exec.FixedWidthTypes](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { +func ScalarUnaryBoolArg[OutT arrow.FixedWidthType](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, input *exec.ExecSpan, out *exec.ExecResult) error { outData := exec.GetSpanValues[OutT](out, 1) return op(ctx, input.Values[0].Array.Buffers[1].Buf, outData) } } -func UnboxScalar[T exec.FixedWidthTypes](val scalar.PrimitiveScalar) T { +func UnboxScalar[T arrow.FixedWidthType](val scalar.PrimitiveScalar) T { return *(*T)(unsafe.Pointer(&val.Data()[0])) } @@ -174,11 +174,11 @@ func UnboxBinaryScalar(val scalar.BinaryScalar) []byte { return val.Data() } -type arrArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error -type arrScalarFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error -type scalarArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error +type arrArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error +type arrScalarFn[OutT, Arg0T, 
Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error +type scalarArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error -type binaryOps[OutT, Arg0T, Arg1T exec.FixedWidthTypes] struct { +type binaryOps[OutT, Arg0T, Arg1T arrow.FixedWidthType] struct { arrArr arrArrFn[OutT, Arg0T, Arg1T] arrScalar arrScalarFn[OutT, Arg0T, Arg1T] scalarArr scalarArrFn[OutT, Arg0T, Arg1T] @@ -190,7 +190,7 @@ type binaryBoolOps struct { scalarArr func(ctx *exec.KernelCtx, lhs bool, rhs, out bitutil.Bitmap) error } -func ScalarBinary[OutT, Arg0T, Arg1T exec.FixedWidthTypes](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { +func ScalarBinary[OutT, Arg0T, Arg1T arrow.FixedWidthType](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { var ( a0 = exec.GetSpanValues[Arg0T](arg0, 1) @@ -281,7 +281,7 @@ func ScalarBinaryBools(ops *binaryBoolOps) exec.ArrayKernelExec { } } -func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { +func ScalarBinaryNotNull[OutT, Arg0T, Arg1T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) (err error) { // fast path if one side is entirely null if arg0.UpdateNullCount() == arg0.Len || arg1.UpdateNullCount() == arg1.Len { @@ -379,7 +379,7 @@ func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec. 
} } -type binaryBinOp[T exec.FixedWidthTypes | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T +type binaryBinOp[T arrow.FixedWidthType | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T func ScalarBinaryBinaryArgsBoolOut(itrFn func(*exec.ArraySpan) exec.ArrayIter[[]byte], op binaryBinOp[bool]) exec.ArrayKernelExec { arrArr := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { @@ -577,7 +577,7 @@ func intsCanFit(data *exec.ArraySpan, target arrow.Type) error { } } -func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBound, upperBound T) error { +func intsInRange[T arrow.IntType | arrow.UintType](data *exec.ArraySpan, lowerBound, upperBound T) error { if MinOf[T]() >= lowerBound && MaxOf[T]() <= upperBound { return nil } @@ -653,7 +653,7 @@ func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBo } type numeric interface { - exec.IntTypes | exec.UintTypes | constraints.Float + arrow.IntType | arrow.UintType | constraints.Float } func memCpySpan[T numeric](in, out *exec.ArraySpan) { @@ -883,12 +883,12 @@ func (bldr *execBufBuilder) finish() (buf *memory.Buffer) { return } -type bufferBuilder[T exec.FixedWidthTypes] struct { +type bufferBuilder[T arrow.FixedWidthType] struct { execBufBuilder zero T } -func newBufferBuilder[T exec.FixedWidthTypes](mem memory.Allocator) *bufferBuilder[T] { +func newBufferBuilder[T arrow.FixedWidthType](mem memory.Allocator) *bufferBuilder[T] { return &bufferBuilder[T]{ execBufBuilder: execBufBuilder{ mem: mem, @@ -901,11 +901,11 @@ func (b *bufferBuilder[T]) reserve(additional int) { } func (b *bufferBuilder[T]) unsafeAppend(value T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes([]T{value})) + b.execBufBuilder.unsafeAppend(arrow.GetBytes([]T{value})) } func (b *bufferBuilder[T]) unsafeAppendSlice(values []T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes(values)) + b.execBufBuilder.unsafeAppend(arrow.GetBytes(values)) } func (b 
*bufferBuilder[T]) len() int { return b.sz / int(unsafe.Sizeof(b.zero)) } @@ -914,7 +914,7 @@ func (b *bufferBuilder[T]) cap() int { return cap(b.data) / int(unsafe.Sizeof(b.zero)) } -func checkIndexBoundsImpl[T exec.IntTypes | exec.UintTypes](values *exec.ArraySpan, upperLimit uint64) error { +func checkIndexBoundsImpl[T arrow.IntType | arrow.UintType](values *exec.ArraySpan, upperLimit uint64) error { // for unsigned integers, if the values array is larger // than the maximum index value, then there's no need to bounds check isSigned := !arrow.IsUnsignedInteger(values.Type.ID()) diff --git a/go/arrow/compute/internal/kernels/numeric_cast.go b/go/arrow/compute/internal/kernels/numeric_cast.go index c055552bf7f..d31edfdd308 100644 --- a/go/arrow/compute/internal/kernels/numeric_cast.go +++ b/go/arrow/compute/internal/kernels/numeric_cast.go @@ -69,13 +69,13 @@ func CastIntegerToFloating(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec. return nil } -type decimal[T exec.DecimalTypes] interface { +type decimal[T decimal128.Num | decimal256.Num] interface { Less(T) bool GreaterEqual(T) bool LowBits() uint64 } -func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { +func decimalToIntImpl[InT decimal128.Num | decimal256.Num, OutT arrow.IntType | arrow.UintType](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { if !allowOverflow && (v.Less(min) || v.GreaterEqual(max)) { debug.Log("integer value out of bounds from decimal") *err = fmt.Errorf("%w: integer value out of bounds", arrow.ErrInvalid) @@ -84,7 +84,7 @@ func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes return OutT(v.LowBits()) } -func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal256ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch 
*exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal256Type) @@ -125,7 +125,7 @@ func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal128ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal128Type) @@ -166,7 +166,7 @@ func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal128[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal128.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -183,7 +183,7 @@ func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal256[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal256.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -200,7 +200,7 @@ func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastIntegerToDecimal[OutT decimal128.Num | decimal256.Num, Arg0 arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out 
*exec.ExecResult) error { var ( precision, scale int32 executor exec.ArrayKernelExec @@ -234,7 +234,7 @@ func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.Uint return executor(ctx, batch, out) } -func getCastIntToDecimal[T exec.DecimalTypes](inType arrow.Type) exec.ArrayKernelExec { +func getCastIntToDecimal[T decimal128.Num | decimal256.Num](inType arrow.Type) exec.ArrayKernelExec { switch inType { case arrow.UINT8: return CastIntegerToDecimal[T, uint8] @@ -543,7 +543,7 @@ func boolToNum[T numeric](_ *exec.KernelCtx, in []byte, out []T) error { return nil } -func checkFloatTrunc[InT constraints.Float, OutT exec.IntTypes | exec.UintTypes](in, out *exec.ArraySpan) error { +func checkFloatTrunc[InT constraints.Float, OutT arrow.IntType | arrow.UintType](in, out *exec.ArraySpan) error { wasTrunc := func(out OutT, in InT) bool { return InT(out) != in } @@ -665,7 +665,7 @@ func checkIntToFloatTrunc(in *exec.ArraySpan, outType arrow.Type) error { return nil } -func parseStringToNumberImpl[T exec.IntTypes | exec.UintTypes | exec.FloatTypes, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { +func parseStringToNumberImpl[T arrow.IntType | arrow.UintType | arrow.FloatType, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { return ScalarUnaryNotNullBinaryArg[T, OffsetT](func(_ *exec.KernelCtx, in []byte, err *error) T { st := *(*string)(unsafe.Pointer(&in)) v, e := parseFn(st) @@ -749,7 +749,7 @@ func addCommonNumberCasts[T numeric](outTy arrow.DataType, kernels []exec.Scalar return kernels } -func GetCastToInteger[T exec.IntTypes | exec.UintTypes](outType arrow.DataType) []exec.ScalarKernel { +func GetCastToInteger[T arrow.IntType | arrow.UintType](outType arrow.DataType) []exec.ScalarKernel { kernels := make([]exec.ScalarKernel, 0) output := exec.NewOutputType(outType) diff --git a/go/arrow/compute/internal/kernels/scalar_arithmetic.go b/go/arrow/compute/internal/kernels/scalar_arithmetic.go 
index cf17e9fd954..f1ed21065e4 100644 --- a/go/arrow/compute/internal/kernels/scalar_arithmetic.go +++ b/go/arrow/compute/internal/kernels/scalar_arithmetic.go @@ -254,7 +254,7 @@ func GetBitwiseBinaryKernels(op BitwiseOp) []exec.ScalarKernel { return append(kernels, NullExecKernel(2)) } -func bitwiseNot[T exec.IntTypes | exec.UintTypes](_ *exec.KernelCtx, arg T, _ *error) T { +func bitwiseNot[T arrow.IntType | arrow.UintType](_ *exec.KernelCtx, arg T, _ *error) T { return ^arg } @@ -290,7 +290,7 @@ const ( ShiftRight ) -func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelSignedImpl[T arrow.IntType, Unsigned arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8*SizeOf[T]() - 1) @@ -334,7 +334,7 @@ func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDi return nil } -func shiftKernelUnsignedImpl[T exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelUnsignedImpl[T arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8 * SizeOf[T]()) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go index 8e5ce1ab7c1..52cd2c31a2a 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go @@ -22,7 +22,6 @@ import ( "unsafe" "github.com/apache/arrow/go/v15/arrow" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/sys/cpu" ) @@ -32,12 +31,12 @@ type cmpfn func(arrow.Type, []byte, []byte, []byte, int64, int) var comparisonMap map[CompareOperator][3]cmpfn -func genCompareKernel[T exec.NumericTypes](op 
CompareOperator) *CompareData { +func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { if pureGo { return genGoCompareKernel(getCmpOp[T](op)) } - ty := exec.GetType[T]() + ty := arrow.GetType[T]() byteWidth := int(unsafe.Sizeof(T(0))) comparisonFns := comparisonMap[op] return &CompareData{ diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index c0aef5a04e9..b36524baa12 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -18,8 +18,8 @@ package kernels -import "github.com/apache/arrow/go/v15/arrow/compute/exec" +import "github.com/apache/arrow/go/v15/arrow" -func genCompareKernel[T exec.NumericTypes](op CompareOperator) *CompareData { +func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) } diff --git a/go/arrow/compute/internal/kernels/scalar_comparisons.go b/go/arrow/compute/internal/kernels/scalar_comparisons.go index 9a7640a8d8a..29e6db29cb2 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparisons.go +++ b/go/arrow/compute/internal/kernels/scalar_comparisons.go @@ -35,22 +35,22 @@ import ( type binaryKernel func(left, right, out []byte, offset int) -type cmpFn[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, []RightT, []uint32) -type cmpScalarLeft[LeftT, RightT exec.FixedWidthTypes] func(LeftT, []RightT, []uint32) -type cmpScalarRight[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, RightT, []uint32) +type cmpFn[LeftT, RightT arrow.FixedWidthType] func([]LeftT, []RightT, []uint32) +type cmpScalarLeft[LeftT, RightT arrow.FixedWidthType] func(LeftT, []RightT, []uint32) +type cmpScalarRight[LeftT, RightT arrow.FixedWidthType] func([]LeftT, RightT, []uint32) -type cmpOp[T exec.FixedWidthTypes] struct { +type cmpOp[T arrow.FixedWidthType] struct { arrArr cmpFn[T, T] arrScalar 
cmpScalarRight[T, T] scalarArr cmpScalarLeft[T, T] } -func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKernel { +func comparePrimitiveArrayArray[T arrow.FixedWidthType](op cmpFn[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) - right = exec.GetData[T](rightBytes) + left = arrow.GetData[T](leftBytes) + right = arrow.GetData[T](rightBytes) nvals = len(left) nbatches = nvals / batchSize tmpOutput [batchSize]uint32 @@ -83,11 +83,11 @@ func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKe } } -func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T]) binaryKernel { +func comparePrimitiveArrayScalar[T arrow.FixedWidthType](op cmpScalarRight[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) + left = arrow.GetData[T](leftBytes) rightVal = *(*T)(unsafe.Pointer(&rightBytes[0])) nvals = len(left) nbatches = nvals / batchSize @@ -121,12 +121,12 @@ func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T] } } -func comparePrimitiveScalarArray[T exec.FixedWidthTypes](op cmpScalarLeft[T, T]) binaryKernel { +func comparePrimitiveScalarArray[T arrow.FixedWidthType](op cmpScalarLeft[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( leftVal = *(*T)(unsafe.Pointer(&leftBytes[0])) - right = exec.GetData[T](rightBytes) + right = arrow.GetData[T](rightBytes) nvals = len(right) nbatches = nvals / batchSize @@ -181,7 +181,7 @@ func getOffsetSpanBytes(span *exec.ArraySpan) []byte { return buf[start : start+(span.Len*byteWidth)] } -func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func compareKernel[T arrow.FixedWidthType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out 
*exec.ExecResult) error { kn := ctx.Kernel.(*exec.ScalarKernel) knData := kn.Data.(CompareFuncData).Funcs() @@ -202,7 +202,7 @@ func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.Exec return nil } -func genGoCompareKernel[T exec.FixedWidthTypes](op *cmpOp[T]) *CompareData { +func genGoCompareKernel[T arrow.FixedWidthType](op *cmpOp[T]) *CompareData { return &CompareData{ funcAA: comparePrimitiveArrayArray(op.arrArr), funcAS: comparePrimitiveArrayScalar(op.arrScalar), @@ -376,7 +376,7 @@ func genDecimalCompareKernel[T decimal128.Num | decimal256.Num](op CompareOperat return } -func getCmpOp[T exec.NumericTypes](op CompareOperator) *cmpOp[T] { +func getCmpOp[T arrow.NumericType](op CompareOperator) *cmpOp[T] { switch op { case CmpEQ: return &cmpOp[T]{ @@ -524,7 +524,7 @@ func getBinaryCmp(op CompareOperator) binaryBinOp[bool] { return nil } -func numericCompareKernel[T exec.NumericTypes](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) { +func numericCompareKernel[T arrow.NumericType](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) { ex := compareKernel[T] kn = exec.NewScalarKernelWithSig(&exec.KernelSignature{ InputTypes: []exec.InputType{ty, ty}, diff --git a/go/arrow/compute/internal/kernels/string_casts.go b/go/arrow/compute/internal/kernels/string_casts.go index 76da901e33f..d9cf52320b3 100644 --- a/go/arrow/compute/internal/kernels/string_casts.go +++ b/go/arrow/compute/internal/kernels/string_casts.go @@ -116,7 +116,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC outOffsets := exec.GetSpanOffsets[OutOffsetsT](out, 1) castNumericUnsafe(arrow.INT64, arrow.INT32, - exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets)) + arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets)) return nil default: // upcast from int32 -> int64 @@ -127,7 +127,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC outOffsets 
:= exec.GetSpanOffsets[OutOffsetsT](out, 1) castNumericUnsafe(arrow.INT32, arrow.INT64, - exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets)) + arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets)) return nil } } @@ -201,8 +201,8 @@ func GetFsbCastKernels() []exec.ScalarKernel { func float16Formatter(v float16.Num) string { return v.String() } func date32Formatter(v arrow.Date32) string { return v.FormattedString() } func date64Formatter(v arrow.Date64) string { return v.FormattedString() } -func numericFormatterSigned[T exec.IntTypes](v T) string { return strconv.FormatInt(int64(v), 10) } -func numericFormatterUnsigned[T exec.UintTypes](v T) string { return strconv.FormatUint(uint64(v), 10) } +func numericFormatterSigned[T arrow.IntType](v T) string { return strconv.FormatInt(int64(v), 10) } +func numericFormatterUnsigned[T arrow.UintType](v T) string { return strconv.FormatUint(uint64(v), 10) } func float32Formatter(v float32) string { return strconv.FormatFloat(float64(v), 'g', -1, 32) } func float64Formatter(v float64) string { return strconv.FormatFloat(v, 'g', -1, 64) } @@ -247,7 +247,7 @@ func timeToStringCastExec[T timeIntrinsic](ctx *exec.KernelCtx, batch *exec.Exec return nil } -func numericToStringCastExec[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](formatter func(T) string) exec.ArrayKernelExec { +func numericToStringCastExec[T arrow.IntType | arrow.UintType | arrow.FloatType](formatter func(T) string) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( input = &batch.Values[0].Array diff --git a/go/arrow/compute/internal/kernels/vector_hash.go b/go/arrow/compute/internal/kernels/vector_hash.go index 9401e31cc5b..f6c9a7f39db 100644 --- a/go/arrow/compute/internal/kernels/vector_hash.go +++ b/go/arrow/compute/internal/kernels/vector_hash.go @@ -178,7 +178,7 @@ func doAppendFixedSize(action Action, memo hashing.MemoTable, arr *exec.ArraySpa 
}) } -func doAppendNumeric[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error { +func doAppendNumeric[T arrow.IntType | arrow.UintType | arrow.FloatType](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error { arrData := exec.GetSpanValues[T](arr, 1) shouldEncodeNulls := action.ShouldEncodeNulls() return bitutils.VisitBitBlocksShort(arr.Buffers[0].Buf, arr.Offset, arr.Len, diff --git a/go/arrow/compute/internal/kernels/vector_run_end_encode.go b/go/arrow/compute/internal/kernels/vector_run_end_encode.go index 076bef13684..017b9712025 100644 --- a/go/arrow/compute/internal/kernels/vector_run_end_encode.go +++ b/go/arrow/compute/internal/kernels/vector_run_end_encode.go @@ -46,18 +46,18 @@ type RunEndsType interface { int16 | int32 | int64 } -func readFixedWidthVal[V exec.FixedWidthTypes](inputValidity, inputValues []byte, offset int64, out *V) bool { +func readFixedWidthVal[V arrow.FixedWidthType](inputValidity, inputValues []byte, offset int64, out *V) bool { sz := int64(unsafe.Sizeof(*out)) *out = *(*V)(unsafe.Pointer(&inputValues[offset*sz])) return bitutil.BitIsSet(inputValidity, int(offset)) } -func writeFixedWidthVal[V exec.FixedWidthTypes](result *exec.ExecResult, offset int64, valid bool, value V) { +func writeFixedWidthVal[V arrow.FixedWidthType](result *exec.ExecResult, offset int64, valid bool, value V) { if len(result.Buffers[0].Buf) != 0 { bitutil.SetBitTo(result.Buffers[0].Buf, int(offset), valid) } - arr := exec.GetData[V](result.Buffers[1].Buf) + arr := arrow.GetData[V](result.Buffers[1].Buf) arr[offset] = value } @@ -73,7 +73,7 @@ func writeBoolVal(result *exec.ExecResult, offset int64, valid bool, value bool) bitutil.SetBitTo(result.Buffers[1].Buf, int(offset), value) } -type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] struct { +type runEndEncodeLoopFixedWidth[R RunEndsType, V arrow.FixedWidthType | bool] struct { inputLen, inputOffset int64 
inputValidity []byte inputValues []byte @@ -84,7 +84,7 @@ type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] st } func (re *runEndEncodeLoopFixedWidth[R, V]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) readOffset := re.inputOffset var currentRun V @@ -155,7 +155,7 @@ func (re *runEndEncodeLoopFixedWidth[R, V]) PreallocOutput(ctx *exec.KernelCtx, valueBuffer = ctx.Allocate(int(numOutput) * bufSpec.ByteWidth) } - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType) out.Release() *out = exec.ExecResult{ @@ -230,7 +230,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6 } valueBuffer := ctx.Allocate(re.width * int(numOutput)) - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType) out.Release() *out = exec.ExecResult{ @@ -258,7 +258,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6 } func (re *runEndEncodeFSB[R]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) outputValues := out.Children[1].Buffers[1].Buf readOffset := re.inputOffset @@ -362,7 +362,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numO valueBuffer := ctx.Allocate(int(re.estimatedValuesLen)) offsetsBuffer := ctx.Allocate(int(numOutput+1) * int(SizeOf[O]())) - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType) *out = exec.ExecResult{ Type: reeType, Len: re.inputLen, @@ -389,7 +389,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx 
*exec.KernelCtx, numO } func (re *runEndEncodeLoopBinary[R, O]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) outputOffsets := exec.GetSpanOffsets[O](&out.Children[1], 1) outputValues := out.Children[1].Buffers[2].Buf @@ -443,7 +443,7 @@ func validateRunEndType[R RunEndsType](length int64) error { return nil } -func createEncoder[R RunEndsType, V exec.FixedWidthTypes](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] { +func createEncoder[R RunEndsType, V arrow.FixedWidthType](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] { return &runEndEncodeLoopFixedWidth[R, V]{ inputLen: input.Len, inputOffset: input.Offset, @@ -539,7 +539,7 @@ func runEndEncodeImpl[R RunEndsType](ctx *exec.KernelCtx, batch *exec.ExecSpan, ) if inputLen == 0 { - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), inputArr.Type) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), inputArr.Type) *out = exec.ExecResult{ Type: reeType, Children: []exec.ArraySpan{ diff --git a/go/arrow/compute/internal/kernels/vector_selection.go b/go/arrow/compute/internal/kernels/vector_selection.go index 714e452325b..f08bb4100bf 100644 --- a/go/arrow/compute/internal/kernels/vector_selection.go +++ b/go/arrow/compute/internal/kernels/vector_selection.go @@ -99,12 +99,12 @@ type builder[T any] interface { UnsafeAppendBoolToBitmap(bool) } -func getTakeIndices[T exec.IntTypes | exec.UintTypes](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData { +func getTakeIndices[T arrow.IntType | arrow.UintType](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData { var ( filterData = filter.Buffers[1].Buf haveFilterNulls = filter.MayHaveNulls() filterIsValid = filter.Buffers[0].Buf - idxType = exec.GetDataType[T]() + idxType = arrow.GetDataType[T]() ) if haveFilterNulls && 
nullSelect == EmitNulls { @@ -394,7 +394,7 @@ func primitiveFilterImpl(wr writeFiltered, values *exec.ArraySpan, filter *exec. } } -type filterWriter[T exec.UintTypes] struct { +type filterWriter[T arrow.UintType] struct { outPosition int outOffset int valuesOffset int @@ -519,7 +519,7 @@ func PrimitiveFilter(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecRe return nil } -type primitiveGetter[T exec.IntTypes | bool] interface { +type primitiveGetter[T arrow.IntType | bool] interface { IsValid(int64) bool GetValue(int64) T NullCount() int64 @@ -542,7 +542,7 @@ func (b *boolGetter) GetValue(i int64) bool { func (b *boolGetter) NullCount() int64 { return b.inner.Nulls } func (b *boolGetter) Len() int64 { return b.inner.Len } -type primitiveGetterImpl[T exec.IntTypes] struct { +type primitiveGetterImpl[T arrow.IntType] struct { inner *exec.ArraySpan values []T } @@ -608,7 +608,7 @@ func (c *chunkedBoolGetter) GetValue(i int64) bool { func (c *chunkedBoolGetter) NullCount() int64 { return c.nulls } func (c *chunkedBoolGetter) Len() int64 { return c.len } -type chunkedPrimitiveGetter[T exec.IntTypes] struct { +type chunkedPrimitiveGetter[T arrow.IntType] struct { inner *arrow.Chunked resolver *exec.ChunkResolver nulls int64 @@ -619,7 +619,7 @@ type chunkedPrimitiveGetter[T exec.IntTypes] struct { valuesOffset []int64 } -func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] { +func newChunkedPrimitiveGetter[T arrow.IntType](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] { nchunks := len(arr.Chunks()) lengths := make([]int64, nchunks) valuesData := make([][]T, nchunks) @@ -630,7 +630,7 @@ func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrim lengths[i] = int64(c.Len()) valuesOffset[i] = int64(c.Data().Offset()) valuesIsValid[i] = c.NullBitmapBytes() - valuesData[i] = exec.GetValues[T](c.Data(), 1) + valuesData[i] = arrow.GetValues[T](c.Data(), 1) } return &chunkedPrimitiveGetter[T]{ @@ -662,7 
+662,7 @@ func (c *chunkedPrimitiveGetter[T]) GetValue(i int64) T { func (c *chunkedPrimitiveGetter[T]) NullCount() int64 { return c.nulls } func (c *chunkedPrimitiveGetter[T]) Len() int64 { return c.len } -func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) { +func primitiveTakeImpl[IdxT arrow.UintType, ValT arrow.IntType](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) { var ( indicesData = exec.GetSpanValues[IdxT](indices, 1) indicesIsValid = indices.Buffers[0].Buf @@ -747,7 +747,7 @@ func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitive out.Nulls = out.Len - validCount } -func booleanTakeImpl[IdxT exec.UintTypes](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) { +func booleanTakeImpl[IdxT arrow.UintType](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) { var ( indicesData = exec.GetSpanValues[IdxT](indices, 1) indicesIsValid = indices.Buffers[0].Buf @@ -876,7 +876,7 @@ func booleanTakeDispatch(values, indices *exec.ArraySpan, out *exec.ExecResult) return nil } -func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked, out []*exec.ExecResult) error { +func takeIdxChunkedDispatch[ValT arrow.IntType](values, indices *arrow.Chunked, out []*exec.ExecResult) error { getter := newChunkedPrimitiveGetter[ValT](values) var fn func(primitiveGetter[ValT], *exec.ArraySpan, *exec.ExecResult) @@ -901,7 +901,7 @@ func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked, return nil } -func takeIdxDispatch[ValT exec.IntTypes](values, indices *exec.ArraySpan, out *exec.ExecResult) error { +func takeIdxDispatch[ValT arrow.IntType](values, indices *exec.ArraySpan, out *exec.ExecResult) error { getter := &primitiveGetterImpl[ValT]{inner: values, values: exec.GetSpanValues[ValT](values, 1)} switch 
indices.Type.(arrow.FixedWidthDataType).Bytes() { @@ -1368,7 +1368,7 @@ func binaryFilterImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, values, filter return nil } -func takeExecImpl[T exec.UintTypes](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error { +func takeExecImpl[T arrow.UintType](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error { var ( validityBuilder = validityBuilder{mem: exec.GetAllocator(ctx.Ctx)} indicesValues = exec.GetSpanValues[T](indices, 1) @@ -1600,7 +1600,7 @@ func ListImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out.Buffers[1].WrapBuffer(offsetBuilder.finish()) out.Children = make([]exec.ArraySpan, 1) - out.Children[0].Type = exec.GetDataType[OffsetT]() + out.Children[0].Type = arrow.GetDataType[OffsetT]() out.Children[0].Len = int64(childIdxBuilder.len()) out.Children[0].Buffers[1].WrapBuffer(childIdxBuilder.finish()) diff --git a/go/arrow/compute/scalar_compare_test.go b/go/arrow/compute/scalar_compare_test.go index d209f72c800..1fa0591692e 100644 --- a/go/arrow/compute/scalar_compare_test.go +++ b/go/arrow/compute/scalar_compare_test.go @@ -89,7 +89,7 @@ func (c *CompareSuite) validateCompareScalarArr(op kernels.CompareOperator, dt a c.validateCompareDatum(op, lhs, &compute.ArrayDatum{rhs.Data()}, &compute.ArrayDatum{exp.Data()}) } -func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs, rhs T) bool { +func slowCompare[T arrow.NumericType | string](op kernels.CompareOperator, lhs, rhs T) bool { switch op { case kernels.CmpEQ: return lhs == rhs @@ -108,7 +108,7 @@ func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs, } } -// func simpleScalarArrayCompare[T exec.NumericTypes](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum 
{ +// func simpleScalarArrayCompare[T arrow.NumericType](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { // var ( // swap = lhs.Kind() == compute.KindArray // span exec.ArraySpan @@ -230,7 +230,7 @@ type valuer[T any] interface { Value(int) T } -func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { +func simpleArrArrCompare[T arrow.NumericType | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { var ( lArr = lhs.(*compute.ArrayDatum).MakeArray() rArr = rhs.(*compute.ArrayDatum).MakeArray() @@ -263,7 +263,7 @@ func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op return compute.NewDatum(result) } -type NumericCompareSuite[T exec.NumericTypes] struct { +type NumericCompareSuite[T arrow.NumericType] struct { CompareSuite } @@ -282,7 +282,7 @@ type NumericCompareSuite[T exec.NumericTypes] struct { // } func (n *NumericCompareSuite[T]) TestSimpleCompareArrayScalar() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() one := compute.NewDatum(scalar.MakeScalar(T(1))) n.Run(dt.String(), func() { @@ -361,7 +361,7 @@ func (n *NumericCompareSuite[T]) TestSimpleCompareArrayScalar() { } func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() one := compute.NewDatum(scalar.MakeScalar(T(1))) n.Run(dt.String(), func() { @@ -440,7 +440,7 @@ func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() { } func (n *NumericCompareSuite[T]) TestNullScalar() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() null := compute.NewDatum(scalar.MakeNullScalar(dt)) n.Run(dt.String(), func() { @@ -453,7 +453,7 @@ func (n *NumericCompareSuite[T]) TestNullScalar() { } func (n *NumericCompareSuite[T]) TestSimpleCompareArrArr() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() 
n.Run(dt.String(), func() { n.validateCompare(kernels.CmpEQ, dt, `[]`, `[]`, `[]`) diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index 9410720de79..c37db584805 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/compute" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "github.com/apache/arrow/go/v15/arrow/decimal128" "github.com/apache/arrow/go/v15/arrow/decimal256" "github.com/apache/arrow/go/v15/arrow/memory" @@ -36,7 +35,7 @@ import ( "golang.org/x/exp/constraints" ) -func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) { +func checkUniqueDict[I arrow.IntType | arrow.UintType](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) { out, err := compute.Unique(context.TODO(), input) require.NoError(t, err) defer out.Release() @@ -52,8 +51,8 @@ func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compu require.Truef(t, array.Equal(exDict, resultDict), "wanted: %s\ngot: %s", exDict, resultDict) - want := exec.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1) - got := exec.GetValues[I](result.Indices().Data(), 1) + want := arrow.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1) + got := arrow.GetValues[I](result.Indices().Data(), 1) assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got) } @@ -81,15 +80,15 @@ func checkDictionaryUnique(t *testing.T, input compute.ArrayLikeDatum, expected } } -func checkUniqueFixedWidth[T exec.FixedWidthTypes](t *testing.T, input, expected arrow.Array) { +func checkUniqueFixedWidth[T arrow.FixedWidthType](t *testing.T, input, expected arrow.Array) { result, err := compute.UniqueArray(context.TODO(), input) require.NoError(t, err) defer result.Release() 
require.Truef(t, arrow.TypeEqual(result.DataType(), expected.DataType()), "wanted: %s\ngot: %s", expected.DataType(), result.DataType()) - want := exec.GetValues[T](expected.Data(), 1) - got := exec.GetValues[T](expected.Data(), 1) + want := arrow.GetValues[T](expected.Data(), 1) + got := arrow.GetValues[T](expected.Data(), 1) assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got) } @@ -106,7 +105,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t *testing.T, input, exp createSlice := func(v arrow.Array) [][]byte { var ( - offsets = exec.GetOffsets[OffsetType](v.Data(), 1) + offsets = arrow.GetOffsets[OffsetType](v.Data(), 1) data = v.Data().Buffers()[2].Bytes() out = make([][]byte, v.Len()) ) @@ -124,7 +123,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t *testing.T, input, exp } type ArrowType interface { - exec.FixedWidthTypes | string | []byte + arrow.FixedWidthType | string | []byte } type builder[T ArrowType] interface { @@ -166,7 +165,7 @@ func checkUniqueFixedSizeBinary(t *testing.T, mem memory.Allocator, dt *arrow.Fi assert.ElementsMatch(t, want, got) } -func checkUniqueFW[T exec.FixedWidthTypes](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) { +func checkUniqueFW[T arrow.FixedWidthType](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) { input := makeArray(mem, dt, inValues, inValid) defer input.Release() expected := makeArray(mem, dt, outValues, outValid) @@ -189,7 +188,7 @@ func checkUniqueVW[T string | []byte](t *testing.T, mem memory.Allocator, dt arr } } -type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Float] struct { +type PrimitiveHashKernelSuite[T arrow.IntType | arrow.UintType | constraints.Float] struct { suite.Suite mem *memory.CheckedAllocator @@ -197,7 +196,7 @@ type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Flo } func (ps 
*PrimitiveHashKernelSuite[T]) SetupSuite() { - ps.dt = exec.GetDataType[T]() + ps.dt = arrow.GetDataType[T]() } func (ps *PrimitiveHashKernelSuite[T]) SetupTest() { diff --git a/go/arrow/compute/vector_selection_test.go b/go/arrow/compute/vector_selection_test.go index f44840ba720..4e38bc995cd 100644 --- a/go/arrow/compute/vector_selection_test.go +++ b/go/arrow/compute/vector_selection_test.go @@ -459,9 +459,9 @@ func (f *FilterKernelNumeric) TestFilterNumeric() { }) } -type comparator[T exec.NumericTypes] func(a, b T) bool +type comparator[T arrow.NumericType] func(a, b T) bool -func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T] { +func getComparator[T arrow.NumericType](op kernels.CompareOperator) comparator[T] { return []comparator[T]{ // EQUAL func(a, b T) bool { return a == b }, @@ -478,7 +478,7 @@ func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T }[int8(op)] } -func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func compareAndFilterImpl[T arrow.NumericType](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { filtered := make([]T, 0, len(data)) for _, v := range data { if fn(v) { @@ -488,12 +488,12 @@ func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, f return exec.ArrayFromSlice(mem, filtered) } -func compareAndFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return compareAndFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) 
arrow.Array { cmp := getComparator[T](op) i := 0 return compareAndFilterImpl(mem, data, func(e T) bool { @@ -503,7 +503,7 @@ func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, othe }) } -func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func createFilterImpl[T arrow.NumericType](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { bldr := array.NewBooleanBuilder(mem) defer bldr.Release() for _, v := range data { @@ -512,12 +512,12 @@ func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn fu return bldr.NewArray() } -func createFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func createFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return createFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { +func createFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) i := 0 return createFilterImpl(mem, data, func(e T) bool { @@ -527,8 +527,8 @@ func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other [] }) } -func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareScalarAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare scalar and filter", func(t *testing.T) { @@ -537,7 +537,7 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem t.Run(fmt.Sprintf("random %d", length), func(t *testing.T) { arr := rng.Numeric(dt.ID(), 
length, 0, 100, 0) defer arr.Release() - data := exec.GetData[T](arr.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](arr.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterValue(mem, data, 50, op) defer selection.Release() @@ -556,8 +556,8 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem }) } -func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareArrayAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare array and filter", func(t *testing.T) { for i := 3; i < 10; i++ { @@ -568,8 +568,8 @@ func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem m rhs := rng.Numeric(dt.ID(), length, 0, 100, 0) defer rhs.Release() - data := exec.GetData[T](lhs.Data().Buffers()[1].Bytes()) - other := exec.GetData[T](rhs.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](lhs.Data().Buffers()[1].Bytes()) + other := arrow.GetData[T](rhs.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterSlice(mem, data, other, op) defer selection.Release() diff --git a/go/arrow/flight/doc.go b/go/arrow/flight/doc.go index 68d1ca3458f..c36a808b00e 100644 --- a/go/arrow/flight/doc.go +++ b/go/arrow/flight/doc.go @@ -74,5 +74,4 @@ // the main thread reset the timer every time a write operation completes successfully // (that means one needs to use to_batches() + write_batch and not write_table). 
- package flight diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index c9c8b390a86..3e1da64dcf8 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -42,7 +42,7 @@ type ( FlightEndpoint = flight.FlightEndpoint Location = flight.Location FlightInfo = flight.FlightInfo - PollInfo = flight.PollInfo + PollInfo = flight.PollInfo FlightData = flight.FlightData PutResult = flight.PutResult Ticket = flight.Ticket diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 84dc6389832..49f711cdacd 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -826,7 +826,8 @@ type Array struct { Offset interface{} `json:"OFFSET,omitempty"` Size interface{} `json:"SIZE,omitempty"` Children []Array `json:"children,omitempty"` - Variadic []string `json:"VARIADIC_BUFFERS,omitempty"` + Variadic []string `json:"VARIADIC_DATA_BUFFERS,omitempty"` + Views []interface{} `json:"VIEWS,omitempty"` } func (a *Array) MarshalJSON() ([]byte, error) { @@ -1090,7 +1091,7 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr case arrow.BinaryViewDataType: valids := validsToBitmap(validsFromJSON(arr.Valids), mem) nulls := arr.Count - bitutil.CountSetBits(valids.Bytes(), 0, arr.Count) - headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Data) + headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Views) extraBufs := variadicBuffersFromJSON(arr.Variadic) defer valids.Release() defer headers.Release() @@ -1513,7 +1514,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, false), + Views: stringHeadersToJSON(arr, false), Variadic: variadic, } case *array.BinaryView: @@ -1522,7 +1523,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, true), 
+ Views: stringHeadersToJSON(arr, true), Variadic: variadic, } case *array.List: @@ -2406,7 +2407,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface } values[i].SetIndexOffset(int32(bufIdx), int32(bufOffset)) - prefix, err := hex.DecodeString(v["PREFIX"].(string)) + prefix, err := hex.DecodeString(v["PREFIX_HEX"].(string)) if err != nil { panic(err) } @@ -2426,7 +2427,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface func stringHeadersToJSON(arr array.ViewLike, isBinary bool) []interface{} { type StringHeader struct { Size int `json:"SIZE"` - Prefix *string `json:"PREFIX,omitempty"` + Prefix *string `json:"PREFIX_HEX,omitempty"` BufferIdx *int `json:"BUFFER_INDEX,omitempty"` BufferOff *int `json:"OFFSET,omitempty"` Inlined *string `json:"INLINED,omitempty"` diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 31f3cb238ec..164210cbc23 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -6165,7 +6165,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6187,7 +6187,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6199,7 +6199,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" @@ -6221,7 +6221,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "5" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] }, @@ -6238,7 +6238,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6260,7 +6260,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35353535" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6272,20 +6272,20 @@ func makeViewTypesWantJSONs() 
string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6298,7 +6298,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "5555" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] } @@ -6317,20 +6317,20 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "31C3A931C3A9" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6343,7 +6343,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "3535" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] }, @@ -6357,7 +6357,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "1é1é" @@ -6379,7 +6379,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "55" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] } diff --git a/go/arrow/internal/testing/tools/bits.go b/go/arrow/internal/testing/tools/bits.go index c123573e2fa..ea6a5432e5c 100644 --- a/go/arrow/internal/testing/tools/bits.go +++ b/go/arrow/internal/testing/tools/bits.go @@ -22,7 +22,7 @@ import "math/bits" // The low bit of each nibble is tested, therefore integers should be written as 8-digit // hex numbers consisting of 1s or 0s. 
// -// IntsToBitsLSB(0x11001010) -> 0x35 +// IntsToBitsLSB(0x11001010) -> 0x35 func IntsToBitsLSB(v ...int32) []byte { res := make([]byte, 0, len(v)) for _, b := range v { diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 619eebd97dc..7b5df167ea4 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -45,3 +45,15 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool { } return true } + +// HasBufferSizesBuffer returns whether a given type has an extra buffer +// in the C ABI to store the sizes of other buffers. Currently this is only +// StringView and BinaryView. +func HasBufferSizesBuffer(id arrow.Type) bool { + switch id { + case arrow.STRING_VIEW, arrow.BINARY_VIEW: + return true + default: + return false + } +} diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index dd51a761510..7bc7f6ebfaa 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -521,7 +521,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { case *arrow.RunEndEncodedType: field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) runEnds := ctx.loadChild(dt.RunEnds()) defer runEnds.Release() @@ -583,7 +583,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -591,7 +591,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, 
int(field.NullCount()), 0) } @@ -603,7 +603,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData for i := 0; i < int(nVariadicBufs); i++ { buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -611,7 +611,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -619,7 +619,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -630,7 +630,7 @@ func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -641,7 +641,7 @@ func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer 
sub.Release() @@ -651,7 +651,7 @@ func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.A func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -661,7 +661,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) ar func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { @@ -704,7 +704,7 @@ func (ctx *arrayLoaderContext) loadUnion(dt arrow.UnionType) arrow.ArrayData { } } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { subs[i] = ctx.loadChild(f.Type) @@ -768,11 +768,3 @@ func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker } return dictutils.KindReplacement, nil } - -func releaseBuffers(buffers []*memory.Buffer) { - for _, b := range buffers { - if b != nil { - b.Release() - } - } -} diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index e9d59f0e35e..31ce53a0f1a 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -34,6 +34,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/internal/dictutils" "github.com/apache/arrow/go/v15/arrow/internal/flatbuf" "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/internal/utils" ) type swriter struct { @@ -746,42 +747,22 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { w.depth++ case *arrow.ListViewType, *arrow.LargeListViewType: - data := arr.Data() arr := arr.(array.VarLenListLike) - offsetTraits := 
arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - rngOff, rngLen := array.RangeOfValuesUsed(arr) - voffsets := w.getValueOffsetsAtBaseValue(arr, rngOff) - p.body = append(p.body, voffsets) - vsizes := data.Buffers()[2] - if vsizes != nil { - if data.Offset() != 0 || vsizes.Len() > offsetTraits.BytesRequired(arr.Len()) { - beg := offsetTraits.BytesRequired(data.Offset()) - end := beg + offsetTraits.BytesRequired(data.Len()) - vsizes = memory.NewBufferBytes(vsizes.Bytes()[beg:end]) - } else { - vsizes.Retain() - } - } + voffsets, minOffset, maxEnd := w.getZeroBasedListViewOffsets(arr) + vsizes := w.getListViewSizes(arr) + + p.body = append(p.body, voffsets) p.body = append(p.body, vsizes) w.depth-- var ( - values = arr.ListValues() - mustRelease = false - values_offset = int64(rngOff) - values_end = int64(rngOff + rngLen) + values = arr.ListValues() ) - defer func() { - if mustRelease { - values.Release() - } - }() - if arr.Len() > 0 && values_end < int64(values.Len()) { - // must also slice the values - values = array.NewSlice(values, values_offset, values_end) - mustRelease = true + if minOffset != 0 || maxEnd < int64(values.Len()) { + values = array.NewSlice(values, minOffset, maxEnd) + defer values.Release() } err := w.visit(p, values) @@ -882,61 +863,92 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer return voffsets } -// Truncates the offsets if needed and shifts the values if minOffset > 0. -// The offsets returned are corrected assuming the child values are truncated -// and now start at minOffset. -// -// This function only works on offset buffers of ListViews and LargeListViews. -// TODO(felipecrv): Unify this with getZeroBasedValueOffsets. 
-func (w *recordEncoder) getValueOffsetsAtBaseValue(arr arrow.Array, minOffset int) *memory.Buffer { - data := arr.Data() - voffsets := data.Buffers()[1] - offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - offsetBytesNeeded := offsetTraits.BytesRequired(data.Len()) +// getZeroBasedListViewOffsets returns the offsets buffer to serialize for arr, +// together with the [minOffset, maxEnd) range of child values those offsets +// refer to. When arr has a zero offset the existing buffer is reused (sliced or +// retained); otherwise a new buffer is allocated from mem and filled with the +// offsets shifted down by the smallest start offset found in arr. +func getZeroBasedListViewOffsets[OffsetT int32 | int64](mem memory.Allocator, arr array.VarLenListLike) (valueOffsets *memory.Buffer, minOffset, maxEnd OffsetT) { + requiredBytes := int(unsafe.Sizeof(minOffset)) * arr.Len() + if arr.Data().Offset() == 0 { + // slice offsets to used extent, in case we have truncated slice + minOffset, maxEnd = 0, OffsetT(arr.ListValues().Len()) + valueOffsets = arr.Data().Buffers()[1] + if valueOffsets.Len() > requiredBytes { + valueOffsets = memory.SliceBuffer(valueOffsets, 0, requiredBytes) + } else { + valueOffsets.Retain() + } + return + } - if voffsets == nil || voffsets.Len() == 0 { - return nil + // non-zero offset, it's likely that the smallest offset is not zero + // we must a) create a new offsets array with shifted offsets and + // b) slice the values array accordingly + + valueOffsets = memory.NewResizableBuffer(mem) + valueOffsets.Resize(requiredBytes) + if arr.Len() > 0 { + // seed with the max value of int32/int64: (^0 << (bits-1)) is the most + // negative value, so its complement is the largest positive one + minOffset = ^((^OffsetT(0)) << ((8 * unsafe.Sizeof(minOffset)) - 1)) + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + minOffset = utils.Min(minOffset, OffsetT(start)) + maxEnd = utils.Max(maxEnd, OffsetT(end)) + } + } + + offsets := arrow.GetData[OffsetT](arr.Data().Buffers()[1].Bytes())[arr.Data().Offset():] + destOffset := arrow.GetData[OffsetT](valueOffsets.Bytes()) + for i := 0; i < arr.Len(); i++ { + destOffset[i] = offsets[i] - minOffset } + return +} - needsTruncate := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() - needsShift := minOffset > 0 +func getListViewSizes[OffsetT int32 | int64](arr array.VarLenListLike) *memory.Buffer { + var z OffsetT + requiredBytes := 
int(unsafe.Sizeof(z)) * arr.Len() + sizes := arr.Data().Buffers()[2] - if needsTruncate || needsShift { - shiftedOffsets := memory.NewResizableBuffer(w.mem) - shiftedOffsets.Resize(offsetBytesNeeded) + if arr.Data().Offset() != 0 || sizes.Len() > requiredBytes { + // slice offsets to used extent, in case we have truncated slice + offsetBytes := arr.Data().Offset() * int(unsafe.Sizeof(z)) + sizes = memory.SliceBuffer(sizes, offsetBytes, requiredBytes) + } else { + sizes.Retain() + } + return sizes +} - switch arr.DataType().Layout().Buffers[1].ByteWidth { - case 8: - dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] +func (w *recordEncoder) getZeroBasedListViewOffsets(arr array.VarLenListLike) (*memory.Buffer, int64, int64) { + if arr.Len() == 0 { + return nil, 0, 0 + } - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int64(minOffset) - } - } else { - copy(dest, offsets) - } - default: - debug.Assert(arr.DataType().Layout().Buffers[1].ByteWidth == 4, "invalid offset bytewidth") - dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] + var ( + outOffsets *memory.Buffer + minOff, maxEnd int64 + ) - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int32(minOffset) - } - } else { - copy(dest, offsets) - } - } + switch v := arr.(type) { + case *array.ListView: + voffsets, outOff, outEnd := getZeroBasedListViewOffsets[int32](w.mem, v) + outOffsets = voffsets + minOff, maxEnd = int64(outOff), int64(outEnd) + case *array.LargeListView: + outOffsets, minOff, maxEnd = getZeroBasedListViewOffsets[int64](w.mem, v) + } + return outOffsets, minOff, maxEnd +} - voffsets = shiftedOffsets - } else { - voffsets.Retain() +func (w *recordEncoder) getListViewSizes(arr array.VarLenListLike) *memory.Buffer { + if arr.Len() == 
0 { + return nil } - return voffsets + switch v := arr.(type) { + case *array.ListView: + return getListViewSizes[int32](v) + case *array.LargeListView: + return getListViewSizes[int64](v) + } + return nil } func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer { diff --git a/go/arrow/memory/util.go b/go/arrow/memory/util.go index 3b0d3a5cb9e..6cc7ec91b96 100644 --- a/go/arrow/memory/util.go +++ b/go/arrow/memory/util.go @@ -35,3 +35,11 @@ func isMultipleOfPowerOf2(v int, d int) bool { func addressOf(b []byte) uintptr { return uintptr(unsafe.Pointer(&b[0])) } + +func ReleaseBuffers(buffers []*Buffer) { + for _, b := range buffers { + if b != nil { + b.Release() + } + } +} diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go new file mode 100644 index 00000000000..67fa8a266b3 --- /dev/null +++ b/go/arrow/type_traits.go @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package arrow + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v15/arrow/decimal128" + "github.com/apache/arrow/go/v15/arrow/decimal256" + "github.com/apache/arrow/go/v15/arrow/float16" + "golang.org/x/exp/constraints" +) + +// IntType is a type constraint for raw values represented as signed +// integer types by Arrow. We aren't just using constraints.Signed +// because we don't want to include the raw `int` type here whose size +// changes based on the architecture (int32 on 32-bit architectures and +// int64 on 64-bit architectures). +// +// This will also cover types like MonthInterval or the time types +// as their underlying types are int32 and int64 which will get covered +// by using the ~ (underlying type) prefix in the constraint. +type IntType interface { + ~int8 | ~int16 | ~int32 | ~int64 +} + +// UintType is a type constraint for raw values represented as unsigned +// integer types by Arrow. We aren't just using constraints.Unsigned +// because we don't want to include the raw `uint` type here whose size +// changes based on the architecture (uint32 on 32-bit architectures and +// uint64 on 64-bit architectures). We also don't want to include uintptr. +type UintType interface { + ~uint8 | ~uint16 | ~uint32 | ~uint64 +} + +// FloatType is a type constraint for raw values for representing +// floating point values in Arrow. This consists of constraints.Float and +// float16.Num. +type FloatType interface { + float16.Num | constraints.Float +} + +// NumericType is a type constraint for just signed/unsigned integers +// and float32/float64. +type NumericType interface { + IntType | UintType | constraints.Float +} + +// FixedWidthType is a type constraint for raw values in Arrow that +// can be represented as FixedWidth byte slices. Specifically this is for +// using Go generics to easily re-type a byte slice to a properly-typed +// slice. 
Booleans are excluded here since they are represented by Arrow +// as a bitmap and thus the buffer can't be just reinterpreted as a []bool +type FixedWidthType interface { + IntType | UintType | + FloatType | decimal128.Num | decimal256.Num | + DayTimeInterval | MonthDayNanoInterval +} + +type TemporalType interface { + Date32 | Date64 | Time32 | Time64 | + Timestamp | Duration | DayTimeInterval | + MonthInterval | MonthDayNanoInterval +} + +func reinterpretSlice[Out, T any](b []T) []Out { + if cap(b) == 0 { + return nil + } + out := (*Out)(unsafe.Pointer(&b[:1][0])) + + lenBytes := len(b) * int(unsafe.Sizeof(b[0])) + capBytes := cap(b) * int(unsafe.Sizeof(b[0])) + + lenOut := lenBytes / int(unsafe.Sizeof(*out)) + capOut := capBytes / int(unsafe.Sizeof(*out)) + + return unsafe.Slice(out, capOut)[:lenOut] +} + +// GetValues reinterprets the data.Buffers()[i] to a slice of T with len=data.Len(). +// +// If the buffer is nil, nil will be returned. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). +func GetValues[T FixedWidthType](data ArrayData, i int) []T { + if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { + return nil + } + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()] +} + +// GetOffsets reinterprets the data.Buffers()[i] to a slice of T with len=data.Len()+1. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). +func GetOffsets[T int32 | int64](data ArrayData, i int) []T { + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()+1] +} + +// GetBytes reinterprets a slice of T to a slice of bytes. +func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { + return reinterpretSlice[byte](in) +} + +// GetData reinterprets a slice of bytes to a slice of T. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). 
+func GetData[T FixedWidthType | ViewHeader](in []byte) []T { + return reinterpretSlice[T](in) +} + +var typMap = map[reflect.Type]DataType{ + reflect.TypeOf(false): FixedWidthTypes.Boolean, + reflect.TypeOf(int8(0)): PrimitiveTypes.Int8, + reflect.TypeOf(int16(0)): PrimitiveTypes.Int16, + reflect.TypeOf(int32(0)): PrimitiveTypes.Int32, + reflect.TypeOf(int64(0)): PrimitiveTypes.Int64, + reflect.TypeOf(uint8(0)): PrimitiveTypes.Uint8, + reflect.TypeOf(uint16(0)): PrimitiveTypes.Uint16, + reflect.TypeOf(uint32(0)): PrimitiveTypes.Uint32, + reflect.TypeOf(uint64(0)): PrimitiveTypes.Uint64, + reflect.TypeOf(float32(0)): PrimitiveTypes.Float32, + reflect.TypeOf(float64(0)): PrimitiveTypes.Float64, + reflect.TypeOf(string("")): BinaryTypes.String, + reflect.TypeOf(Date32(0)): FixedWidthTypes.Date32, + reflect.TypeOf(Date64(0)): FixedWidthTypes.Date64, + reflect.TypeOf(true): FixedWidthTypes.Boolean, + reflect.TypeOf(float16.Num{}): FixedWidthTypes.Float16, + reflect.TypeOf([]byte{}): BinaryTypes.Binary, +} + +// GetDataType returns the appropriate DataType for the given type T +// only for non-parametric types. This uses a map and reflection internally +// so don't call this in a tight loop, instead call this once and then use +// a closure with the result. +func GetDataType[T NumericType | bool | string | []byte | float16.Num]() DataType { + var z T + return typMap[reflect.TypeOf(z)] +} + +// GetType returns the appropriate Type for the given type T, only for non-parametric +// types. This uses a map and reflection internally so don't call this in +// a tight loop, instead call it once and then use a closure with the result. 
+func GetType[T NumericType | bool | string]() Type { + var z T + return typMap[reflect.TypeOf(z)].ID() +} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index f573ad3c65a..d600ba29c11 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal128" @@ -47,16 +46,12 @@ func (decimal128Traits) PutValue(b []byte, v decimal128.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal128.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal128SizeBytes)[:len(b)/Decimal128SizeBytes] + return GetData[decimal128.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (decimal128Traits) CastToBytes(b []decimal128.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal128SizeBytes)[:len(b)*Decimal128SizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index adf3cc3e0bc..fded46a0a52 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal256" @@ -44,15 +43,11 @@ func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { // CastFromBytes reinterprets the slice b to a slice of decimal256 func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal256.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal256SizeBytes)[:len(b)/Decimal256SizeBytes] + return GetData[decimal256.Num](b) } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal256SizeBytes)[:len(b)*Decimal256SizeBytes] + return GetBytes(b) } func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index e59efd4c248..5369ad352f8 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -46,16 +45,12 @@ func (float16Traits) PutValue(b []byte, v float16.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (float16Traits) CastFromBytes(b []byte) []float16.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float16.Num)(unsafe.Pointer(h.Data)), cap(b)/Float16SizeBytes)[:len(b)/Float16SizeBytes] + return GetData[float16.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (float16Traits) CastToBytes(b []float16.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float16SizeBytes)[:len(b)*Float16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 5fbd7a52489..ca530a72323 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -57,16 +56,12 @@ func (monthTraits) PutValue(b []byte, v MonthInterval) { // // NOTE: len(b) must be a multiple of MonthIntervalSizeBytes. func (monthTraits) CastFromBytes(b []byte) []MonthInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthIntervalSizeBytes)[:len(b)/MonthIntervalSizeBytes] + return GetData[MonthInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthTraits) CastToBytes(b []MonthInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthIntervalSizeBytes)[:len(b)*MonthIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -94,16 +89,12 @@ func (daytimeTraits) PutValue(b []byte, v DayTimeInterval) { // // NOTE: len(b) must be a multiple of DayTimeIntervalSizeBytes. func (daytimeTraits) CastFromBytes(b []byte) []DayTimeInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*DayTimeInterval)(unsafe.Pointer(h.Data)), cap(b)/DayTimeIntervalSizeBytes)[:len(b)/DayTimeIntervalSizeBytes] + return GetData[DayTimeInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (daytimeTraits) CastToBytes(b []DayTimeInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DayTimeIntervalSizeBytes)[:len(b)*DayTimeIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -132,16 +123,12 @@ func (monthDayNanoTraits) PutValue(b []byte, v MonthDayNanoInterval) { // // NOTE: len(b) must be a multiple of MonthDayNanoIntervalSizeBytes. func (monthDayNanoTraits) CastFromBytes(b []byte) []MonthDayNanoInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthDayNanoInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthDayNanoIntervalSizeBytes)[:len(b)/MonthDayNanoIntervalSizeBytes] + return GetData[MonthDayNanoInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthDayNanoTraits) CastToBytes(b []MonthDayNanoInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthDayNanoIntervalSizeBytes)[:len(b)*MonthDayNanoIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 57606c0fce6..06412466032 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -20,7 +20,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -65,16 +64,12 @@ func (int64Traits) PutValue(b []byte, v int64) { // // NOTE: len(b) must be a multiple of Int64SizeBytes. func (int64Traits) CastFromBytes(b []byte) []int64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int64)(unsafe.Pointer(h.Data)), cap(b)/Int64SizeBytes)[:len(b)/Int64SizeBytes] + return GetData[int64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (int64Traits) CastToBytes(b []int64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int64SizeBytes)[:len(b)*Int64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -101,16 +96,12 @@ func (uint64Traits) PutValue(b []byte, v uint64) { // // NOTE: len(b) must be a multiple of Uint64SizeBytes. func (uint64Traits) CastFromBytes(b []byte) []uint64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/Uint64SizeBytes)[:len(b)/Uint64SizeBytes] + return GetData[uint64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint64Traits) CastToBytes(b []uint64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint64SizeBytes)[:len(b)*Uint64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -137,16 +128,12 @@ func (float64Traits) PutValue(b []byte, v float64) { // // NOTE: len(b) must be a multiple of Float64SizeBytes. func (float64Traits) CastFromBytes(b []byte) []float64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float64)(unsafe.Pointer(h.Data)), cap(b)/Float64SizeBytes)[:len(b)/Float64SizeBytes] + return GetData[float64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float64Traits) CastToBytes(b []float64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float64SizeBytes)[:len(b)*Float64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -173,16 +160,12 @@ func (int32Traits) PutValue(b []byte, v int32) { // // NOTE: len(b) must be a multiple of Int32SizeBytes. 
func (int32Traits) CastFromBytes(b []byte) []int32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int32)(unsafe.Pointer(h.Data)), cap(b)/Int32SizeBytes)[:len(b)/Int32SizeBytes] + return GetData[int32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int32Traits) CastToBytes(b []int32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int32SizeBytes)[:len(b)*Int32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -209,16 +192,12 @@ func (uint32Traits) PutValue(b []byte, v uint32) { // // NOTE: len(b) must be a multiple of Uint32SizeBytes. func (uint32Traits) CastFromBytes(b []byte) []uint32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint32)(unsafe.Pointer(h.Data)), cap(b)/Uint32SizeBytes)[:len(b)/Uint32SizeBytes] + return GetData[uint32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint32Traits) CastToBytes(b []uint32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint32SizeBytes)[:len(b)*Uint32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -245,16 +224,12 @@ func (float32Traits) PutValue(b []byte, v float32) { // // NOTE: len(b) must be a multiple of Float32SizeBytes. func (float32Traits) CastFromBytes(b []byte) []float32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float32)(unsafe.Pointer(h.Data)), cap(b)/Float32SizeBytes)[:len(b)/Float32SizeBytes] + return GetData[float32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float32Traits) CastToBytes(b []float32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float32SizeBytes)[:len(b)*Float32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
@@ -281,16 +256,12 @@ func (int16Traits) PutValue(b []byte, v int16) { // // NOTE: len(b) must be a multiple of Int16SizeBytes. func (int16Traits) CastFromBytes(b []byte) []int16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int16)(unsafe.Pointer(h.Data)), cap(b)/Int16SizeBytes)[:len(b)/Int16SizeBytes] + return GetData[int16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int16Traits) CastToBytes(b []int16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int16SizeBytes)[:len(b)*Int16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -317,16 +288,12 @@ func (uint16Traits) PutValue(b []byte, v uint16) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (uint16Traits) CastFromBytes(b []byte) []uint16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint16)(unsafe.Pointer(h.Data)), cap(b)/Uint16SizeBytes)[:len(b)/Uint16SizeBytes] + return GetData[uint16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint16Traits) CastToBytes(b []uint16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint16SizeBytes)[:len(b)*Uint16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -353,16 +320,12 @@ func (int8Traits) PutValue(b []byte, v int8) { // // NOTE: len(b) must be a multiple of Int8SizeBytes. func (int8Traits) CastFromBytes(b []byte) []int8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int8)(unsafe.Pointer(h.Data)), cap(b)/Int8SizeBytes)[:len(b)/Int8SizeBytes] + return GetData[int8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (int8Traits) CastToBytes(b []int8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int8SizeBytes)[:len(b)*Int8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -389,16 +352,12 @@ func (uint8Traits) PutValue(b []byte, v uint8) { // // NOTE: len(b) must be a multiple of Uint8SizeBytes. func (uint8Traits) CastFromBytes(b []byte) []uint8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint8)(unsafe.Pointer(h.Data)), cap(b)/Uint8SizeBytes)[:len(b)/Uint8SizeBytes] + return GetData[uint8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint8Traits) CastToBytes(b []uint8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint8SizeBytes)[:len(b)*Uint8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -425,16 +384,12 @@ func (time32Traits) PutValue(b []byte, v Time32) { // // NOTE: len(b) must be a multiple of Time32SizeBytes. func (time32Traits) CastFromBytes(b []byte) []Time32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time32)(unsafe.Pointer(h.Data)), cap(b)/Time32SizeBytes)[:len(b)/Time32SizeBytes] + return GetData[Time32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time32Traits) CastToBytes(b []Time32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time32SizeBytes)[:len(b)*Time32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -461,16 +416,12 @@ func (time64Traits) PutValue(b []byte, v Time64) { // // NOTE: len(b) must be a multiple of Time64SizeBytes. 
func (time64Traits) CastFromBytes(b []byte) []Time64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time64)(unsafe.Pointer(h.Data)), cap(b)/Time64SizeBytes)[:len(b)/Time64SizeBytes] + return GetData[Time64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time64Traits) CastToBytes(b []Time64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time64SizeBytes)[:len(b)*Time64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -497,16 +448,12 @@ func (date32Traits) PutValue(b []byte, v Date32) { // // NOTE: len(b) must be a multiple of Date32SizeBytes. func (date32Traits) CastFromBytes(b []byte) []Date32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date32)(unsafe.Pointer(h.Data)), cap(b)/Date32SizeBytes)[:len(b)/Date32SizeBytes] + return GetData[Date32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date32Traits) CastToBytes(b []Date32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date32SizeBytes)[:len(b)*Date32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -533,16 +480,12 @@ func (date64Traits) PutValue(b []byte, v Date64) { // // NOTE: len(b) must be a multiple of Date64SizeBytes. func (date64Traits) CastFromBytes(b []byte) []Date64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date64)(unsafe.Pointer(h.Data)), cap(b)/Date64SizeBytes)[:len(b)/Date64SizeBytes] + return GetData[Date64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date64Traits) CastToBytes(b []Date64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date64SizeBytes)[:len(b)*Date64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
@@ -569,16 +512,12 @@ func (durationTraits) PutValue(b []byte, v Duration) { // // NOTE: len(b) must be a multiple of DurationSizeBytes. func (durationTraits) CastFromBytes(b []byte) []Duration { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Duration)(unsafe.Pointer(h.Data)), cap(b)/DurationSizeBytes)[:len(b)/DurationSizeBytes] + return GetData[Duration](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (durationTraits) CastToBytes(b []Duration) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DurationSizeBytes)[:len(b)*DurationSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index c491047b514..e98f59528c6 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -18,7 +18,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -66,16 +65,12 @@ func ({{.name}}Traits) PutValue(b []byte, v {{.Type}}) { // // NOTE: len(b) must be a multiple of {{.Name}}SizeBytes. func ({{.name}}Traits) CastFromBytes(b []byte) []{{.Type}} { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*{{.Type}})(unsafe.Pointer(h.Data)), cap(b)/{{.Name}}SizeBytes)[:len(b)/{{.Name}}SizeBytes] + return GetData[{{.Type}}](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func ({{.name}}Traits) CastToBytes(b []{{.Type}}) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*{{.Name}}SizeBytes)[:len(b)*{{.Name}}SizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index c1a9aba3db3..8e9970a719f 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -43,16 +42,12 @@ func (timestampTraits) PutValue(b []byte, v Timestamp) { // // NOTE: len(b) must be a multiple of TimestampSizeBytes. func (timestampTraits) CastFromBytes(b []byte) []Timestamp { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Timestamp)(unsafe.Pointer(h.Data)), cap(b)/TimestampSizeBytes)[:len(b)/TimestampSizeBytes] + return GetData[Timestamp](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (timestampTraits) CastToBytes(b []Timestamp) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*TimestampSizeBytes)[:len(b)*TimestampSizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index c3846db2946..be3f15fed69 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -39,15 +38,11 @@ func (viewHeaderTraits) PutValue(b []byte, v ViewHeader) { } func (viewHeaderTraits) CastFromBytes(b []byte) (res []ViewHeader) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*ViewHeader)(unsafe.Pointer(h.Data)), cap(b)/ViewHeaderSizeBytes)[:len(b)/ViewHeaderSizeBytes] + return GetData[ViewHeader](b) } func (viewHeaderTraits) CastToBytes(b []ViewHeader) (res []byte) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*ViewHeaderSizeBytes)[:len(b)*ViewHeaderSizeBytes] + return GetBytes(b) } func (viewHeaderTraits) Copy(dst, src []ViewHeader) { copy(dst, src) } diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index 6764ca79126..374b8d4aab3 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -113,7 +113,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { bitOffset := int8(startOffset % 8) if length > 0 && bitOffset != 0 { - br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(8-bitOffset))) br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) } return @@ -124,7 +124,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { endBitOffset := int8((startOffset + length) % 8) if length > 0 && endBitOffset != 0 { br.pos++ - br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(endBitOffset))) br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) } } @@ -219,7 +219,7 
@@ func (br *baseSetBitRunReader) skipNextZeros() { if br.remaining > 0 { br.curWord = br.loadPartial(0, br.remaining) br.curNumBits = int32(br.remaining) - nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) + nzeros := int32(utils.Min(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) br.curWord = br.consumeBits(br.curWord, nzeros) br.curNumBits -= nzeros br.remaining -= int64(nzeros) diff --git a/go/internal/utils/math.go b/go/internal/utils/math.go index 62cf96ce431..c8311750e3a 100644 --- a/go/internal/utils/math.go +++ b/go/internal/utils/math.go @@ -16,32 +16,16 @@ package utils -// Min is a convenience Min function for int64 -func Min(a, b int64) int64 { - if a < b { - return a - } - return b -} +import "golang.org/x/exp/constraints" -// MinInt is a convenience Min function for int -func MinInt(a, b int) int { +func Min[T constraints.Ordered](a, b T) T { if a < b { return a } return b } -// Max is a convenience Max function for int64 -func Max(a, b int64) int64 { - if a > b { - return a - } - return b -} - -// MaxInt is a convenience Max function for int -func MaxInt(a, b int) int { +func Max[T constraints.Ordered](a, b T) T { if a > b { return a } diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index 766638d88f2..342fb3b198a 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -517,7 +517,7 @@ func (c *columnChunkReader) readBatch(batchSize int64, defLvls, repLvls []int16, // if this is a required field, ndefs will be 0 since there is no definition // levels stored with it and `read` will be the number of values, otherwise // we use ndefs since it will be equal to or greater than read. 
- totalVals := int64(utils.MaxInt(ndefs, read)) + totalVals := int64(utils.Max(ndefs, read)) c.consumeBufferedValues(totalVals) totalLvls += totalVals diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index 21ea52e2b7b..a6725bc02fe 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -244,7 +244,7 @@ func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { totalRead += batch batchActual += int(read) - batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096))) + batchSize = int32(utils.Min(1<<24, utils.Max(int(batchSize*2), 4096))) if batch <= 0 { break } diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index f6707fce86d..251468658ae 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -144,7 +144,7 @@ func defLevelsBatchToBitmap(defLevels []int16, remainingUpperBound int64, info L var batch []int16 for len(defLevels) > 0 { - batchSize := shared_utils.MinInt(maxbatch, len(defLevels)) + batchSize := shared_utils.Min(maxbatch, len(defLevels)) batch, defLevels = defLevels[:batchSize], defLevels[batchSize:] definedBitmap := bmi.GreaterThanBitmap(batch, info.DefLevel-1) diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index 3782dc85ea8..353f4438559 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -55,7 +55,7 @@ func (dec *PlainBooleanDecoder) SetData(nvals int, data []byte) error { // // Returns the number of values decoded func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) // attempts to read all remaining bool values from the current data byte unalignedExtract := func(i int) int { @@ -148,7 +148,7 @@ func (dec *RleBooleanDecoder) SetData(nvals 
int, data []byte) error { } func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) var ( buf [1024]uint64 @@ -156,7 +156,7 @@ func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { ) for n > 0 { - batch := shared_utils.MinInt(len(buf), n) + batch := shared_utils.Min(len(buf), n) decoded := dec.rleDec.GetBatch(buf[:batch]) if decoded != batch { return max - n, io.ErrUnexpectedEOF diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 82ce9f84265..0c1c858fb48 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -49,7 +49,7 @@ func (PlainByteArrayDecoder) Type() parquet.Type { // // Returns the number of values that were decoded. func (pbad *PlainByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), pbad.nvals) + max := utils.Min(len(out), pbad.nvals) for i := 0; i < max; i++ { // there should always be at least four bytes which is the length of the diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index cee624730e9..acb57fbce78 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -155,7 +155,7 @@ func (d *dictDecoder) decodeSpaced(out interface{}, nullCount int, validBits []b } func (d *dictDecoder) DecodeIndices(numValues int, bldr array.Builder) (int, error) { - n := shared_utils.MinInt(numValues, d.nvals) + n := shared_utils.Min(numValues, d.nvals) if cap(d.idxScratchSpace) < n { d.idxScratchSpace = make([]uint64, n, bitutil.NextPowerOf2(n)) } else { diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index a00f3457cac..560b77f4c66 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ 
b/go/parquet/internal/encoding/delta_bit_packing.go @@ -158,7 +158,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.MinInt(len(out), int(d.totalValues)) + max := shared_utils.Min(len(out), int(d.totalValues)) if max == 0 { return 0, nil } @@ -249,7 +249,7 @@ func (d *DeltaBitPackInt64Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) { - max := shared_utils.MinInt(len(out), d.nvals) + max := shared_utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 57b0c8a70e5..5e5002e34a6 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -172,7 +172,7 @@ func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode decodes byte arrays into the slice provided and returns the number of values actually decoded func (d *DeltaByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index d5a99c187d1..183eb453ca0 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -126,7 +126,7 @@ func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode populates the passed in 
slice with data decoded until it hits the length of out // or runs out of values in the column to decode, then returns the number of values actually decoded. func (d *DeltaLengthByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) for i := 0; i < max; i++ { out[i] = d.data[:d.lengths[i]:d.lengths[i]] d.data = d.data[d.lengths[i]:] diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index 1e589fc2e7b..2054e1bb85f 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -38,7 +38,7 @@ func (PlainFixedLenByteArrayDecoder) Type() parquet.Type { // values to decode or the length of out has been filled. Then returns the total number of values // that were decoded. func (pflba *PlainFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - max := utils.MinInt(len(out), pflba.nvals) + max := utils.Min(len(out), pflba.nvals) numBytesNeeded := max * pflba.typeLen if numBytesNeeded > len(pflba.data) || numBytesNeeded > math.MaxInt32 { return 0, xerrors.New("parquet: eof exception") diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index 09403d74cb0..a41f754f62a 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -172,7 +172,7 @@ func (PlainInt32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. 
func (dec *PlainInt32Decoder) Decode(out []int32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -277,7 +277,7 @@ func (PlainInt64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt64Decoder) Decode(out []int64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -382,7 +382,7 @@ func (PlainInt96Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt96Decoder) Decode(out []parquet.Int96) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(parquet.Int96SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int96, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -487,7 +487,7 @@ func (PlainFloat32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. 
func (dec *PlainFloat32Decoder) Decode(out []float32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -592,7 +592,7 @@ func (PlainFloat64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainFloat64Decoder) Decode(out []float64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index 2838c63a418..74f63e78bcc 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -133,7 +133,7 @@ func (Plain{{.Name}}Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. 
func (dec *Plain{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64({{.prefix}}.{{.Name}}SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain {{.Name}}, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 4bc18e8c63c..04db72178f3 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -195,7 +195,7 @@ func (DictInt32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt32Decoder) Decode(out []int32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -209,7 +209,7 @@ func (d *DictInt32Decoder) Decode(out []int32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -432,7 +432,7 @@ func (DictInt64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. 
func (d *DictInt64Decoder) Decode(out []int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -446,7 +446,7 @@ func (d *DictInt64Decoder) Decode(out []int64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -647,7 +647,7 @@ func (DictInt96Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -661,7 +661,7 @@ func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt96Decoder) DecodeSpaced(out []parquet.Int96, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -872,7 +872,7 @@ func (DictFloat32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. 
func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -886,7 +886,7 @@ func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1097,7 +1097,7 @@ func (DictFloat64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1111,7 +1111,7 @@ func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1365,7 +1365,7 @@ func (DictByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. 
func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1379,7 +1379,7 @@ func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1544,7 +1544,7 @@ func (DictFixedLenByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1558,7 +1558,7 @@ func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) ( // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
func (d *DictFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index d72f3151204..ceb755caa0b 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -271,7 +271,7 @@ func (Dict{{.Name}}Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -285,7 +285,7 @@ func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
func (d *Dict{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 4ab3ab1a1c9..f8d860c88a0 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { b.buf = bufferPool.Get().(*memory.Buffer) } - newCap := utils.MaxInt(b.buf.Cap()+b.offset, 256) + newCap := utils.Max(b.buf.Cap()+b.offset, 256) for newCap < b.pos+nbytes { newCap = bitutil.NextPowerOf2(newCap) } @@ -375,7 +375,7 @@ func (b *BufferWriter) Reserve(nbytes int) { if b.buffer == nil { b.buffer = memory.NewResizableBuffer(b.mem) } - newCap := utils.MaxInt(b.buffer.Cap()+b.offset, 256) + newCap := utils.Max(b.buffer.Cap()+b.offset, 256) for newCap < b.pos+nbytes+b.offset { newCap = bitutil.NextPowerOf2(newCap) } diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index 48ac3316400..525921d9631 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -75,7 +75,7 @@ func (d *DataPageBuilder) appendLevels(lvls []int16, maxLvl int16, e parquet.Enc func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { d.defLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.defLvlEncoding = parquet.Encodings.RLE d.hasDefLvls = true } @@ -83,7 +83,7 @@ func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { func (d *DataPageBuilder) AppendRepLevels(lvls []int16, maxLvl int16) { d.repLvlBytesLen = 
d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.repLvlEncoding = parquet.Encodings.RLE d.hasRepLvls = true } @@ -122,7 +122,7 @@ func (d *DataPageBuilder) AppendValues(desc *schema.Column, values interface{}, panic(err) } - d.nvals = utils.MaxInt(sz, d.nvals) + d.nvals = utils.Max(sz, d.nvals) d.encoding = e d.hasValues = true } @@ -191,7 +191,7 @@ func MakeDataPage(dataPageVersion parquet.DataPageVersion, d *schema.Column, val num = builder.nvals } else { stream.Write(indexBuffer.Bytes()) - num = utils.MaxInt(builder.nvals, nvals) + num = utils.Max(builder.nvals, nvals) } buf := stream.Finish() diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index 0bf501e0488..d327be5f525 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -266,7 +266,7 @@ func (b *BitReader) GetBatchBools(out []bool) (int, error) { for i < length { // grab byte-aligned bits in a loop since it's more efficient than going // bit by bit when you can grab 8 bools at a time. 
- unpackSize := utils.MinInt(blen, length-i) / 8 * 8 + unpackSize := utils.Min(blen, length-i) / 8 * 8 n, err := b.reader.Read(buf[:bitutil.BytesForBits(int64(unpackSize))]) if err != nil { return i, err @@ -314,7 +314,7 @@ func (b *BitReader) GetBatch(bits uint, out []uint64) (int, error) { b.reader.Seek(b.byteoffset, io.SeekStart) for i < length { // unpack groups of 32 bytes at a time into a buffer since it's more efficient - unpackSize := utils.MinInt(buflen, length-i) + unpackSize := utils.Min(buflen, length-i) numUnpacked := unpack32(b.reader, b.unpackBuf[:unpackSize], int(bits)) if numUnpacked == 0 { break diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index f367e7dc13c..dffe55402b9 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -51,7 +51,7 @@ func MaxRLEBufferSize(width, numValues int) int { minRepeatedRunSize := 1 + int(bitutil.BytesForBits(int64(width))) repeatedMaxSize := int(bitutil.BytesForBits(int64(numValues))) * minRepeatedRunSize - return utils.MaxInt(literalMaxSize, repeatedMaxSize) + return utils.Max(literalMaxSize, repeatedMaxSize) } // Utility classes to do run length encoding (RLE) for fixed bit width values. 
If runs @@ -370,7 +370,7 @@ func (r *RleDecoder) consumeRepeatCounts(read, batchSize, remain int, run bituti } func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -388,7 +388,7 @@ func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64 ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index 886d24564db..37dc49a6958 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -130,7 +130,7 @@ func (r *RleDecoder) getspacedInt32(dc DictionaryConverter, vals []int32, batchS } func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -148,7 +148,7 @@ func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -185,7 +185,7 @@ func (r *RleDecoder) 
GetBatchWithDictInt32(dc DictionaryConverter, vals []int32) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -193,7 +193,7 @@ func (r *RleDecoder) GetBatchWithDictInt32(dc DictionaryConverter, vals []int32) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -323,7 +323,7 @@ func (r *RleDecoder) getspacedInt64(dc DictionaryConverter, vals []int64, batchS } func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -341,7 +341,7 @@ func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -378,7 +378,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -386,7 +386,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := 
utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -516,7 +516,7 @@ func (r *RleDecoder) getspacedInt96(dc DictionaryConverter, vals []parquet.Int96 } func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet.Int96, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -534,7 +534,7 @@ func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -571,7 +571,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -579,7 +579,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -709,7 +709,7 @@ func (r *RleDecoder) getspacedFloat32(dc DictionaryConverter, vals []float32, ba } func (r *RleDecoder) consumeLiteralsFloat32(dc DictionaryConverter, vals []float32, remain int, buf []IndexType, run 
bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -727,7 +727,7 @@ func (r *RleDecoder) consumeLiteralsFloat32(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -764,7 +764,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -772,7 +772,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -902,7 +902,7 @@ func (r *RleDecoder) getspacedFloat64(dc DictionaryConverter, vals []float64, ba } func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -920,7 +920,7 @@ func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := 
utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -957,7 +957,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -965,7 +965,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1095,7 +1095,7 @@ func (r *RleDecoder) getspacedByteArray(dc DictionaryConverter, vals []parquet.B } func (r *RleDecoder) consumeLiteralsByteArray(dc DictionaryConverter, vals []parquet.ByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1113,7 +1113,7 @@ func (r *RleDecoder) consumeLiteralsByteArray(dc DictionaryConverter, vals []par ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1150,7 +1150,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := 
dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1158,7 +1158,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1288,7 +1288,7 @@ func (r *RleDecoder) getspacedFixedLenByteArray(dc DictionaryConverter, vals []p } func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, vals []parquet.FixedLenByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1306,7 +1306,7 @@ func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, va ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1343,7 +1343,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1351,7 +1351,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := 
r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index abcb419055a..88c7dd979eb 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -129,7 +129,7 @@ func (r *RleDecoder) getspaced{{.Name}}(dc DictionaryConverter, vals []{{.name}} } func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{.name}}, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -147,7 +147,7 @@ func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{. ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -184,7 +184,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -192,7 +192,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 
3c38aba5c32..a403b2196a8 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -790,7 +790,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { isNeg := int8(buf[0]) < 0 // 1. extract high bits - highBitsOffset := utils.MaxInt(0, len(buf)-8) + highBitsOffset := utils.Max(0, len(buf)-8) var ( highBits uint64 lowBits uint64 @@ -811,7 +811,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { } // 2. extract lower bits - lowBitsOffset := utils.MinInt(len(buf), 8) + lowBitsOffset := utils.Min(len(buf), 8) lowBits = uint64FromBigEndianShifted(buf[highBitsOffset:]) if lowBitsOffset == 8 { @@ -850,7 +850,7 @@ func bigEndianToDecimal256(buf []byte) (decimal256.Num, error) { } for wordIdx := 0; wordIdx < 4; wordIdx++ { - wordLen := utils.MinInt(len(buf), arrow.Uint64SizeBytes) + wordLen := utils.Min(len(buf), arrow.Uint64SizeBytes) word := buf[len(buf)-wordLen:] if wordLen == 8 {