From 56f71348587db547f1e7dd51696925bd5ba48c1b Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 30 Nov 2023 20:36:15 -0500 Subject: [PATCH 01/28] GH-39013: [Go][Integration] Support cABI import/export of StringView in Go --- dev/archery/archery/integration/datagen.py | 1 - go/arrow/cdata/cdata.go | 46 +++++++++++++++++++ go/arrow/cdata/cdata_exports.go | 52 ++++++++++++++++++---- 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 29b203ae130..42b14ae5d97 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1847,7 +1847,6 @@ def _temp_path(): generate_binary_view_case() .skip_tester('C#') - .skip_tester('Go') .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index f9693851d74..53e93ce3a79 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -82,6 +82,8 @@ var formatToSimpleType = map[string]arrow.DataType{ "Z": arrow.BinaryTypes.LargeBinary, "u": arrow.BinaryTypes.String, "U": arrow.BinaryTypes.LargeString, + "vz": arrow.BinaryTypes.BinaryView, + "vu": arrow.BinaryTypes.StringView, "tdD": arrow.FixedWidthTypes.Date32, "tdm": arrow.FixedWidthTypes.Date64, "tts": arrow.FixedWidthTypes.Time32s, @@ -485,6 +487,10 @@ func (imp *cimporter) doImport() error { return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.LargeBinaryType: return imp.importStringLike(int64(arrow.Int64SizeBytes)) + case *arrow.StringViewType: + return imp.importBinaryViewLike() + case *arrow.BinaryViewType: + return imp.importBinaryViewLike() case *arrow.ListType: return imp.importListLike() case *arrow.LargeListType: @@ -654,6 +660,46 @@ func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { return } +func (imp *cimporter) importBinaryViewLike() (err error) { + if err = imp.checkNoChildren(); err != nil { + return + } + + buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) + defer func() { + for _, buf := range buffers { + if buf != nil { + buf.Release() + } + } + }() + + if buffers[0], err = imp.importNullBitmap(0); err != nil { + return + } + + if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { + return + } + + var dataBufferSizes *memory.Buffer + if dataBufferSizes, err = imp.importFixedSizeBuffer(1, int64(len(buffers))-2); err != nil { + return + } + defer dataBufferSizes.Release() + + for i, size := range arrow.Int64Traits.CastFromBytes(dataBufferSizes.Bytes()) { + if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { + return + } + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) + + buffers = []*memory.Buffer{} + return +} + func (imp *cimporter) importListLike() (err error) { if err = imp.checkNumChildren(1); err != nil { return err diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index d5fdc0dac15..cc80ddf90ad 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -167,6 +167,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "u" case *arrow.LargeStringType: return "U" + case *arrow.BinaryViewType: + return "vz" + case *arrow.StringViewType: + return "vu" case *arrow.Date32Type: return "tdD" case *arrow.Date64Type: @@ -328,6 +332,15 @@ func allocateBufferPtrArr(n int) (out []*C.void) { 
return } +func allocateBufferSizeArr(n int) (out []C.int64_t) { + s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof(int64(0))))) + s.Len = n + s.Cap = n + + return +} + func (exp *schemaExporter) finish(out *CArrowSchema) { out.dictionary = nil if exp.dict != nil { @@ -369,14 +382,14 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { } nbuffers := len(arr.Data().Buffers()) - buf_offset := 0 + bufs := arr.Data().Buffers() // Some types don't have validity bitmaps, but we keep them shifted // to make processing easier in other contexts. This means that // we have to adjust when exporting. has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) if nbuffers > 0 && !has_validity_bitmap { nbuffers-- - buf_offset++ + bufs = bufs[1:] } out.dictionary = nil @@ -386,26 +399,47 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { out.n_buffers = C.int64_t(nbuffers) out.buffers = nil + needBufferSizes := func() bool { + switch arr.(type) { + case *array.BinaryView: + return true + case *array.StringView: + return true + default: + return false + } + }() + if needBufferSizes { + nbuffers++ + } + if nbuffers > 0 { - bufs := arr.Data().Buffers() - buffers := allocateBufferPtrArr(nbuffers) - for i, buf := range bufs[buf_offset:] { + cBufs := allocateBufferPtrArr(nbuffers) + for i, buf := range bufs { if buf == nil || buf.Len() == 0 { if i > 0 || !has_validity_bitmap { // apache/arrow#33936: export a dummy buffer to be friendly to // implementations that don't import NULL properly - buffers[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) + cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) } else { // null pointer permitted for the validity bitmap // (assuming null count is 0) - buffers[i] = nil + cBufs[i] = nil } continue } - buffers[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + } + + if needBufferSizes { + sizes := allocateBufferSizeArr(len(bufs[2:])) + for i, buf := range bufs[2:] { + sizes[i] = C.int64_t(buf.Len()) + } + cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) } - out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&buffers[0])) + out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) } arr.Data().Retain() From 5dd6ea551d46c8a9520f76f285358b5e7c37e6f2 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 1 Dec 2023 13:08:24 -0500 Subject: [PATCH 02/28] amend integration JSON field names --- go/arrow/internal/arrjson/arrjson.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 84dc6389832..49f711cdacd 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -826,7 +826,8 @@ type Array struct { Offset interface{} `json:"OFFSET,omitempty"` Size interface{} `json:"SIZE,omitempty"` Children []Array `json:"children,omitempty"` - Variadic []string `json:"VARIADIC_BUFFERS,omitempty"` + Variadic []string `json:"VARIADIC_DATA_BUFFERS,omitempty"` + Views []interface{} `json:"VIEWS,omitempty"` } func (a *Array) MarshalJSON() ([]byte, error) { @@ -1090,7 +1091,7 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr case arrow.BinaryViewDataType: valids := validsToBitmap(validsFromJSON(arr.Valids), mem) nulls := arr.Count - bitutil.CountSetBits(valids.Bytes(), 0, arr.Count) - headers := 
stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Data) + headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Views) extraBufs := variadicBuffersFromJSON(arr.Variadic) defer valids.Release() defer headers.Release() @@ -1513,7 +1514,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, false), + Views: stringHeadersToJSON(arr, false), Variadic: variadic, } case *array.BinaryView: @@ -1522,7 +1523,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, true), + Views: stringHeadersToJSON(arr, true), Variadic: variadic, } case *array.List: @@ -2406,7 +2407,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface } values[i].SetIndexOffset(int32(bufIdx), int32(bufOffset)) - prefix, err := hex.DecodeString(v["PREFIX"].(string)) + prefix, err := hex.DecodeString(v["PREFIX_HEX"].(string)) if err != nil { panic(err) } @@ -2426,7 +2427,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface func stringHeadersToJSON(arr array.ViewLike, isBinary bool) []interface{} { type StringHeader struct { Size int `json:"SIZE"` - Prefix *string `json:"PREFIX,omitempty"` + Prefix *string `json:"PREFIX_HEX,omitempty"` BufferIdx *int `json:"BUFFER_INDEX,omitempty"` BufferOff *int `json:"OFFSET,omitempty"` Inlined *string `json:"INLINED,omitempty"` From 9766f85b00cde54e67da0ccd5f963f79965d64d1 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 1 Dec 2023 13:55:03 -0500 Subject: [PATCH 03/28] debugging, maybe --- dev/archery/archery/integration/runner.py | 2 ++ go/arrow/cdata/cdata.go | 1 + go/arrow/cdata/cdata_exports.go | 36 ++++++++--------------- go/arrow/internal/utils.go | 14 +++++++++ 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index bab00e6d70d..7fadb7e47cf 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -193,6 +193,8 @@ def _run_test_cases(self, ``case_runner`` ran against ``test_cases`` """ def case_wrapper(test_case): + if serial: + return case_runner(test_case) with printer.cork(): return case_runner(test_case) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 53e93ce3a79..7890c571ad7 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -666,6 +666,7 @@ func (imp *cimporter) importBinaryViewLike() (err error) { } buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) + // XXX couldn't figure out how to extract file_reader.go::releaseBuffers as a utility defer func() { for _, buf := range buffers { if buf != nil { diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index cc80ddf90ad..72b684944d2 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -381,15 +381,19 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportField(arrow.Field{Type: arr.DataType()}, outSchema) } - nbuffers := len(arr.Data().Buffers()) - bufs := arr.Data().Buffers() + buffers := arr.Data().Buffers() // Some types don't have validity bitmaps, but we keep them shifted // to make processing easier in other contexts. This means that // we have to adjust when exporting. 
has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if nbuffers > 0 && !has_validity_bitmap { - nbuffers-- - bufs = bufs[1:] + if len(buffers) > 0 && !has_validity_bitmap { + buffers = buffers[1:] + } + nbuffers := len(buffers) + + has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) + if has_buffer_sizes_buffer { + nbuffers++ } out.dictionary = nil @@ -399,23 +403,9 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { out.n_buffers = C.int64_t(nbuffers) out.buffers = nil - needBufferSizes := func() bool { - switch arr.(type) { - case *array.BinaryView: - return true - case *array.StringView: - return true - default: - return false - } - }() - if needBufferSizes { - nbuffers++ - } - if nbuffers > 0 { cBufs := allocateBufferPtrArr(nbuffers) - for i, buf := range bufs { + for i, buf := range buffers { if buf == nil || buf.Len() == 0 { if i > 0 || !has_validity_bitmap { // apache/arrow#33936: export a dummy buffer to be friendly to @@ -432,9 +422,9 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) } - if needBufferSizes { - sizes := allocateBufferSizeArr(len(bufs[2:])) - for i, buf := range bufs[2:] { + if has_buffer_sizes_buffer { + sizes := allocateBufferSizeArr(len(buffers[2:])) + for i, buf := range buffers[2:] { sizes[i] = C.int64_t(buf.Len()) } cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 619eebd97dc..934bf628cb4 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -45,3 +45,17 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool { } return true } + +// HasBufferSizesBuffer returns whether a given type has an extra buffer +// in the C ABI to store the sizes of other buffers. Currently this is only +// StringView and BinaryView. 
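+//
+// (Per the Arrow C data interface, a view array is exported with buffers
+// [validity, views, data buffers..., sizes], where the trailing sizes
+// buffer holds one int64 length per variadic data buffer.)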
+func HasBufferSizesBuffer(id arrow.Type) bool { + switch id { + case arrow.STRING_VIEW: + return true + case arrow.BINARY_VIEW: + return true + default: + return false + } +} From a2763512f30c5384eeff2e77d2ce623e4eab8753 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 8 Dec 2023 13:38:43 -0500 Subject: [PATCH 04/28] add datagen.py support for list view --- dev/archery/archery/integration/datagen.py | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 42b14ae5d97..6f07ba88e85 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -927,6 +927,85 @@ class LargeListColumn(_BaseListColumn, _LargeOffsetsMixin): pass +class ListViewField(Field): + + def __init__(self, name, value_field, *, nullable=True, + metadata=None): + super().__init__(name, nullable=nullable, + metadata=metadata) + self.value_field = value_field + + @property + def column_class(self): + return ListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'listview') + ]) + + def _get_children(self): + return [self.value_field.get_json()] + + def generate_column(self, size, name=None): + MAX_LIST_SIZE = 4 + + is_valid = self._make_is_valid(size) + offsets = [] + sizes = np.random.randint(0, MAX_LIST_SIZE + 1, size=size) + offset = 0 + for s in sizes: + offsets.append(offset) + offset += int(s) + + # The offset now is the total number of elements in the child array + values = self.value_field.generate_column(offset) + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, offsets, sizes, values) + + +class LargeListViewField(ListViewField): + + @property + def column_class(self): + return LargeListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'largelistview') + ]) + + +class _BaseListViewColumn(Column): + + def __init__(self, name, count, is_valid, offsets, sizes, values): + super().__init__(name, count) + self.is_valid = is_valid + self.offsets = offsets + self.sizes = sizes + self.values = values + + def _get_buffers(self): + return [ + ('VALIDITY', [int(v) for v in self.is_valid]), + ('OFFSET', self._encode_offsets(self.offsets)), + ('SIZE', self._encode_offsets(self.sizes)), + ] + + def _get_children(self): + return [self.values.get_json()] + + +class ListViewColumn(_BaseListViewColumn, _NarrowOffsetsMixin): + pass + + +class LargeListViewColumn(_BaseListViewColumn, _LargeOffsetsMixin): + pass + + class MapField(Field): def __init__(self, name, key_field, item_field, *, nullable=True, @@ -1663,6 +1742,15 @@ def generate_binary_view_case(): return _generate_file("binary_view", fields, batch_sizes) +def generate_list_view_case(): + fields = [ + ListViewField('lv', get_field('item', 'float32')), + LargeListViewField('llv', get_field('item', 'float32')), + ] + batch_sizes = [0, 7, 256] + return _generate_file("list_view", fields, batch_sizes) + + def generate_nested_large_offsets_case(): fields = [ LargeListField('large_list_nullable', get_field('item', 'int32')), @@ -1851,6 +1939,13 @@ def _temp_path(): .skip_tester('JS') .skip_tester('Rust'), + generate_list_view_case() + .skip_tester('C#') + .skip_tester('Java') + .skip_tester('JS') + .skip_tester('Go') + .skip_tester('Rust'), + generate_extension_case() # TODO: ensure the extension is registered in the C++ entrypoint .skip_format(SKIP_C_SCHEMA, 'C++') From b89bd6931843503c3d11b383833fb1dc15ecd0a3 Mon Sep 17 00:00:00 2001 From: 
Benjamin Kietzman Date: Mon, 11 Dec 2023 14:28:51 -0500 Subject: [PATCH 05/28] debugging session --- dev/archery/archery/integration/datagen.py | 1 - go/arrow/cdata/cdata.go | 9 ++------- go/arrow/cdata/cdata_exports.go | 1 + 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 6f07ba88e85..488ad6f60c9 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1943,7 +1943,6 @@ def _temp_path(): .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') - .skip_tester('Go') .skip_tester('Rust'), generate_extension_case() diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 7890c571ad7..a5b4494c108 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -683,13 +683,8 @@ func (imp *cimporter) importBinaryViewLike() (err error) { return } - var dataBufferSizes *memory.Buffer - if dataBufferSizes, err = imp.importFixedSizeBuffer(1, int64(len(buffers))-2); err != nil { - return - } - defer dataBufferSizes.Release() - - for i, size := range arrow.Int64Traits.CastFromBytes(dataBufferSizes.Bytes()) { + dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) + for i, size := range dataBufferSizes { if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { return } diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 72b684944d2..a0fd9d552cd 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -39,6 +39,7 @@ import ( "bytes" "encoding/binary" "fmt" + "os" "reflect" "runtime/cgo" "strconv" From 8e80f5afb0bb25ece219940de4f962ec1592d113 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Mon, 11 Dec 2023 15:24:16 -0500 Subject: [PATCH 06/28] fix failing binaryview test --- go/arrow/cdata/cdata.go | 4 +--- go/arrow/cdata/cdata_exports.go | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index a5b4494c108..2eaf858664d 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -683,7 +683,7 @@ func (imp *cimporter) importBinaryViewLike() (err error) { return } - dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) + dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) for i, size := range dataBufferSizes { if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); err != nil { return @@ -691,8 +691,6 @@ func (imp *cimporter) importBinaryViewLike() (err error) { } imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) - - buffers = []*memory.Buffer{} return } diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index a0fd9d552cd..9f754fb2778 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -39,7 +39,6 @@ import ( "bytes" "encoding/binary" "fmt" - "os" "reflect" "runtime/cgo" "strconv" @@ -428,7 +427,9 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { for i, buf := range buffers[2:] { sizes[i] = C.int64_t(buf.Len()) } - cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) + if len(sizes) > 0 { + cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) + } } out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) } From 
c6563380d227cdbaea734f799ab8703d4f6bfef8 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 08:56:52 -0500 Subject: [PATCH 07/28] add c ABI support for list view --- go/arrow/cdata/cdata.go | 57 +++++++++++++++++++++++++++++++++ go/arrow/cdata/cdata_exports.go | 4 +++ 2 files changed, 61 insertions(+) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 2eaf858664d..6399fe904c3 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -265,6 +265,12 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { dt = arrow.ListOfField(childFields[0]) case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) + case 'v': // list view/large list view + if f[2] == 'l' { + dt = arrow.ListViewOfField(childFields[0]) + } else if f[2] == 'L' { + dt = arrow.LargeListViewOfField(childFields[0]) + } case 'w': // fixed size list is w:# where # is the list size. listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { @@ -366,6 +372,16 @@ func (imp *cimporter) doImportChildren() error { if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } + case arrow.LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.LARGE_LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } case arrow.FIXED_SIZE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { @@ -495,6 +511,10 @@ func (imp *cimporter) doImport() error { return imp.importListLike() case *arrow.LargeListType: return imp.importListLike() + case *arrow.ListViewType: + return imp.importListViewLike() + case *arrow.LargeListViewType: + return imp.importListViewLike() case *arrow.MapType: return imp.importListLike() case *arrow.FixedSizeListType: @@ -723,6 +743,43 @@ func (imp *cimporter) importListLike() (err error) { return } +func (imp *cimporter) importListViewLike() (err error) { + offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) + + if err = imp.checkNumChildren(1); err != nil { + return err + } + + if err = imp.checkNumBuffers(3); err != nil { + return err + } + + var nulls, offsets, sizes *memory.Buffer + if nulls, err = imp.importNullBitmap(0); err != nil { + return + } + if nulls != nil { + defer nulls.Release() + } + + if offsets, err = imp.importOffsetsBuffer(1, offsetSize); err != nil { + return + } + if offsets != nil { + defer offsets.Release() + } + + if sizes, err = imp.importOffsetsBuffer(2, offsetSize); err != nil { + return + } + if sizes != nil { + defer sizes.Release() + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + return +} + func (imp *cimporter) importFixedSizePrimitive() error { if err := imp.checkNoChildren(); err != nil { return err diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index 9f754fb2778..9c7c238ffb7 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -232,6 +232,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "+l" case *arrow.LargeListType: return "+L" + case 
*arrow.ListViewType: + return "+vl" + case *arrow.LargeListViewType: + return "+vL" case *arrow.FixedSizeListType: return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: From 0c33184c0263ac474ce0b2b8f33e08a1ccd9f174 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 10:15:36 -0500 Subject: [PATCH 08/28] ensure offsets under null bits are not ignored --- go/arrow/array/list.go | 228 ++++------------------- go/arrow/cdata/cdata.go | 10 +- go/arrow/type_traits.go | 32 ++++ go/arrow/type_traits_decimal128.go | 4 +- go/arrow/type_traits_decimal256.go | 4 +- go/arrow/type_traits_float16.go | 4 +- go/arrow/type_traits_interval.go | 12 +- go/arrow/type_traits_numeric.gen.go | 60 ++---- go/arrow/type_traits_numeric.gen.go.tmpl | 4 +- go/arrow/type_traits_timestamp.go | 4 +- go/arrow/type_traits_view.go | 4 +- 11 files changed, 94 insertions(+), 272 deletions(-) create mode 100644 go/arrow/type_traits.go diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 4b627341167..4624965fd38 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -19,7 +19,6 @@ package array import ( "bytes" "fmt" - "math" "strings" "sync/atomic" @@ -1411,217 +1410,56 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { return b.Unmarshal(dec) } -// Pre-conditions: -// -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func minListViewOffset32(input arrow.ArrayData) int32 { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. - i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } - - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue - } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { - minOffset = offset - } - } - return minOffset -} - -// Find the maximum offset+size in a LIST_VIEW array. +// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
// // Pre-conditions: // -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func maxListViewOffset32(input arrow.ArrayData) int { +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 +// input.Len() > 0 +func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) - } - - i := input.Len() - 1 // safe because input.Len() > 0 - for i != 0 && (isNull(i) || sizes[i] == 0) { - i -= 1 - } - offset := offsets[i] - size := sizes[i] - if i == 0 { - if isNull(i) || sizes[i] == 0 { - return 0 - } else { - return int(offset + size) - } - } + offsets := arrow.CastFromBytesTo[Offset](input.Buffers()[1].Bytes())[inputOffset:] - values := input.Children()[0] - maxEnd := int(offsets[i] + sizes[i]) - if maxEnd == values.Len() { - // Early-exit: maximum possible view-end found already. - return maxEnd - } - - // Slow path: scan the buffers entirely. - for ; i >= 0; i -= 1 { - offset := offsets[i] - size := sizes[i] - if size > 0 && !isNull(i) { - if int(offset+size) > maxEnd { - maxEnd = int(offset + size) - if maxEnd == values.Len() { - return maxEnd - } - } - } - } - return maxEnd -} - -// Pre-conditions: -// -// input.DataType() is LargeListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func minLargeListViewOffset64(input arrow.ArrayData) int64 { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } + minOffset := offsets[i] // safe because input.Len() > 0 - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue + for i += 1; i < input.Len(); i += 1 { + if minOffset == 0 { + // Fast path: the minimum offset is frequently 0 (the start of the child array), + // and frequently a view which has this offset will be near the start of the array. + return 0 } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { + if offset := offsets[i]; offset < minOffset { minOffset = offset } } return minOffset } -// Find the maximum offset+size in a LARGE_LIST_VIEW array. +// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
// // Pre-conditions: // -// input.DataType() is LargeListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func maxLargeListViewOffset64(input arrow.ArrayData) int64 { +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 +// input.Len() > 0 +func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] + offsets := arrow.CastFromBytesTo[Offset](input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.CastFromBytesTo[Offset](input.Buffers()[2].Bytes())[inputOffset:] - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) - } + maxLegalOffset := Offset(input.Children()[0].Len()) - // It's very likely that the first non-null non-empty list-view starts at - // offset zero, so we check that first and potentially early-return a 0. - i := input.Len() - 1 // safe because input.Len() > 0 - for i != 0 && (isNull(i) || sizes[i] == 0) { - i -= 1 - } - offset := offsets[i] - size := sizes[i] - if i == 0 { - if isNull(i) || sizes[i] == 0 { - return 0 - } else { - return offset + size - } - } + i := input.Len() - 1 + maxEnd := offsets[i] + sizes[i] // safe because input.Len() > 0 - if offset > math.MaxInt64-size { - // Early-exit: 64-bit overflow detected. This is not possible on a - // valid list-view, but we return the maximum possible value to - // avoid undefined behavior. - return math.MaxInt64 - } - values := input.Children()[0] - maxEnd := offsets[i] + sizes[i] - if maxEnd == int64(values.Len()) { - // Early-exit: maximum possible view-end found already. - return maxEnd - } - - // Slow path: scan the buffers entirely. - for ; i >= 0; i -= 1 { - offset := offsets[i] - size := sizes[i] - if size > 0 && !isNull(i) { - if offset+size > maxEnd { - if offset > math.MaxInt64-size { - // 64-bit overflow detected. This is not possible on a valid list-view, - // but we saturate maxEnd to the maximum possible value to avoid - // undefined behavior. - return math.MaxInt64 - } - maxEnd = offset + size - if maxEnd == int64(values.Len()) { - return maxEnd - } - } + for i -= 1; i >= 0; i -= 1 { + if maxEnd == maxLegalOffset { + // Fast path: the maximum offset+size is frequently exactly the end of the child array, + // and frequently a view which has this offset+size will be near the end of the array. 
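+			// (A worked example, with invented values: offsets [0, 2, 5],
+			// sizes [2, 3, 1] and a child of length 6 give an initial
+			// maxEnd = 5+1 = 6, which already equals the child length, so
+			// the scan returns on its first iteration.)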
+ return maxEnd + } + if end := offsets[i] + sizes[i]; end > maxEnd { + maxEnd = end } } return maxEnd @@ -1634,11 +1472,11 @@ func rangeOfValuesUsed(input arrow.ArrayData) (int, int) { var minOffset, maxEnd int switch input.DataType().(type) { case *arrow.ListViewType: - minOffset = int(minListViewOffset32(input)) - maxEnd = maxListViewOffset32(input) + minOffset = int(minListViewOffset[int32](input)) + maxEnd = int(maxListViewEnd[int32](input)) case *arrow.LargeListViewType: - minOffset = int(minLargeListViewOffset64(input)) - maxEnd = int(maxLargeListViewOffset64(input)) + minOffset = int(minListViewOffset[int64](input)) + maxEnd = int(maxListViewEnd[int64](input)) case *arrow.ListType: offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] minOffset = int(offsets[0]) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 6399fe904c3..1ad630e84ad 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -266,11 +266,11 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) case 'v': // list view/large list view - if f[2] == 'l' { - dt = arrow.ListViewOfField(childFields[0]) - } else if f[2] == 'L' { - dt = arrow.LargeListViewOfField(childFields[0]) - } + if f[2] == 'l' { + dt = arrow.ListViewOfField(childFields[0]) + } else if f[2] == 'L' { + dt = arrow.LargeListViewOfField(childFields[0]) + } case 'w': // fixed size list is w:# where # is the list size. listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go new file mode 100644 index 00000000000..9528c2b08fd --- /dev/null +++ b/go/arrow/type_traits.go @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arrow + +import ( + "reflect" + "unsafe" +) + +// CastFromBytesTo[T] reinterprets the slice b to a slice of type T. +// +// NOTE: len(b) must be a multiple of T's size. +func CastFromBytesTo[T interface{}](b []byte) []T { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + ptr := (*T)(unsafe.Pointer(h.Data)) + size := int(unsafe.Sizeof(*ptr)) + return unsafe.Slice(ptr, cap(b)/size)[:len(b)/size] +} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index f573ad3c65a..3ec28fddc02 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -47,9 +47,7 @@ func (decimal128Traits) PutValue(b []byte, v decimal128.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. 
func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal128.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal128SizeBytes)[:len(b)/Decimal128SizeBytes] + return CastFromBytesTo[decimal128.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index adf3cc3e0bc..a5ecc014e9f 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -44,9 +44,7 @@ func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { // CastFromBytes reinterprets the slice b to a slice of decimal256 func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal256.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal256SizeBytes)[:len(b)/Decimal256SizeBytes] + return CastFromBytesTo[decimal256.Num](b) } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index e59efd4c248..487a4db0cc4 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -46,9 +46,7 @@ func (float16Traits) PutValue(b []byte, v float16.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (float16Traits) CastFromBytes(b []byte) []float16.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float16.Num)(unsafe.Pointer(h.Data)), cap(b)/Float16SizeBytes)[:len(b)/Float16SizeBytes] + return CastFromBytesTo[float16.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 5fbd7a52489..aa994cbe86c 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -57,9 +57,7 @@ func (monthTraits) PutValue(b []byte, v MonthInterval) { // // NOTE: len(b) must be a multiple of MonthIntervalSizeBytes. func (monthTraits) CastFromBytes(b []byte) []MonthInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthIntervalSizeBytes)[:len(b)/MonthIntervalSizeBytes] + return CastFromBytesTo[MonthInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -94,9 +92,7 @@ func (daytimeTraits) PutValue(b []byte, v DayTimeInterval) { // // NOTE: len(b) must be a multiple of DayTimeIntervalSizeBytes. func (daytimeTraits) CastFromBytes(b []byte) []DayTimeInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*DayTimeInterval)(unsafe.Pointer(h.Data)), cap(b)/DayTimeIntervalSizeBytes)[:len(b)/DayTimeIntervalSizeBytes] + return CastFromBytesTo[DayTimeInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -132,9 +128,7 @@ func (monthDayNanoTraits) PutValue(b []byte, v MonthDayNanoInterval) { // // NOTE: len(b) must be a multiple of MonthDayNanoIntervalSizeBytes. func (monthDayNanoTraits) CastFromBytes(b []byte) []MonthDayNanoInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthDayNanoInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthDayNanoIntervalSizeBytes)[:len(b)/MonthDayNanoIntervalSizeBytes] + return CastFromBytesTo[MonthDayNanoInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
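As an aside, here is a minimal standalone use of the new generic helper (a
sketch, not part of the patch; the byte values are invented and the printed
result assumes little-endian hardware):

	package main

	import (
		"fmt"

		"github.com/apache/arrow/go/v15/arrow"
	)

	func main() {
		// Reinterpret eight bytes in place as two int32s. No copy is made;
		// the returned slice aliases b's backing array.
		b := []byte{1, 0, 0, 0, 2, 0, 0, 0}
		fmt.Println(arrow.CastFromBytesTo[int32](b)) // [1 2]
	}

Patch 09 below consolidates this helper and its companions in
type_traits.go as arrow.GetData and arrow.GetBytes.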
diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 57606c0fce6..94e79a4c4ce 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -65,9 +65,7 @@ func (int64Traits) PutValue(b []byte, v int64) { // // NOTE: len(b) must be a multiple of Int64SizeBytes. func (int64Traits) CastFromBytes(b []byte) []int64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int64)(unsafe.Pointer(h.Data)), cap(b)/Int64SizeBytes)[:len(b)/Int64SizeBytes] + return CastFromBytesTo[int64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -101,9 +99,7 @@ func (uint64Traits) PutValue(b []byte, v uint64) { // // NOTE: len(b) must be a multiple of Uint64SizeBytes. func (uint64Traits) CastFromBytes(b []byte) []uint64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/Uint64SizeBytes)[:len(b)/Uint64SizeBytes] + return CastFromBytesTo[uint64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -137,9 +133,7 @@ func (float64Traits) PutValue(b []byte, v float64) { // // NOTE: len(b) must be a multiple of Float64SizeBytes. func (float64Traits) CastFromBytes(b []byte) []float64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float64)(unsafe.Pointer(h.Data)), cap(b)/Float64SizeBytes)[:len(b)/Float64SizeBytes] + return CastFromBytesTo[float64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -173,9 +167,7 @@ func (int32Traits) PutValue(b []byte, v int32) { // // NOTE: len(b) must be a multiple of Int32SizeBytes. func (int32Traits) CastFromBytes(b []byte) []int32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int32)(unsafe.Pointer(h.Data)), cap(b)/Int32SizeBytes)[:len(b)/Int32SizeBytes] + return CastFromBytesTo[int32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -209,9 +201,7 @@ func (uint32Traits) PutValue(b []byte, v uint32) { // // NOTE: len(b) must be a multiple of Uint32SizeBytes. func (uint32Traits) CastFromBytes(b []byte) []uint32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint32)(unsafe.Pointer(h.Data)), cap(b)/Uint32SizeBytes)[:len(b)/Uint32SizeBytes] + return CastFromBytesTo[uint32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -245,9 +235,7 @@ func (float32Traits) PutValue(b []byte, v float32) { // // NOTE: len(b) must be a multiple of Float32SizeBytes. func (float32Traits) CastFromBytes(b []byte) []float32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float32)(unsafe.Pointer(h.Data)), cap(b)/Float32SizeBytes)[:len(b)/Float32SizeBytes] + return CastFromBytesTo[float32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -281,9 +269,7 @@ func (int16Traits) PutValue(b []byte, v int16) { // // NOTE: len(b) must be a multiple of Int16SizeBytes. func (int16Traits) CastFromBytes(b []byte) []int16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int16)(unsafe.Pointer(h.Data)), cap(b)/Int16SizeBytes)[:len(b)/Int16SizeBytes] + return CastFromBytesTo[int16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -317,9 +303,7 @@ func (uint16Traits) PutValue(b []byte, v uint16) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. 
func (uint16Traits) CastFromBytes(b []byte) []uint16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint16)(unsafe.Pointer(h.Data)), cap(b)/Uint16SizeBytes)[:len(b)/Uint16SizeBytes] + return CastFromBytesTo[uint16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -353,9 +337,7 @@ func (int8Traits) PutValue(b []byte, v int8) { // // NOTE: len(b) must be a multiple of Int8SizeBytes. func (int8Traits) CastFromBytes(b []byte) []int8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int8)(unsafe.Pointer(h.Data)), cap(b)/Int8SizeBytes)[:len(b)/Int8SizeBytes] + return CastFromBytesTo[int8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -389,9 +371,7 @@ func (uint8Traits) PutValue(b []byte, v uint8) { // // NOTE: len(b) must be a multiple of Uint8SizeBytes. func (uint8Traits) CastFromBytes(b []byte) []uint8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint8)(unsafe.Pointer(h.Data)), cap(b)/Uint8SizeBytes)[:len(b)/Uint8SizeBytes] + return CastFromBytesTo[uint8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -425,9 +405,7 @@ func (time32Traits) PutValue(b []byte, v Time32) { // // NOTE: len(b) must be a multiple of Time32SizeBytes. func (time32Traits) CastFromBytes(b []byte) []Time32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time32)(unsafe.Pointer(h.Data)), cap(b)/Time32SizeBytes)[:len(b)/Time32SizeBytes] + return CastFromBytesTo[Time32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -461,9 +439,7 @@ func (time64Traits) PutValue(b []byte, v Time64) { // // NOTE: len(b) must be a multiple of Time64SizeBytes. func (time64Traits) CastFromBytes(b []byte) []Time64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time64)(unsafe.Pointer(h.Data)), cap(b)/Time64SizeBytes)[:len(b)/Time64SizeBytes] + return CastFromBytesTo[Time64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -497,9 +473,7 @@ func (date32Traits) PutValue(b []byte, v Date32) { // // NOTE: len(b) must be a multiple of Date32SizeBytes. func (date32Traits) CastFromBytes(b []byte) []Date32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date32)(unsafe.Pointer(h.Data)), cap(b)/Date32SizeBytes)[:len(b)/Date32SizeBytes] + return CastFromBytesTo[Date32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -533,9 +507,7 @@ func (date64Traits) PutValue(b []byte, v Date64) { // // NOTE: len(b) must be a multiple of Date64SizeBytes. func (date64Traits) CastFromBytes(b []byte) []Date64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date64)(unsafe.Pointer(h.Data)), cap(b)/Date64SizeBytes)[:len(b)/Date64SizeBytes] + return CastFromBytesTo[Date64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. @@ -569,9 +541,7 @@ func (durationTraits) PutValue(b []byte, v Duration) { // // NOTE: len(b) must be a multiple of DurationSizeBytes. func (durationTraits) CastFromBytes(b []byte) []Duration { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Duration)(unsafe.Pointer(h.Data)), cap(b)/DurationSizeBytes)[:len(b)/DurationSizeBytes] + return CastFromBytesTo[Duration](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
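Since every numeric trait above now delegates to the same generic helper, a
quick equivalence check may help (a sketch, not part of the patch; values
invented):

	package main

	import (
		"fmt"

		"github.com/apache/arrow/go/v15/arrow"
	)

	func main() {
		b := arrow.Int32Traits.CastToBytes([]int32{10, 20, 30})
		// Both paths reinterpret the same backing bytes.
		fmt.Println(arrow.Int32Traits.CastFromBytes(b)) // [10 20 30]
		fmt.Println(arrow.CastFromBytesTo[int32](b))    // [10 20 30]
	}

The same one-line delegation is applied to the code-generation template
next, keeping regenerated numeric traits in sync with the hand-edited files
above.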
diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index c491047b514..e53e8eacbc8 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -66,9 +66,7 @@ func ({{.name}}Traits) PutValue(b []byte, v {{.Type}}) { // // NOTE: len(b) must be a multiple of {{.Name}}SizeBytes. func ({{.name}}Traits) CastFromBytes(b []byte) []{{.Type}} { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*{{.Type}})(unsafe.Pointer(h.Data)), cap(b)/{{.Name}}SizeBytes)[:len(b)/{{.Name}}SizeBytes] + return CastFromBytesTo[{{.Type}}](b) } // CastToBytes reinterprets the slice b to a slice of bytes. diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index c1a9aba3db3..70c38fdc5da 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -43,9 +43,7 @@ func (timestampTraits) PutValue(b []byte, v Timestamp) { // // NOTE: len(b) must be a multiple of TimestampSizeBytes. func (timestampTraits) CastFromBytes(b []byte) []Timestamp { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Timestamp)(unsafe.Pointer(h.Data)), cap(b)/TimestampSizeBytes)[:len(b)/TimestampSizeBytes] + return CastFromBytesTo[Timestamp](b) } // CastToBytes reinterprets the slice b to a slice of bytes. diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index c3846db2946..6d65765e27f 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -39,9 +39,7 @@ func (viewHeaderTraits) PutValue(b []byte, v ViewHeader) { } func (viewHeaderTraits) CastFromBytes(b []byte) (res []ViewHeader) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*ViewHeader)(unsafe.Pointer(h.Data)), cap(b)/ViewHeaderSizeBytes)[:len(b)/ViewHeaderSizeBytes] + return CastFromBytesTo[ViewHeader](b) } func (viewHeaderTraits) CastToBytes(b []ViewHeader) (res []byte) { From 33a679f73f3b276cdec5ae17b376a453beb4cc83 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 11:25:24 -0500 Subject: [PATCH 09/28] move GetBytes, GetData and other utilities to type_traits.go --- go/arrow/array/list.go | 6 +- go/arrow/avro/reader_types.go | 2 +- go/arrow/bitutil/endian_default.go | 1 + go/arrow/bitutil/endian_s390x.go | 2 +- go/arrow/compute/arithmetic.go | 4 +- go/arrow/compute/arithmetic_test.go | 36 +- go/arrow/compute/exec/utils.go | 140 +----- go/arrow/compute/exec/utils_test.go | 2 +- go/arrow/compute/fieldref.go | 51 +- .../internal/kernels/base_arithmetic.go | 12 +- .../internal/kernels/base_arithmetic_amd64.go | 55 +-- .../kernels/basic_arithmetic_noasm.go | 2 +- .../compute/internal/kernels/boolean_cast.go | 2 +- go/arrow/compute/internal/kernels/helpers.go | 42 +- .../compute/internal/kernels/numeric_cast.go | 22 +- .../internal/kernels/scalar_arithmetic.go | 6 +- .../kernels/scalar_comparison_amd64.go | 5 +- .../kernels/scalar_comparison_noasm.go | 2 +- .../internal/kernels/scalar_comparisons.go | 30 +- .../compute/internal/kernels/string_casts.go | 10 +- .../compute/internal/kernels/vector_hash.go | 2 +- .../internal/kernels/vector_run_end_encode.go | 24 +- .../internal/kernels/vector_selection.go | 28 +- go/arrow/compute/scalar_compare_test.go | 16 +- go/arrow/compute/vector_hash_test.go | 20 +- go/arrow/compute/vector_selection_test.go | 30 +- go/arrow/flight/doc.go | 1 - go/arrow/flight/gen/flight/Flight.pb.go | 26 +- go/arrow/flight/gen/flight/FlightSql.pb.go | 443 +++++++++--------- 
go/arrow/flight/gen/flight/Flight_grpc.pb.go | 1 + go/arrow/flight/server.go | 2 +- go/arrow/gen-flatbuffers.go | 1 + go/arrow/internal/debug/assert_off.go | 1 + go/arrow/internal/debug/assert_on.go | 1 + go/arrow/internal/debug/doc.go | 6 +- go/arrow/internal/debug/log_off.go | 1 + go/arrow/internal/debug/log_on.go | 1 + go/arrow/internal/debug/util.go | 1 + go/arrow/internal/flatbuf/Binary.go | 2 +- go/arrow/internal/flatbuf/BinaryView.go | 14 +- go/arrow/internal/flatbuf/Block.go | 19 +- go/arrow/internal/flatbuf/BodyCompression.go | 18 +- .../internal/flatbuf/BodyCompressionMethod.go | 6 +- go/arrow/internal/flatbuf/Buffer.go | 34 +- go/arrow/internal/flatbuf/Date.go | 12 +- go/arrow/internal/flatbuf/Decimal.go | 24 +- go/arrow/internal/flatbuf/DictionaryBatch.go | 24 +- .../internal/flatbuf/DictionaryEncoding.go | 48 +- go/arrow/internal/flatbuf/DictionaryKind.go | 10 +- go/arrow/internal/flatbuf/Endianness.go | 4 +- go/arrow/internal/flatbuf/Feature.go | 38 +- go/arrow/internal/flatbuf/Field.go | 34 +- go/arrow/internal/flatbuf/FieldNode.go | 40 +- go/arrow/internal/flatbuf/FixedSizeBinary.go | 4 +- go/arrow/internal/flatbuf/FixedSizeList.go | 4 +- go/arrow/internal/flatbuf/Footer.go | 10 +- go/arrow/internal/flatbuf/KeyValue.go | 6 +- go/arrow/internal/flatbuf/LargeBinary.go | 4 +- go/arrow/internal/flatbuf/LargeList.go | 4 +- go/arrow/internal/flatbuf/LargeListView.go | 4 +- go/arrow/internal/flatbuf/LargeUtf8.go | 4 +- go/arrow/internal/flatbuf/ListView.go | 6 +- go/arrow/internal/flatbuf/Map.go | 54 +-- go/arrow/internal/flatbuf/MessageHeader.go | 16 +- go/arrow/internal/flatbuf/Null.go | 2 +- go/arrow/internal/flatbuf/RecordBatch.go | 102 ++-- go/arrow/internal/flatbuf/RunEndEncoded.go | 10 +- go/arrow/internal/flatbuf/Schema.go | 20 +- .../internal/flatbuf/SparseMatrixIndexCSR.go | 134 +++--- .../internal/flatbuf/SparseMatrixIndexCSX.go | 142 +++--- go/arrow/internal/flatbuf/SparseTensor.go | 28 +- .../internal/flatbuf/SparseTensorIndexCOO.go | 100 ++-- .../internal/flatbuf/SparseTensorIndexCSF.go | 254 +++++----- go/arrow/internal/flatbuf/Struct_.go | 6 +- go/arrow/internal/flatbuf/Tensor.go | 24 +- go/arrow/internal/flatbuf/TensorDim.go | 14 +- go/arrow/internal/flatbuf/Time.go | 28 +- go/arrow/internal/flatbuf/Timestamp.go | 250 +++++----- go/arrow/internal/flatbuf/Type.go | 6 +- go/arrow/internal/flatbuf/Union.go | 8 +- go/arrow/internal/flatbuf/Utf8.go | 2 +- go/arrow/internal/flatbuf/Utf8View.go | 14 +- go/arrow/internal/testing/tools/bits.go | 2 +- go/arrow/ipc/cmd/arrow-cat/main.go | 66 +-- go/arrow/ipc/cmd/arrow-ls/main.go | 62 +-- go/arrow/math/math_amd64.go | 1 + go/arrow/math/math_arm64.go | 5 +- go/arrow/math/math_noasm.go | 1 + go/arrow/math/math_ppc64le.go | 1 + go/arrow/math/math_s390x.go | 1 + go/arrow/memory/cgo_allocator.go | 4 +- go/arrow/memory/cgo_allocator_defaults.go | 5 +- go/arrow/memory/cgo_allocator_logging.go | 5 +- go/arrow/memory/memory_amd64.go | 1 + go/arrow/memory/memory_arm64.go | 1 + go/arrow/memory/memory_avx2_amd64.go | 1 + go/arrow/memory/memory_js_wasm.go | 1 + go/arrow/memory/memory_neon_arm64.go | 1 + go/arrow/memory/memory_noasm.go | 1 + go/arrow/memory/memory_sse4_amd64.go | 1 + go/arrow/type_traits.go | 121 ++++- go/arrow/type_traits_decimal128.go | 7 +- go/arrow/type_traits_decimal256.go | 7 +- go/arrow/type_traits_float16.go | 7 +- go/arrow/type_traits_interval.go | 19 +- go/arrow/type_traits_numeric.gen.go | 91 ++-- go/arrow/type_traits_numeric.gen.go.tmpl | 7 +- go/arrow/type_traits_timestamp.go | 7 +- 
go/arrow/type_traits_view.go | 7 +- 109 files changed, 1492 insertions(+), 1565 deletions(-) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 4624965fd38..653222c145b 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -1418,7 +1418,7 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { // input.Len() > 0 func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() - offsets := arrow.CastFromBytesTo[Offset](input.Buffers()[1].Bytes())[inputOffset:] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] i := 0 minOffset := offsets[i] // safe because input.Len() > 0 @@ -1444,8 +1444,8 @@ func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { // input.Len() > 0 func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() - offsets := arrow.CastFromBytesTo[Offset](input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.CastFromBytesTo[Offset](input.Buffers()[2].Bytes())[inputOffset:] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:] maxLegalOffset := Offset(input.Children()[0].Len()) diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go index 5658c6e587d..974fea1f14e 100644 --- a/go/arrow/avro/reader_types.go +++ b/go/arrow/avro/reader_types.go @@ -22,7 +22,7 @@ import ( "errors" "fmt" "math/big" - + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/decimal128" diff --git a/go/arrow/bitutil/endian_default.go b/go/arrow/bitutil/endian_default.go index 9f5d3cdc7d2..ecbbaa70d04 100644 --- a/go/arrow/bitutil/endian_default.go +++ b/go/arrow/bitutil/endian_default.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !s390x // +build !s390x package bitutil diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go index a9bba439128..e99605f5848 100644 --- a/go/arrow/bitutil/endian_s390x.go +++ b/go/arrow/bitutil/endian_s390x.go @@ -18,7 +18,7 @@ package bitutil import ( "math/bits" - "unsafe" + "unsafe" ) var toFromLEFunc = bits.ReverseBytes64 diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go index 1ee1959b2dd..052d79610bc 100644 --- a/go/arrow/compute/arithmetic.go +++ b/go/arrow/compute/arithmetic.go @@ -678,8 +678,8 @@ func RegisterScalarArithmetic(reg FunctionRegistry) { // the allocated space is for duration (an int64) but we // wrote the time32 - time32 as if the output was time32 // so a quick copy in reverse expands the int32s to int64. 
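				// (For example, with invented values: if out.Len is 3 and the
				// int32 results [5, 6, 7] occupy the first 12 bytes of the
				// 24-byte buffer, iterating i = 2, 1, 0 reads rawData[i] and
				// then writes outData[i], whose 8 bytes overlap only
				// rawData[2i] and rawData[2i+1], slots that are never read
				// again, so no unread value is clobbered.)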
-			rawData := exec.GetData[int32](out.Buffers[1].Buf)
-			outData := exec.GetData[int64](out.Buffers[1].Buf)
+			rawData := arrow.GetData[int32](out.Buffers[1].Buf)
+			outData := arrow.GetData[int64](out.Buffers[1].Buf)
 
 			for i := out.Len - 1; i >= 0; i-- {
 				outData[i] = int64(rawData[i])
diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go
index c9c3f1ceb03..34c1bc6d98d 100644
--- a/go/arrow/compute/arithmetic_test.go
+++ b/go/arrow/compute/arithmetic_test.go
@@ -195,7 +195,7 @@ func (b *Float16BinaryFuncTestSuite) TestSub() {
 	}
 }
 
-type BinaryArithmeticSuite[T exec.NumericTypes] struct {
+type BinaryArithmeticSuite[T arrow.NumericType] struct {
 	BinaryFuncTestSuite
 
 	opts compute.ArithmeticOptions
@@ -205,7 +205,7 @@ type BinaryArithmeticSuite[T exec.NumericTypes] struct {
 }
 
 func (BinaryArithmeticSuite[T]) DataType() arrow.DataType {
-	return exec.GetDataType[T]()
+	return arrow.GetDataType[T]()
 }
 
 func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) {
@@ -564,7 +564,7 @@ func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() {
 	bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number")
 }
 
-type BinaryIntegralArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct {
+type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct {
 	BinaryArithmeticSuite[T]
 }
 
@@ -2412,7 +2412,7 @@ func TestUnaryArithmeticNull(t *testing.T) {
 	}
 }
 
-type UnaryArithmeticSuite[T exec.NumericTypes, O fnOpts] struct {
+type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct {
 	suite.Suite
 
 	mem *memory.CheckedAllocator
@@ -2433,7 +2433,7 @@ func (us *UnaryArithmeticSuite[T, O]) TearDownTest() {
 }
 
 func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType {
-	return exec.GetDataType[T]()
+	return arrow.GetDataType[T]()
 }
 
 func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar {
@@ -2532,7 +2532,7 @@ func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O]
 	us.ErrorContains(err, msg)
 }
 
-type UnaryArithmeticIntegral[T exec.IntTypes | exec.UintTypes] struct {
+type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct {
 	UnaryArithmeticSuite[T, compute.ArithmeticOptions]
 }
 
@@ -2598,7 +2598,7 @@ func (us *UnaryArithmeticIntegral[T]) TestLog() {
 	}
 }
 
-type UnaryArithmeticSigned[T exec.IntTypes] struct {
+type UnaryArithmeticSigned[T arrow.IntType] struct {
 	UnaryArithmeticIntegral[T]
 }
 
@@ -2678,7 +2678,7 @@ func (us *UnaryArithmeticSigned[T]) TestNegate() {
 	})
 }
 
-type UnaryArithmeticUnsigned[T exec.UintTypes] struct {
+type UnaryArithmeticUnsigned[T arrow.UintType] struct {
 	UnaryArithmeticIntegral[T]
 }
 
@@ -2965,12 +2965,12 @@ func TestUnaryArithmetic(t *testing.T) {
 	suite.Run(t, new(DecimalUnaryArithmeticSuite))
 }
 
-type BitwiseArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct {
+type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct {
 	BinaryFuncTestSuite
 }
 
 func (bs *BitwiseArithmeticSuite[T]) datatype() arrow.DataType {
-	return exec.GetDataType[T]()
+	return arrow.GetDataType[T]()
 }
 
 // to make it easier to test different widths, tests give bytes which
@@ -3061,7 +3061,7 @@ var roundModes = []compute.RoundMode{
 	compute.RoundHalfToOdd,
 }
 
-type UnaryRoundSuite[T exec.NumericTypes] struct {
+type UnaryRoundSuite[T arrow.NumericType] struct {
 	UnaryArithmeticSuite[T, compute.RoundOptions]
 }
 
@@ -3073,7 +3073,7 @@ func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) {
 	us.opts.NDigits = v
 }
 
-type UnaryRoundToMultipleSuite[T exec.NumericTypes] struct {
+type UnaryRoundToMultipleSuite[T arrow.NumericType] struct {
 	UnaryArithmeticSuite[T, compute.RoundToMultipleOptions]
 }
 
@@ -3085,15 +3085,15 @@ func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) {
 	us.opts.Multiple = scalar.NewFloat64Scalar(val)
 }
 
-type UnaryRoundIntegral[T exec.IntTypes | exec.UintTypes] struct {
+type UnaryRoundIntegral[T arrow.IntType | arrow.UintType] struct {
 	UnaryRoundSuite[T]
 }
 
-type UnaryRoundToMultipleIntegral[T exec.IntTypes | exec.UintTypes] struct {
+type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct {
 	UnaryRoundToMultipleSuite[T]
 }
 
-type UnaryRoundSigned[T exec.IntTypes] struct {
+type UnaryRoundSigned[T arrow.IntType] struct {
 	UnaryRoundIntegral[T]
 }
 
@@ -3130,7 +3130,7 @@ func (us *UnaryRoundSigned[T]) TestRound() {
 	}
 }
 
-type UnaryRoundToMultipleSigned[T exec.IntTypes] struct {
+type UnaryRoundToMultipleSigned[T arrow.IntType] struct {
 	UnaryRoundToMultipleIntegral[T]
 }
 
@@ -3164,7 +3164,7 @@ func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() {
 	}
 }
 
-type UnaryRoundUnsigned[T exec.UintTypes] struct {
+type UnaryRoundUnsigned[T arrow.UintType] struct {
 	UnaryRoundIntegral[T]
 }
 
@@ -3201,7 +3201,7 @@ func (us *UnaryRoundUnsigned[T]) TestRound() {
 	}
 }
 
-type UnaryRoundToMultipleUnsigned[T exec.UintTypes] struct {
+type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct {
 	UnaryRoundToMultipleIntegral[T]
 }
 
diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go
index 276e4570ca9..1b5e69a502c 100644
--- a/go/arrow/compute/exec/utils.go
+++ b/go/arrow/compute/exec/utils.go
@@ -21,96 +21,21 @@ package exec
 
 import (
 	"fmt"
 	"math"
-	"reflect"
 	"sync/atomic"
 	"unsafe"
 
 	"github.com/apache/arrow/go/v15/arrow"
 	"github.com/apache/arrow/go/v15/arrow/array"
 	"github.com/apache/arrow/go/v15/arrow/bitutil"
-	"github.com/apache/arrow/go/v15/arrow/decimal128"
-	"github.com/apache/arrow/go/v15/arrow/decimal256"
-	"github.com/apache/arrow/go/v15/arrow/float16"
 	"github.com/apache/arrow/go/v15/arrow/memory"
 	"golang.org/x/exp/constraints"
 	"golang.org/x/exp/slices"
 )
 
-// IntTypes is a type constraint for raw values represented as signed
-// integer types by Arrow. We aren't just using constraints.Signed
-// because we don't want to include the raw `int` type here whose size
-// changes based on the architecture (int32 on 32-bit architectures and
-// int64 on 64-bit architectures).
-//
-// This will also cover types like MonthInterval or the time types
-// as their underlying types are int32 and int64 which will get covered
-// by using the ~
-type IntTypes interface {
-	~int8 | ~int16 | ~int32 | ~int64
-}
-
-// UintTypes is a type constraint for raw values represented as unsigned
-// integer types by Arrow. We aren't just using constraints.Unsigned
-// because we don't want to include the raw `uint` type here whose size
-// changes based on the architecture (uint32 on 32-bit architectures and
-// uint64 on 64-bit architectures). We also don't want to include uintptr
-type UintTypes interface {
-	~uint8 | ~uint16 | ~uint32 | ~uint64
-}
-
-// FloatTypes is a type constraint for raw values for representing
-// floating point values in Arrow. This consists of constraints.Float and
-// float16.Num
-type FloatTypes interface {
-	float16.Num | constraints.Float
-}
-
-// NumericTypes is a type constraint for just signed/unsigned integers
-// and float32/float64.
-type NumericTypes interface {
-	IntTypes | UintTypes | constraints.Float
-}
-
-// DecimalTypes is a type constraint for raw values representing larger
-// decimal type values in Arrow, specifically decimal128 and decimal256.
-type DecimalTypes interface {
-	decimal128.Num | decimal256.Num
-}
-
-// FixedWidthTypes is a type constraint for raw values in Arrow that
-// can be represented as FixedWidth byte slices. Specifically this is for
-// using Go generics to easily re-type a byte slice to a properly-typed
-// slice. Booleans are excluded here since they are represented by Arrow
-// as a bitmap and thus the buffer can't be just reinterpreted as a []bool
-type FixedWidthTypes interface {
-	IntTypes | UintTypes |
-		FloatTypes | DecimalTypes |
-		arrow.DayTimeInterval | arrow.MonthDayNanoInterval
-}
-
-type TemporalTypes interface {
-	arrow.Date32 | arrow.Date64 | arrow.Time32 | arrow.Time64 |
-		arrow.Timestamp | arrow.Duration | arrow.DayTimeInterval |
-		arrow.MonthInterval | arrow.MonthDayNanoInterval
-}
-
-func GetValues[T FixedWidthTypes](data arrow.ArrayData, i int) []T {
-	if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 {
-		return nil
-	}
-	ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len())
-	return ret[data.Offset():]
-}
-
-func GetOffsets[T int32 | int64](data arrow.ArrayData, i int) []T {
-	ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()+1)
-	return ret[data.Offset():]
-}
-
 // GetSpanValues returns a properly typed slice by reinterpreting
 // the buffer at index i using unsafe.Slice. This will take into account
 // the offset of the given ArraySpan.
-func GetSpanValues[T FixedWidthTypes](span *ArraySpan, i int) []T {
+func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T {
 	if len(span.Buffers[i].Buf) == 0 {
 		return nil
 	}
@@ -126,16 +51,6 @@ func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T {
 	return ret[span.Offset:]
 }
 
-func GetBytes[T FixedWidthTypes](in []T) []byte {
-	var z T
-	return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*int(unsafe.Sizeof(z)))
-}
-
-func GetData[T FixedWidthTypes](in []byte) []T {
-	var z T
-	return unsafe.Slice((*T)(unsafe.Pointer(&in[0])), len(in)/int(unsafe.Sizeof(z)))
-}
-
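 // A minimal sketch of what the removed helpers did (assuming the
 // arrow.GetBytes/arrow.GetData pair that replaces them keeps these exact
 // reinterpret-don't-copy semantics):
 //
 //	vals := []int32{1, 2, 3}
 //	raw := arrow.GetBytes(vals)       // 12 bytes viewing the same memory
 //	back := arrow.GetData[int32](raw) // []int32{1, 2, 3}, still no copy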
 func Min[T constraints.Ordered](a, b T) T {
 	if a < b {
 		return a
@@ -165,59 +80,22 @@ func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error)
 		arrow.ErrInvalid)
 }
 
-var typMap = map[reflect.Type]arrow.DataType{
-	reflect.TypeOf(false):           arrow.FixedWidthTypes.Boolean,
-	reflect.TypeOf(int8(0)):         arrow.PrimitiveTypes.Int8,
-	reflect.TypeOf(int16(0)):        arrow.PrimitiveTypes.Int16,
-	reflect.TypeOf(int32(0)):        arrow.PrimitiveTypes.Int32,
-	reflect.TypeOf(int64(0)):        arrow.PrimitiveTypes.Int64,
-	reflect.TypeOf(uint8(0)):        arrow.PrimitiveTypes.Uint8,
-	reflect.TypeOf(uint16(0)):       arrow.PrimitiveTypes.Uint16,
-	reflect.TypeOf(uint32(0)):       arrow.PrimitiveTypes.Uint32,
-	reflect.TypeOf(uint64(0)):       arrow.PrimitiveTypes.Uint64,
-	reflect.TypeOf(float32(0)):      arrow.PrimitiveTypes.Float32,
-	reflect.TypeOf(float64(0)):      arrow.PrimitiveTypes.Float64,
-	reflect.TypeOf(string("")):      arrow.BinaryTypes.String,
-	reflect.TypeOf(arrow.Date32(0)): arrow.FixedWidthTypes.Date32,
-	reflect.TypeOf(arrow.Date64(0)): arrow.FixedWidthTypes.Date64,
-	reflect.TypeOf(true):            arrow.FixedWidthTypes.Boolean,
-	reflect.TypeOf(float16.Num{}):   arrow.FixedWidthTypes.Float16,
-	reflect.TypeOf([]byte{}):        arrow.BinaryTypes.Binary,
-}
-
-// GetDataType returns the appropriate arrow.DataType for the given type T
-// only for non-parametric types. This uses a map and reflection internally
-// so don't call this in a tight loop, instead call this once and then use
-// a closure with the result.
-func GetDataType[T NumericTypes | bool | string | []byte | float16.Num]() arrow.DataType {
-	var z T
-	return typMap[reflect.TypeOf(z)]
-}
-
-// GetType returns the appropriate arrow.Type type T, only for non-parametric
-// types. This uses a map and reflection internally so don't call this in
-// a tight loop, instead call it once and then use a closure with the result.
-func GetType[T NumericTypes | bool | string]() arrow.Type {
-	var z T
-	return typMap[reflect.TypeOf(z)].ID()
-}
-
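 // A minimal sketch (assuming the arrow.GetDataType that replaces the
 // removed exec.GetDataType keeps the same lookup semantics shown above):
 //
 //	arrow.GetDataType[int16]()   // arrow.PrimitiveTypes.Int16
 //	arrow.GetDataType[float64]() // arrow.PrimitiveTypes.Float64
 //	arrow.GetDataType[bool]()    // arrow.FixedWidthTypes.Boolean
 //
 // As the removed doc comment advises, resolve the type once and reuse it
 // rather than calling this inside a tight loop.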
-type arrayBuilder[T NumericTypes | bool] interface {
+type arrayBuilder[T arrow.NumericType | bool] interface {
 	array.Builder
 	Append(T)
 	AppendValues([]T, []bool)
 }
 
-func ArrayFromSlice[T NumericTypes | bool](mem memory.Allocator, data []T) arrow.Array {
-	bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T])
+func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array {
+	bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T])
 	defer bldr.Release()
 
 	bldr.AppendValues(data, nil)
 	return bldr.NewArray()
 }
 
-func ArrayFromSliceWithValid[T NumericTypes | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array {
-	bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T])
+func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array {
+	bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T])
 	defer bldr.Release()
 
 	bldr.AppendValues(data, valid)
@@ -323,7 +201,7 @@ func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) {
 }
 
 type arrayTypes interface {
-	FixedWidthTypes | TemporalTypes | bool | string | []byte
+	arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte
 }
 
 type ArrayIter[T arrayTypes] interface {
@@ -345,11 +223,11 @@ func (b *BoolIter) Next() (out bool) {
 	return
 }
 
-type PrimitiveIter[T FixedWidthTypes] struct {
+type PrimitiveIter[T arrow.FixedWidthType] struct {
 	Values []T
 }
 
-func NewPrimitiveIter[T FixedWidthTypes](arr *ArraySpan) ArrayIter[T] {
+func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) ArrayIter[T] {
 	return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)}
 }
 
diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go
index b26e4ff41e7..345d6dcf3b4 100644
--- a/go/arrow/compute/exec/utils_test.go
+++ b/go/arrow/compute/exec/utils_test.go
@@ -53,7 +53,7 @@ func TestRechunkConsistentArraysTrivial(t *testing.T) {
 	}
 }
 
-func assertEqual[T exec.NumericTypes](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) {
+func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) {
 	exp := exec.ArrayFromSlice(mem, data)
 	defer exp.Release()
 	assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr)
diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go
index 565ae3bfadb..036e1e355ed 100644
--- a/go/arrow/compute/fieldref.go
+++ b/go/arrow/compute/fieldref.go
@@ -282,31 +282,31 @@ type refImpl interface {
 //
 // Nested fields can be referenced as well, given the schema:
 //
-//   arrow.NewSchema([]arrow.Field{
-//       {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})},
-//       {Name: "b", Type: arrow.PrimitiveTypes.Int32},
-//   })
+//	arrow.NewSchema([]arrow.Field{
+//		{Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})},
+//		{Name: "b", Type: arrow.PrimitiveTypes.Int32},
+//	})
 //
 // the following all indicate the nested field named "n":
 //
-//   FieldRefPath(FieldPath{0, 0})
-//   FieldRefList("a", 0)
-//   FieldRefList("a", "n")
-//   FieldRefList(0, "n")
-//   NewFieldRefFromDotPath(".a[0]")
+//	FieldRefPath(FieldPath{0, 0})
+//	FieldRefList("a", 0)
+//	FieldRefList("a", "n")
+//	FieldRefList(0, "n")
+//	NewFieldRefFromDotPath(".a[0]")
 //
 // FieldPaths matching a FieldRef are retrieved with the FindAll* functions
 // Multiple matches are possible because field names may be duplicated within
 // a schema. For example:
 //
-//   aIsAmbiguous := arrow.NewSchema([]arrow.Field{
-//       {Name: "a", Type: arrow.PrimitiveTypes.Int32},
-//       {Name: "a", Type: arrow.PrimitiveTypes.Float32},
-//   })
-//   matches := FieldRefName("a").FindAll(aIsAmbiguous)
-//   assert.Len(matches, 2)
-//   assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0))
-//   assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1))
+//	aIsAmbiguous := arrow.NewSchema([]arrow.Field{
+//		{Name: "a", Type: arrow.PrimitiveTypes.Int32},
+//		{Name: "a", Type: arrow.PrimitiveTypes.Float32},
+//	})
+//	matches := FieldRefName("a").FindAll(aIsAmbiguous)
+//	assert.Len(matches, 2)
+//	assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)))
+//	assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)))
 type FieldRef struct {
 	impl refImpl
 }
@@ -346,17 +346,18 @@ func FieldRefList(elems ...interface{}) FieldRef {
 // NewFieldRefFromDotPath parses a dot path into a field ref.
 //
 // dot_path = '.' name
-//          | '[' digit+ ']'
-//          | dot_path+
+//
+//	| '[' digit+ ']'
+//	| dot_path+
 //
 // Examples
 //
-//   ".alpha" => FieldRefName("alpha")
-//   "[2]" => FieldRefIndex(2)
-//   ".beta[3]" => FieldRefList("beta", 3)
-//   "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7)
-//   ".hello world" => FieldRefName("hello world")
-//   `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`)
+//	".alpha" => FieldRefName("alpha")
+//	"[2]" => FieldRefIndex(2)
+//	".beta[3]" => FieldRefList("beta", 3)
+//	"[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7)
+//	".hello world" => FieldRefName("hello world")
+//	`.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`)
 //
 // Note: when parsing a name, a '\' preceding any other character will be
 // dropped from the resulting name; therefore if a name must contain the characters
diff --git a/go/arrow/compute/internal/kernels/base_arithmetic.go b/go/arrow/compute/internal/kernels/base_arithmetic.go
index 4ef0031f314..b795c04c39e 100644
--- a/go/arrow/compute/internal/kernels/base_arithmetic.go
+++ b/go/arrow/compute/internal/kernels/base_arithmetic.go
@@ -81,7 +81,7 @@ const (
 	OpLogbChecked
 )
 
-func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) {
+func mulWithOverflow[T arrow.IntType | arrow.UintType](a, b T) (T, error) {
 	min, max := MinOf[T](), MaxOf[T]()
 	switch {
 	case a > 0:
@@ -107,7 +107,7 @@ func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) {
 	return a * b, nil
 }
 
-func getGoArithmeticBinary[OutT, Arg0T, Arg1T exec.NumericTypes](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] {
+func getGoArithmeticBinary[OutT, Arg0T, Arg1T arrow.NumericType](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] {
 	return binaryOps[OutT, Arg0T, Arg1T]{
 		arrArr: func(_ *exec.KernelCtx, left []Arg0T, right []Arg1T, out []OutT) error {
 			var err error
@@ -143,7 +143,7 @@ var (
 	errLogNeg = fmt.Errorf("%w: logarithm of negative number", arrow.ErrInvalid)
 )
 
-func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec {
+func getGoArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec {
 	switch op {
 	case OpAdd:
 		return ScalarBinary(getGoArithmeticBinary(func(a, b InT, _ *error) OutT { return OutT(a + b) }))
@@ -178,7 +178,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arit
 
 		if SizeOf[InT]() == SizeOf[OutT]() {
 			return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error {
-				in, output := exec.GetBytes(arg), exec.GetBytes(out)
+				in, output := arrow.GetBytes(arg), arrow.GetBytes(out)
 				copy(output, in)
 				return nil
 			})
@@ -314,7 +314,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arit
 		}
 		if SizeOf[InT]() == SizeOf[OutT]() {
 			return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error {
-				in, output := exec.GetBytes(arg), exec.GetBytes(out)
+				in, output := arrow.GetBytes(arg), arrow.GetBytes(out)
 				copy(output, in)
 				return nil
 			})
@@ -837,7 +837,7 @@ func ArithmeticExecSameType(ty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec
 	return nil
 }
 
-func arithmeticExec[InT exec.IntTypes | exec.UintTypes](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec {
+func arithmeticExec[InT arrow.IntType | arrow.UintType](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec {
 	switch oty {
 	case arrow.INT8:
 		return getArithmeticOpIntegral[InT, int8](op)
diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go
index 942b8e4ff56..51b1866fb68 100644
--- a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go
+++ b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go
@@ -21,63 +21,64 @@ package kernels
 import (
 	"unsafe"
 
+	"github.com/apache/arrow/go/v15/arrow"
 	"github.com/apache/arrow/go/v15/arrow/compute/exec"
 	"github.com/apache/arrow/go/v15/arrow/internal/debug"
 	"golang.org/x/exp/constraints"
 	"golang.org/x/sys/cpu"
 )
 
-func getAvx2ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] {
-	typ := exec.GetType[T]()
+func getAvx2ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] {
+	typ := arrow.GetType[T]()
 	return binaryOps[T, T, T]{
 		arrArr: func(_ *exec.KernelCtx, Arg0, Arg1, Out []T) error {
-			arithmeticAvx2(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0))
+			arithmeticAvx2(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0))
 			return nil
 		},
 		arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error {
-			arithmeticArrScalarAvx2(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0))
+			arithmeticArrScalarAvx2(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0))
 			return nil
 		},
 		scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error {
-			arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1))
+			arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1))
 			return nil
 		},
 	}
 }
 
-func getSSE4ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] {
-	typ := exec.GetType[T]()
+func getSSE4ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] {
+	typ := arrow.GetType[T]()
 	return binaryOps[T, T, T]{
 		arrArr: func(_ *exec.KernelCtx, Arg0, Arg1, Out []T) error {
-			arithmeticSSE4(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0))
+			arithmeticSSE4(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0))
 			return nil
 		},
 		arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error {
-			arithmeticArrScalarSSE4(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0))
+			arithmeticArrScalarSSE4(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0))
 			return nil
 		},
 		scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error {
-			arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1))
+			arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1))
 			return nil
 		},
 	}
 }
 
-func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec {
+func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec {
 	if cpu.X86.HasAVX2 {
 		switch op {
 		case OpAdd, OpSub, OpMul:
 			return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op))
 		case OpAbsoluteValue, OpNegate:
-			typ := exec.GetType[InT]()
+			typ := arrow.GetType[InT]()
 			return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error {
-				arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		case OpSign:
-			inType, outType := exec.GetType[InT](), exec.GetType[OutT]()
+			inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]()
 			return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error {
-				arithmeticUnaryDiffTypesAvx2(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnaryDiffTypesAvx2(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		}
@@ -86,15 +87,15 @@ func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arithm
 		case OpAdd, OpSub, OpMul:
 			return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op))
 		case OpAbsoluteValue, OpNegate:
-			typ := exec.GetType[InT]()
+			typ := arrow.GetType[InT]()
 			return ScalarUnary(func(ctx *exec.KernelCtx, arg, out []InT) error {
-				arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		case OpSign:
-			inType, outType := exec.GetType[InT](), exec.GetType[OutT]()
+			inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]()
 			return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error {
-				arithmeticUnaryDiffTypesSSE4(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnaryDiffTypesSSE4(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		}
@@ -109,38 +110,38 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec.
 	if cpu.X86.HasAVX2 {
 		switch op {
 		case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked:
-			if exec.GetType[InT]() != exec.GetType[OutT]() {
+			if arrow.GetType[InT]() != arrow.GetType[OutT]() {
 				debug.Assert(false, "not implemented")
 				return nil
 			}
 			return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op))
 		case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign:
-			if exec.GetType[InT]() != exec.GetType[OutT]() {
+			if arrow.GetType[InT]() != arrow.GetType[OutT]() {
 				debug.Assert(false, "not implemented")
 				return nil
 			}
-			typ := exec.GetType[InT]()
+			typ := arrow.GetType[InT]()
 			return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error {
-				arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		}
 	} else if cpu.X86.HasSSE42 {
 		switch op {
 		case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked:
-			if exec.GetType[InT]() != exec.GetType[OutT]() {
+			if arrow.GetType[InT]() != arrow.GetType[OutT]() {
 				debug.Assert(false, "not implemented")
 				return nil
 			}
 			return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op))
 		case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign:
-			if exec.GetType[InT]() != exec.GetType[OutT]() {
+			if arrow.GetType[InT]() != arrow.GetType[OutT]() {
 				debug.Assert(false, "not implemented")
 				return nil
 			}
-			typ := exec.GetType[InT]()
+			typ := arrow.GetType[InT]()
 			return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error {
-				arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg))
+				arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg))
 				return nil
 			})
 		}
diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go
index 8e46ca030c8..d6841c57c7b 100644
--- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go
+++ b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go
@@ -27,6 +27,6 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec.
 	return getGoArithmeticOpFloating[InT, OutT](op)
 }
 
-func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec {
+func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec {
 	return getGoArithmeticOpIntegral[InT, OutT](op)
 }
diff --git a/go/arrow/compute/internal/kernels/boolean_cast.go b/go/arrow/compute/internal/kernels/boolean_cast.go
index 923c5b3f545..6109d257909 100644
--- a/go/arrow/compute/internal/kernels/boolean_cast.go
+++ b/go/arrow/compute/internal/kernels/boolean_cast.go
@@ -27,7 +27,7 @@ import (
 	"github.com/apache/arrow/go/v15/arrow/compute/exec"
 )
 
-func isNonZero[T exec.FixedWidthTypes](ctx *exec.KernelCtx, in []T, out []byte) error {
+func isNonZero[T arrow.FixedWidthType](ctx *exec.KernelCtx, in []T, out []byte) error {
 	var zero T
 	for i, v := range in {
 		bitutil.SetBitTo(out, i, v != zero)
diff --git a/go/arrow/compute/internal/kernels/helpers.go b/go/arrow/compute/internal/kernels/helpers.go
index 686c4b3e0c2..1ac09ba43bf 100644
--- a/go/arrow/compute/internal/kernels/helpers.go
+++ b/go/arrow/compute/internal/kernels/helpers.go
@@ -37,9 +37,9 @@ import (
 // which will receive a slice containing the raw input data along with
 // a slice to populate for the output data.
 //
-// Note that bool is not included in exec.FixedWidthTypes since it is
+// Note that bool is not included in arrow.FixedWidthType since it is
 // represented as a bitmap, not as a slice of bool.
-func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec {
+func ScalarUnary[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec {
 	return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
 		arg0 := in.Values[0].Array
 		inData := exec.GetSpanValues[Arg0T](&arg0, 1)
@@ -51,7 +51,7 @@ func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Ar
 // ScalarUnaryNotNull is for generating a kernel to operate only on the
 // non-null values in the input array. The zero value of the output type
 // is used for any null input values.
-func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec {
+func ScalarUnaryNotNull[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec {
 	return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
 		var (
 			arg0 = &in.Values[0].Array
@@ -78,7 +78,7 @@ func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCt
 // ScalarUnaryBoolOutput is like ScalarUnary only it is for cases of boolean
 // output. The function should take in a slice of the input type and a slice
 // of bytes to fill with the output boolean bitmap.
-func ScalarUnaryBoolOutput[Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec {
+func ScalarUnaryBoolOutput[Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec {
 	return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error {
 		arg0 := in.Values[0].Array
 		inData := exec.GetSpanValues[Arg0T](&arg0, 1)
@@ -127,7 +127,7 @@ func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](defVal bool, op f
 // It implements the handling to iterate the offsets and values calling
 // the provided function on each byte slice. The zero value of the OutT
 // will be used as the output for elements of the input that are null.
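 // A hypothetical illustration, not part of this change: a kernel mapping
 // each non-null binary element to its byte length could be built as
 //
 //	lengths := ScalarUnaryNotNullBinaryArg[int32, int32](
 //		func(_ *exec.KernelCtx, val []byte, _ *error) int32 {
 //			return int32(len(val)) // null slots keep the zero value
 //		})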
The zero value of the OutT // will be used as the output for elements of the input that are null. -func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { +func ScalarUnaryNotNullBinaryArg[OutT arrow.FixedWidthType, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { var ( arg0 = &in.Values[0].Array @@ -156,14 +156,14 @@ func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int6 // ScalarUnaryBoolArg is like ScalarUnary except it specifically expects a // function that takes a byte slice since booleans arrays are represented // as a bitmap. -func ScalarUnaryBoolArg[OutT exec.FixedWidthTypes](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { +func ScalarUnaryBoolArg[OutT arrow.FixedWidthType](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, input *exec.ExecSpan, out *exec.ExecResult) error { outData := exec.GetSpanValues[OutT](out, 1) return op(ctx, input.Values[0].Array.Buffers[1].Buf, outData) } } -func UnboxScalar[T exec.FixedWidthTypes](val scalar.PrimitiveScalar) T { +func UnboxScalar[T arrow.FixedWidthType](val scalar.PrimitiveScalar) T { return *(*T)(unsafe.Pointer(&val.Data()[0])) } @@ -174,11 +174,11 @@ func UnboxBinaryScalar(val scalar.BinaryScalar) []byte { return val.Data() } -type arrArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error -type arrScalarFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error -type scalarArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error +type arrArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error +type arrScalarFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error +type scalarArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error -type binaryOps[OutT, Arg0T, Arg1T exec.FixedWidthTypes] struct { +type binaryOps[OutT, Arg0T, Arg1T arrow.FixedWidthType] struct { arrArr arrArrFn[OutT, Arg0T, Arg1T] arrScalar arrScalarFn[OutT, Arg0T, Arg1T] scalarArr scalarArrFn[OutT, Arg0T, Arg1T] @@ -190,7 +190,7 @@ type binaryBoolOps struct { scalarArr func(ctx *exec.KernelCtx, lhs bool, rhs, out bitutil.Bitmap) error } -func ScalarBinary[OutT, Arg0T, Arg1T exec.FixedWidthTypes](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { +func ScalarBinary[OutT, Arg0T, Arg1T arrow.FixedWidthType](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { var ( a0 = exec.GetSpanValues[Arg0T](arg0, 1) @@ -281,7 +281,7 @@ func ScalarBinaryBools(ops *binaryBoolOps) exec.ArrayKernelExec { } } -func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { +func ScalarBinaryNotNull[OutT, Arg0T, Arg1T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) (err error) { // fast path if one side is entirely null if arg0.UpdateNullCount() == arg0.Len || arg1.UpdateNullCount() == arg1.Len { @@ 
-379,7 +379,7 @@ func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec. } } -type binaryBinOp[T exec.FixedWidthTypes | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T +type binaryBinOp[T arrow.FixedWidthType | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T func ScalarBinaryBinaryArgsBoolOut(itrFn func(*exec.ArraySpan) exec.ArrayIter[[]byte], op binaryBinOp[bool]) exec.ArrayKernelExec { arrArr := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { @@ -577,7 +577,7 @@ func intsCanFit(data *exec.ArraySpan, target arrow.Type) error { } } -func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBound, upperBound T) error { +func intsInRange[T arrow.IntType | arrow.UintType](data *exec.ArraySpan, lowerBound, upperBound T) error { if MinOf[T]() >= lowerBound && MaxOf[T]() <= upperBound { return nil } @@ -653,7 +653,7 @@ func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBo } type numeric interface { - exec.IntTypes | exec.UintTypes | constraints.Float + arrow.IntType | arrow.UintType | constraints.Float } func memCpySpan[T numeric](in, out *exec.ArraySpan) { @@ -883,12 +883,12 @@ func (bldr *execBufBuilder) finish() (buf *memory.Buffer) { return } -type bufferBuilder[T exec.FixedWidthTypes] struct { +type bufferBuilder[T arrow.FixedWidthType] struct { execBufBuilder zero T } -func newBufferBuilder[T exec.FixedWidthTypes](mem memory.Allocator) *bufferBuilder[T] { +func newBufferBuilder[T arrow.FixedWidthType](mem memory.Allocator) *bufferBuilder[T] { return &bufferBuilder[T]{ execBufBuilder: execBufBuilder{ mem: mem, @@ -901,11 +901,11 @@ func (b *bufferBuilder[T]) reserve(additional int) { } func (b *bufferBuilder[T]) unsafeAppend(value T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes([]T{value})) + b.execBufBuilder.unsafeAppend(arrow.GetBytes([]T{value})) } func (b *bufferBuilder[T]) unsafeAppendSlice(values []T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes(values)) + b.execBufBuilder.unsafeAppend(arrow.GetBytes(values)) } func (b *bufferBuilder[T]) len() int { return b.sz / int(unsafe.Sizeof(b.zero)) } @@ -914,7 +914,7 @@ func (b *bufferBuilder[T]) cap() int { return cap(b.data) / int(unsafe.Sizeof(b.zero)) } -func checkIndexBoundsImpl[T exec.IntTypes | exec.UintTypes](values *exec.ArraySpan, upperLimit uint64) error { +func checkIndexBoundsImpl[T arrow.IntType | arrow.UintType](values *exec.ArraySpan, upperLimit uint64) error { // for unsigned integers, if the values array is larger // than the maximum index value, then there's no need to bounds check isSigned := !arrow.IsUnsignedInteger(values.Type.ID()) diff --git a/go/arrow/compute/internal/kernels/numeric_cast.go b/go/arrow/compute/internal/kernels/numeric_cast.go index c055552bf7f..d31edfdd308 100644 --- a/go/arrow/compute/internal/kernels/numeric_cast.go +++ b/go/arrow/compute/internal/kernels/numeric_cast.go @@ -69,13 +69,13 @@ func CastIntegerToFloating(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec. 
return nil } -type decimal[T exec.DecimalTypes] interface { +type decimal[T decimal128.Num | decimal256.Num] interface { Less(T) bool GreaterEqual(T) bool LowBits() uint64 } -func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { +func decimalToIntImpl[InT decimal128.Num | decimal256.Num, OutT arrow.IntType | arrow.UintType](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { if !allowOverflow && (v.Less(min) || v.GreaterEqual(max)) { debug.Log("integer value out of bounds from decimal") *err = fmt.Errorf("%w: integer value out of bounds", arrow.ErrInvalid) @@ -84,7 +84,7 @@ func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes return OutT(v.LowBits()) } -func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal256ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal256Type) @@ -125,7 +125,7 @@ func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal128ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal128Type) @@ -166,7 +166,7 @@ func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal128[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal128.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -183,7 +183,7 @@ func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal256[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal256.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -200,7 +200,7 @@ func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastIntegerToDecimal[OutT decimal128.Num | decimal256.Num, Arg0 arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( precision, scale int32 executor exec.ArrayKernelExec @@ -234,7 +234,7 @@ func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.Uint return executor(ctx, batch, out) } -func getCastIntToDecimal[T exec.DecimalTypes](inType arrow.Type) exec.ArrayKernelExec { +func getCastIntToDecimal[T decimal128.Num | decimal256.Num](inType arrow.Type) exec.ArrayKernelExec { switch inType { case arrow.UINT8: return CastIntegerToDecimal[T, uint8] @@ -543,7 +543,7 @@ func boolToNum[T 
numeric](_ *exec.KernelCtx, in []byte, out []T) error { return nil } -func checkFloatTrunc[InT constraints.Float, OutT exec.IntTypes | exec.UintTypes](in, out *exec.ArraySpan) error { +func checkFloatTrunc[InT constraints.Float, OutT arrow.IntType | arrow.UintType](in, out *exec.ArraySpan) error { wasTrunc := func(out OutT, in InT) bool { return InT(out) != in } @@ -665,7 +665,7 @@ func checkIntToFloatTrunc(in *exec.ArraySpan, outType arrow.Type) error { return nil } -func parseStringToNumberImpl[T exec.IntTypes | exec.UintTypes | exec.FloatTypes, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { +func parseStringToNumberImpl[T arrow.IntType | arrow.UintType | arrow.FloatType, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { return ScalarUnaryNotNullBinaryArg[T, OffsetT](func(_ *exec.KernelCtx, in []byte, err *error) T { st := *(*string)(unsafe.Pointer(&in)) v, e := parseFn(st) @@ -749,7 +749,7 @@ func addCommonNumberCasts[T numeric](outTy arrow.DataType, kernels []exec.Scalar return kernels } -func GetCastToInteger[T exec.IntTypes | exec.UintTypes](outType arrow.DataType) []exec.ScalarKernel { +func GetCastToInteger[T arrow.IntType | arrow.UintType](outType arrow.DataType) []exec.ScalarKernel { kernels := make([]exec.ScalarKernel, 0) output := exec.NewOutputType(outType) diff --git a/go/arrow/compute/internal/kernels/scalar_arithmetic.go b/go/arrow/compute/internal/kernels/scalar_arithmetic.go index cf17e9fd954..f1ed21065e4 100644 --- a/go/arrow/compute/internal/kernels/scalar_arithmetic.go +++ b/go/arrow/compute/internal/kernels/scalar_arithmetic.go @@ -254,7 +254,7 @@ func GetBitwiseBinaryKernels(op BitwiseOp) []exec.ScalarKernel { return append(kernels, NullExecKernel(2)) } -func bitwiseNot[T exec.IntTypes | exec.UintTypes](_ *exec.KernelCtx, arg T, _ *error) T { +func bitwiseNot[T arrow.IntType | arrow.UintType](_ *exec.KernelCtx, arg T, _ *error) T { return ^arg } @@ -290,7 +290,7 @@ const ( ShiftRight ) -func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelSignedImpl[T arrow.IntType, Unsigned arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8*SizeOf[T]() - 1) @@ -334,7 +334,7 @@ func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDi return nil } -func shiftKernelUnsignedImpl[T exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelUnsignedImpl[T arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8 * SizeOf[T]()) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go index 8e5ce1ab7c1..52cd2c31a2a 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go @@ -22,7 +22,6 @@ import ( "unsafe" "github.com/apache/arrow/go/v15/arrow" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/sys/cpu" ) @@ -32,12 +31,12 @@ type cmpfn func(arrow.Type, []byte, []byte, []byte, int64, int) var comparisonMap map[CompareOperator][3]cmpfn -func genCompareKernel[T exec.NumericTypes](op CompareOperator) *CompareData { +func genCompareKernel[T arrow.NumericType](op 
CompareOperator) *CompareData { if pureGo { return genGoCompareKernel(getCmpOp[T](op)) } - ty := exec.GetType[T]() + ty := arrow.GetType[T]() byteWidth := int(unsafe.Sizeof(T(0))) comparisonFns := comparisonMap[op] return &CompareData{ diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index c0aef5a04e9..e1a22b9b9f9 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -20,6 +20,6 @@ package kernels import "github.com/apache/arrow/go/v15/arrow/compute/exec" -func genCompareKernel[T exec.NumericTypes](op CompareOperator) *CompareData { +func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) } diff --git a/go/arrow/compute/internal/kernels/scalar_comparisons.go b/go/arrow/compute/internal/kernels/scalar_comparisons.go index 9a7640a8d8a..29e6db29cb2 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparisons.go +++ b/go/arrow/compute/internal/kernels/scalar_comparisons.go @@ -35,22 +35,22 @@ import ( type binaryKernel func(left, right, out []byte, offset int) -type cmpFn[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, []RightT, []uint32) -type cmpScalarLeft[LeftT, RightT exec.FixedWidthTypes] func(LeftT, []RightT, []uint32) -type cmpScalarRight[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, RightT, []uint32) +type cmpFn[LeftT, RightT arrow.FixedWidthType] func([]LeftT, []RightT, []uint32) +type cmpScalarLeft[LeftT, RightT arrow.FixedWidthType] func(LeftT, []RightT, []uint32) +type cmpScalarRight[LeftT, RightT arrow.FixedWidthType] func([]LeftT, RightT, []uint32) -type cmpOp[T exec.FixedWidthTypes] struct { +type cmpOp[T arrow.FixedWidthType] struct { arrArr cmpFn[T, T] arrScalar cmpScalarRight[T, T] scalarArr cmpScalarLeft[T, T] } -func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKernel { +func comparePrimitiveArrayArray[T arrow.FixedWidthType](op cmpFn[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) - right = exec.GetData[T](rightBytes) + left = arrow.GetData[T](leftBytes) + right = arrow.GetData[T](rightBytes) nvals = len(left) nbatches = nvals / batchSize tmpOutput [batchSize]uint32 @@ -83,11 +83,11 @@ func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKe } } -func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T]) binaryKernel { +func comparePrimitiveArrayScalar[T arrow.FixedWidthType](op cmpScalarRight[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) + left = arrow.GetData[T](leftBytes) rightVal = *(*T)(unsafe.Pointer(&rightBytes[0])) nvals = len(left) nbatches = nvals / batchSize @@ -121,12 +121,12 @@ func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T] } } -func comparePrimitiveScalarArray[T exec.FixedWidthTypes](op cmpScalarLeft[T, T]) binaryKernel { +func comparePrimitiveScalarArray[T arrow.FixedWidthType](op cmpScalarLeft[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( leftVal = *(*T)(unsafe.Pointer(&leftBytes[0])) - right = exec.GetData[T](rightBytes) + right = arrow.GetData[T](rightBytes) nvals = len(right) nbatches = nvals / batchSize @@ -181,7 +181,7 @@ func 
@@ -181,7 +181,7 @@ func getOffsetSpanBytes(span *exec.ArraySpan) []byte {
 	return buf[start : start+(span.Len*byteWidth)]
 }
 
-func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
+func compareKernel[T arrow.FixedWidthType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
 	kn := ctx.Kernel.(*exec.ScalarKernel)
 	knData := kn.Data.(CompareFuncData).Funcs()
 
@@ -202,7 +202,7 @@ func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.Exec
 	return nil
 }
 
-func genGoCompareKernel[T exec.FixedWidthTypes](op *cmpOp[T]) *CompareData {
+func genGoCompareKernel[T arrow.FixedWidthType](op *cmpOp[T]) *CompareData {
 	return &CompareData{
 		funcAA: comparePrimitiveArrayArray(op.arrArr),
 		funcAS: comparePrimitiveArrayScalar(op.arrScalar),
@@ -376,7 +376,7 @@ func genDecimalCompareKernel[T decimal128.Num | decimal256.Num](op CompareOperat
 	return
 }
 
-func getCmpOp[T exec.NumericTypes](op CompareOperator) *cmpOp[T] {
+func getCmpOp[T arrow.NumericType](op CompareOperator) *cmpOp[T] {
 	switch op {
 	case CmpEQ:
 		return &cmpOp[T]{
@@ -524,7 +524,7 @@ func getBinaryCmp(op CompareOperator) binaryBinOp[bool] {
 	return nil
 }
 
-func numericCompareKernel[T exec.NumericTypes](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) {
+func numericCompareKernel[T arrow.NumericType](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) {
 	ex := compareKernel[T]
 	kn = exec.NewScalarKernelWithSig(&exec.KernelSignature{
 		InputTypes: []exec.InputType{ty, ty},
diff --git a/go/arrow/compute/internal/kernels/string_casts.go b/go/arrow/compute/internal/kernels/string_casts.go
index 76da901e33f..d9cf52320b3 100644
--- a/go/arrow/compute/internal/kernels/string_casts.go
+++ b/go/arrow/compute/internal/kernels/string_casts.go
@@ -116,7 +116,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC
 			outOffsets := exec.GetSpanOffsets[OutOffsetsT](out, 1)
 
 			castNumericUnsafe(arrow.INT64, arrow.INT32,
-				exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets))
+				arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets))
 			return nil
 		default:
 			// upcast from int32 -> int64
@@ -127,7 +127,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC
 			outOffsets := exec.GetSpanOffsets[OutOffsetsT](out, 1)
 
 			castNumericUnsafe(arrow.INT32, arrow.INT64,
-				exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets))
+				arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets))
 			return nil
 		}
 	}
@@ -201,8 +201,8 @@ func GetFsbCastKernels() []exec.ScalarKernel {
 func float16Formatter(v float16.Num) string { return v.String() }
 func date32Formatter(v arrow.Date32) string { return v.FormattedString() }
 func date64Formatter(v arrow.Date64) string { return v.FormattedString() }
-func numericFormatterSigned[T exec.IntTypes](v T) string { return strconv.FormatInt(int64(v), 10) }
-func numericFormatterUnsigned[T exec.UintTypes](v T) string { return strconv.FormatUint(uint64(v), 10) }
+func numericFormatterSigned[T arrow.IntType](v T) string { return strconv.FormatInt(int64(v), 10) }
+func numericFormatterUnsigned[T arrow.UintType](v T) string { return strconv.FormatUint(uint64(v), 10) }
 
 func float32Formatter(v float32) string { return strconv.FormatFloat(float64(v), 'g', -1, 32) }
 func float64Formatter(v float64) string { return strconv.FormatFloat(v, 'g', -1, 64) }
@@ -247,7 +247,7 @@ func timeToStringCastExec[T timeIntrinsic](ctx *exec.KernelCtx, batch *exec.Exec
 	return nil
 }
 
-func numericToStringCastExec[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](formatter func(T) string) exec.ArrayKernelExec {
+func numericToStringCastExec[T arrow.IntType | arrow.UintType | arrow.FloatType](formatter func(T) string) exec.ArrayKernelExec {
 	return func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
 		var (
 			input = &batch.Values[0].Array
diff --git a/go/arrow/compute/internal/kernels/vector_hash.go b/go/arrow/compute/internal/kernels/vector_hash.go
index 9401e31cc5b..f6c9a7f39db 100644
--- a/go/arrow/compute/internal/kernels/vector_hash.go
+++ b/go/arrow/compute/internal/kernels/vector_hash.go
@@ -178,7 +178,7 @@ func doAppendFixedSize(action Action, memo hashing.MemoTable, arr *exec.ArraySpa
 	})
 }
 
-func doAppendNumeric[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error {
+func doAppendNumeric[T arrow.IntType | arrow.UintType | arrow.FloatType](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error {
 	arrData := exec.GetSpanValues[T](arr, 1)
 	shouldEncodeNulls := action.ShouldEncodeNulls()
 	return bitutils.VisitBitBlocksShort(arr.Buffers[0].Buf, arr.Offset, arr.Len,
diff --git a/go/arrow/compute/internal/kernels/vector_run_end_encode.go b/go/arrow/compute/internal/kernels/vector_run_end_encode.go
index 076bef13684..017b9712025 100644
--- a/go/arrow/compute/internal/kernels/vector_run_end_encode.go
+++ b/go/arrow/compute/internal/kernels/vector_run_end_encode.go
@@ -46,18 +46,18 @@ type RunEndsType interface {
 	int16 | int32 | int64
 }
 
-func readFixedWidthVal[V exec.FixedWidthTypes](inputValidity, inputValues []byte, offset int64, out *V) bool {
+func readFixedWidthVal[V arrow.FixedWidthType](inputValidity, inputValues []byte, offset int64, out *V) bool {
 	sz := int64(unsafe.Sizeof(*out))
 	*out = *(*V)(unsafe.Pointer(&inputValues[offset*sz]))
 	return bitutil.BitIsSet(inputValidity, int(offset))
 }
 
-func writeFixedWidthVal[V exec.FixedWidthTypes](result *exec.ExecResult, offset int64, valid bool, value V) {
+func writeFixedWidthVal[V arrow.FixedWidthType](result *exec.ExecResult, offset int64, valid bool, value V) {
 	if len(result.Buffers[0].Buf) != 0 {
 		bitutil.SetBitTo(result.Buffers[0].Buf, int(offset), valid)
 	}
 
-	arr := exec.GetData[V](result.Buffers[1].Buf)
+	arr := arrow.GetData[V](result.Buffers[1].Buf)
 	arr[offset] = value
 }
 
@@ -73,7 +73,7 @@ func writeBoolVal(result *exec.ExecResult, offset int64, valid bool, value bool)
 	bitutil.SetBitTo(result.Buffers[1].Buf, int(offset), value)
 }
 
-type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] struct {
+type runEndEncodeLoopFixedWidth[R RunEndsType, V arrow.FixedWidthType | bool] struct {
 	inputLen, inputOffset int64
 	inputValidity         []byte
 	inputValues           []byte
@@ -84,7 +84,7 @@ type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] st
 }
 
 func (re *runEndEncodeLoopFixedWidth[R, V]) WriteEncodedRuns(out *exec.ExecResult) int64 {
-	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
+	outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf)
 	readOffset := re.inputOffset
 
 	var currentRun V
@@ -155,7 +155,7 @@ func (re *runEndEncodeLoopFixedWidth[R, V]) PreallocOutput(ctx *exec.KernelCtx,
 		valueBuffer = ctx.Allocate(int(numOutput) * bufSpec.ByteWidth)
 	}
 
-	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
+	reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType)
 	out.Release()
 
 	*out = exec.ExecResult{
@@ -230,7 +230,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6
 	}
 	valueBuffer := ctx.Allocate(re.width * int(numOutput))
 
-	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
+	reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType)
 	out.Release()
 
 	*out = exec.ExecResult{
@@ -258,7 +258,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6
 }
 
 func (re *runEndEncodeFSB[R]) WriteEncodedRuns(out *exec.ExecResult) int64 {
-	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
+	outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf)
 	outputValues := out.Children[1].Buffers[1].Buf
 
 	readOffset := re.inputOffset
@@ -362,7 +362,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numO
 	valueBuffer := ctx.Allocate(int(re.estimatedValuesLen))
 	offsetsBuffer := ctx.Allocate(int(numOutput+1) * int(SizeOf[O]()))
 
-	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
+	reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType)
 
 	*out = exec.ExecResult{
 		Type: reeType,
 		Len:  re.inputLen,
@@ -389,7 +389,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numO
 }
 
 func (re *runEndEncodeLoopBinary[R, O]) WriteEncodedRuns(out *exec.ExecResult) int64 {
-	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
+	outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf)
 	outputOffsets := exec.GetSpanOffsets[O](&out.Children[1], 1)
 	outputValues := out.Children[1].Buffers[2].Buf
 
@@ -443,7 +443,7 @@ func validateRunEndType[R RunEndsType](length int64) error {
 	return nil
 }
 
-func createEncoder[R RunEndsType, V exec.FixedWidthTypes](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] {
+func createEncoder[R RunEndsType, V arrow.FixedWidthType](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] {
 	return &runEndEncodeLoopFixedWidth[R, V]{
 		inputLen:    input.Len,
 		inputOffset: input.Offset,
@@ -539,7 +539,7 @@ func runEndEncodeImpl[R RunEndsType](ctx *exec.KernelCtx, batch *exec.ExecSpan,
 	)
 
 	if inputLen == 0 {
-		reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), inputArr.Type)
+		reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), inputArr.Type)
 		*out = exec.ExecResult{
 			Type: reeType,
 			Children: []exec.ArraySpan{
diff --git a/go/arrow/compute/internal/kernels/vector_selection.go b/go/arrow/compute/internal/kernels/vector_selection.go
index 714e452325b..f08bb4100bf 100644
--- a/go/arrow/compute/internal/kernels/vector_selection.go
+++ b/go/arrow/compute/internal/kernels/vector_selection.go
@@ -99,12 +99,12 @@ type builder[T any] interface {
 	UnsafeAppendBoolToBitmap(bool)
 }
 
-func getTakeIndices[T exec.IntTypes | exec.UintTypes](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData {
+func getTakeIndices[T arrow.IntType | arrow.UintType](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData {
 	var (
 		filterData      = filter.Buffers[1].Buf
 		haveFilterNulls = filter.MayHaveNulls()
 		filterIsValid   = filter.Buffers[0].Buf
-		idxType         = exec.GetDataType[T]()
+		idxType         = arrow.GetDataType[T]()
 	)
 
 	if haveFilterNulls && nullSelect == EmitNulls {
@@ -394,7 +394,7 @@ func primitiveFilterImpl(wr writeFiltered, values *exec.ArraySpan, filter *exec.
 	}
 }
 
-type filterWriter[T exec.UintTypes] struct {
+type filterWriter[T arrow.UintType] struct {
 	outPosition  int
 	outOffset    int
 	valuesOffset int
@@ -519,7 +519,7 @@ func PrimitiveFilter(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecRe
 	return nil
 }
 
-type primitiveGetter[T exec.IntTypes | bool] interface {
+type primitiveGetter[T arrow.IntType | bool] interface {
 	IsValid(int64) bool
 	GetValue(int64) T
 	NullCount() int64
@@ -542,7 +542,7 @@ func (b *boolGetter) GetValue(i int64) bool {
 func (b *boolGetter) NullCount() int64 { return b.inner.Nulls }
 func (b *boolGetter) Len() int64       { return b.inner.Len }
 
-type primitiveGetterImpl[T exec.IntTypes] struct {
+type primitiveGetterImpl[T arrow.IntType] struct {
 	inner  *exec.ArraySpan
 	values []T
 }
@@ -608,7 +608,7 @@ func (c *chunkedBoolGetter) GetValue(i int64) bool {
 func (c *chunkedBoolGetter) NullCount() int64 { return c.nulls }
 func (c *chunkedBoolGetter) Len() int64       { return c.len }
 
-type chunkedPrimitiveGetter[T exec.IntTypes] struct {
+type chunkedPrimitiveGetter[T arrow.IntType] struct {
 	inner    *arrow.Chunked
 	resolver *exec.ChunkResolver
 	nulls    int64
@@ -619,7 +619,7 @@ type chunkedPrimitiveGetter[T exec.IntTypes] struct {
 	valuesOffset []int64
 }
 
-func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] {
+func newChunkedPrimitiveGetter[T arrow.IntType](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] {
 	nchunks := len(arr.Chunks())
 	lengths := make([]int64, nchunks)
 	valuesData := make([][]T, nchunks)
@@ -630,7 +630,7 @@ func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrim
 		lengths[i] = int64(c.Len())
 		valuesOffset[i] = int64(c.Data().Offset())
 		valuesIsValid[i] = c.NullBitmapBytes()
-		valuesData[i] = exec.GetValues[T](c.Data(), 1)
+		valuesData[i] = arrow.GetValues[T](c.Data(), 1)
 	}
 
 	return &chunkedPrimitiveGetter[T]{
@@ -662,7 +662,7 @@ func (c *chunkedPrimitiveGetter[T]) GetValue(i int64) T {
 func (c *chunkedPrimitiveGetter[T]) NullCount() int64 { return c.nulls }
 func (c *chunkedPrimitiveGetter[T]) Len() int64       { return c.len }
 
-func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) {
+func primitiveTakeImpl[IdxT arrow.UintType, ValT arrow.IntType](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) {
 	var (
 		indicesData    = exec.GetSpanValues[IdxT](indices, 1)
 		indicesIsValid = indices.Buffers[0].Buf
@@ -747,7 +747,7 @@ func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitive
 	out.Nulls = out.Len - validCount
 }
 
-func booleanTakeImpl[IdxT exec.UintTypes](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) {
+func booleanTakeImpl[IdxT arrow.UintType](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) {
 	var (
 		indicesData    = exec.GetSpanValues[IdxT](indices, 1)
 		indicesIsValid = indices.Buffers[0].Buf
@@ -876,7 +876,7 @@ func booleanTakeDispatch(values, indices *exec.ArraySpan, out *exec.ExecResult)
 	return nil
 }
 
-func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked, out []*exec.ExecResult) error {
+func takeIdxChunkedDispatch[ValT arrow.IntType](values, indices *arrow.Chunked, out []*exec.ExecResult) error {
 	getter := newChunkedPrimitiveGetter[ValT](values)
 
 	var fn func(primitiveGetter[ValT], *exec.ArraySpan, *exec.ExecResult)
@@ -901,7 +901,7 @@ func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked,
 	return nil
 }
 
-func takeIdxDispatch[ValT exec.IntTypes](values, indices *exec.ArraySpan, out *exec.ExecResult) error {
+func takeIdxDispatch[ValT arrow.IntType](values, indices *exec.ArraySpan, out *exec.ExecResult) error {
 	getter := &primitiveGetterImpl[ValT]{inner: values, values: exec.GetSpanValues[ValT](values, 1)}
 
 	switch indices.Type.(arrow.FixedWidthDataType).Bytes() {
@@ -1368,7 +1368,7 @@ func binaryFilterImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, values, filter
 	return nil
 }
 
-func takeExecImpl[T exec.UintTypes](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error {
+func takeExecImpl[T arrow.UintType](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error {
 	var (
 		validityBuilder = validityBuilder{mem: exec.GetAllocator(ctx.Ctx)}
 		indicesValues   = exec.GetSpanValues[T](indices, 1)
@@ -1600,7 +1600,7 @@ func ListImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan,
 	out.Buffers[1].WrapBuffer(offsetBuilder.finish())
 
 	out.Children = make([]exec.ArraySpan, 1)
-	out.Children[0].Type = exec.GetDataType[OffsetT]()
+	out.Children[0].Type = arrow.GetDataType[OffsetT]()
 	out.Children[0].Len = int64(childIdxBuilder.len())
 	out.Children[0].Buffers[1].WrapBuffer(childIdxBuilder.finish())
 
diff --git a/go/arrow/compute/scalar_compare_test.go b/go/arrow/compute/scalar_compare_test.go
index d209f72c800..1fa0591692e 100644
--- a/go/arrow/compute/scalar_compare_test.go
+++ b/go/arrow/compute/scalar_compare_test.go
@@ -89,7 +89,7 @@ func (c *CompareSuite) validateCompareScalarArr(op kernels.CompareOperator, dt a
 	c.validateCompareDatum(op, lhs, &compute.ArrayDatum{rhs.Data()}, &compute.ArrayDatum{exp.Data()})
 }
 
-func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs, rhs T) bool {
+func slowCompare[T arrow.NumericType | string](op kernels.CompareOperator, lhs, rhs T) bool {
 	switch op {
 	case kernels.CmpEQ:
 		return lhs == rhs
@@ -108,7 +108,7 @@ func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs,
 	}
 }
 
-// func simpleScalarArrayCompare[T exec.NumericTypes](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum {
+// func simpleScalarArrayCompare[T arrow.NumericType](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum {
 // 	var (
 // 		swap = lhs.Kind() == compute.KindArray
 // 		span exec.ArraySpan
@@ -230,7 +230,7 @@ type valuer[T any] interface {
 	Value(int) T
 }
 
-func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum {
+func simpleArrArrCompare[T arrow.NumericType | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum {
 	var (
 		lArr = lhs.(*compute.ArrayDatum).MakeArray()
 		rArr = rhs.(*compute.ArrayDatum).MakeArray()
@@ -263,7 +263,7 @@ func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op
 	return compute.NewDatum(result)
 }
 
-type NumericCompareSuite[T exec.NumericTypes] struct {
+type NumericCompareSuite[T arrow.NumericType] struct {
 	CompareSuite
 }
 
@@ -282,7 +282,7 @@ type NumericCompareSuite[T exec.NumericTypes] struct {
 // }
 
 func (n *NumericCompareSuite[T]) TestSimpleCompareArrayScalar() {
-	dt := exec.GetDataType[T]()
+	dt := arrow.GetDataType[T]()
 	one := compute.NewDatum(scalar.MakeScalar(T(1)))
 
 	n.Run(dt.String(), func() {
@@ -361,7 +361,7 @@ func (n *NumericCompareSuite[T]) TestSimpleCompareArrayScalar() {
 }
 
 func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() {
-	dt := exec.GetDataType[T]()
+	dt := arrow.GetDataType[T]()
 	one := compute.NewDatum(scalar.MakeScalar(T(1)))
 
 	n.Run(dt.String(), func() {
@@ -440,7 +440,7 @@ func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() {
 }
 
 func (n *NumericCompareSuite[T]) TestNullScalar() {
-	dt := exec.GetDataType[T]()
+	dt := arrow.GetDataType[T]()
 	null := compute.NewDatum(scalar.MakeNullScalar(dt))
 
 	n.Run(dt.String(), func() {
@@ -453,7 +453,7 @@ func (n *NumericCompareSuite[T]) TestNullScalar() {
 }
 
 func (n *NumericCompareSuite[T]) TestSimpleCompareArrArr() {
-	dt := exec.GetDataType[T]()
+	dt := arrow.GetDataType[T]()
 
 	n.Run(dt.String(), func() {
 		n.validateCompare(kernels.CmpEQ, dt, `[]`, `[]`, `[]`)
diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go
index 9410720de79..1ceed8d1fc0 100644
--- a/go/arrow/compute/vector_hash_test.go
+++ b/go/arrow/compute/vector_hash_test.go
@@ -36,7 +36,7 @@ import (
 	"golang.org/x/exp/constraints"
 )
 
-func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) {
+func checkUniqueDict[I arrow.IntType | arrow.UintType](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) {
 	out, err := compute.Unique(context.TODO(), input)
 	require.NoError(t, err)
 	defer out.Release()
@@ -52,8 +52,8 @@ func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compu
 	require.Truef(t, array.Equal(exDict, resultDict), "wanted: %s\ngot: %s",
 		exDict, resultDict)
 
-	want := exec.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1)
-	got := exec.GetValues[I](result.Indices().Data(), 1)
+	want := arrow.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1)
+	got := arrow.GetValues[I](result.Indices().Data(), 1)
 	assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got)
 }
 
@@ -81,15 +81,15 @@ func checkDictionaryUnique(t *testing.T, input compute.ArrayLikeDatum, expected
 	}
 }
 
-func checkUniqueFixedWidth[T exec.FixedWidthTypes](t *testing.T, input, expected arrow.Array) {
+func checkUniqueFixedWidth[T arrow.FixedWidthType](t *testing.T, input, expected arrow.Array) {
 	result, err := compute.UniqueArray(context.TODO(), input)
 	require.NoError(t, err)
 	defer result.Release()
 
 	require.Truef(t, arrow.TypeEqual(result.DataType(), expected.DataType()),
 		"wanted: %s\ngot: %s", expected.DataType(), result.DataType())
 
-	want := exec.GetValues[T](expected.Data(), 1)
-	got := exec.GetValues[T](expected.Data(), 1)
+	want := arrow.GetValues[T](expected.Data(), 1)
+	got := arrow.GetValues[T](result.Data(), 1)
 	assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got)
 }
 
@@ -124,7 +124,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t *testing.T, input, exp
 }
 
 type ArrowType interface {
-	exec.FixedWidthTypes | string | []byte
+	arrow.FixedWidthType | string | []byte
 }
 
 type builder[T ArrowType] interface {
@@ -166,7 +166,7 @@ func checkUniqueFixedSizeBinary(t *testing.T, mem memory.Allocator, dt *arrow.Fi
 	assert.ElementsMatch(t, want, got)
 }
 
-func checkUniqueFW[T exec.FixedWidthTypes](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) {
+func checkUniqueFW[T arrow.FixedWidthType](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) {
 	input := makeArray(mem, dt, inValues, inValid)
 	defer input.Release()
 	expected = 
makeArray(mem, dt, outValues, outValid) @@ -189,7 +189,7 @@ func checkUniqueVW[T string | []byte](t *testing.T, mem memory.Allocator, dt arr } } -type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Float] struct { +type PrimitiveHashKernelSuite[T arrow.IntType | arrow.UintType | constraints.Float] struct { suite.Suite mem *memory.CheckedAllocator @@ -197,7 +197,7 @@ type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Flo } func (ps *PrimitiveHashKernelSuite[T]) SetupSuite() { - ps.dt = exec.GetDataType[T]() + ps.dt = arrow.GetDataType[T]() } func (ps *PrimitiveHashKernelSuite[T]) SetupTest() { diff --git a/go/arrow/compute/vector_selection_test.go b/go/arrow/compute/vector_selection_test.go index f44840ba720..4e38bc995cd 100644 --- a/go/arrow/compute/vector_selection_test.go +++ b/go/arrow/compute/vector_selection_test.go @@ -459,9 +459,9 @@ func (f *FilterKernelNumeric) TestFilterNumeric() { }) } -type comparator[T exec.NumericTypes] func(a, b T) bool +type comparator[T arrow.NumericType] func(a, b T) bool -func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T] { +func getComparator[T arrow.NumericType](op kernels.CompareOperator) comparator[T] { return []comparator[T]{ // EQUAL func(a, b T) bool { return a == b }, @@ -478,7 +478,7 @@ func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T }[int8(op)] } -func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func compareAndFilterImpl[T arrow.NumericType](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { filtered := make([]T, 0, len(data)) for _, v := range data { if fn(v) { @@ -488,12 +488,12 @@ func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, f return exec.ArrayFromSlice(mem, filtered) } -func compareAndFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return compareAndFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) i := 0 return compareAndFilterImpl(mem, data, func(e T) bool { @@ -503,7 +503,7 @@ func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, othe }) } -func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func createFilterImpl[T arrow.NumericType](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { bldr := array.NewBooleanBuilder(mem) defer bldr.Release() for _, v := range data { @@ -512,12 +512,12 @@ func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn fu return bldr.NewArray() } -func createFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func createFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return createFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other 
[]T, op kernels.CompareOperator) arrow.Array { +func createFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) i := 0 return createFilterImpl(mem, data, func(e T) bool { @@ -527,8 +527,8 @@ func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other [] }) } -func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareScalarAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare scalar and filter", func(t *testing.T) { @@ -537,7 +537,7 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem t.Run(fmt.Sprintf("random %d", length), func(t *testing.T) { arr := rng.Numeric(dt.ID(), length, 0, 100, 0) defer arr.Release() - data := exec.GetData[T](arr.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](arr.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterValue(mem, data, 50, op) defer selection.Release() @@ -556,8 +556,8 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem }) } -func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareArrayAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare array and filter", func(t *testing.T) { for i := 3; i < 10; i++ { @@ -568,8 +568,8 @@ func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem m rhs := rng.Numeric(dt.ID(), length, 0, 100, 0) defer rhs.Release() - data := exec.GetData[T](lhs.Data().Buffers()[1].Bytes()) - other := exec.GetData[T](rhs.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](lhs.Data().Buffers()[1].Bytes()) + other := arrow.GetData[T](rhs.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterSlice(mem, data, other, op) defer selection.Release() diff --git a/go/arrow/flight/doc.go b/go/arrow/flight/doc.go index 68d1ca3458f..c36a808b00e 100644 --- a/go/arrow/flight/doc.go +++ b/go/arrow/flight/doc.go @@ -74,5 +74,4 @@ // the main thread reset the timer every time a write operation completes successfully // (that means one needs to use to_batches() + write_batch and not write_table). - package flight diff --git a/go/arrow/flight/gen/flight/Flight.pb.go b/go/arrow/flight/gen/flight/Flight.pb.go index 0438bca28be..745de90cd99 100644 --- a/go/arrow/flight/gen/flight/Flight.pb.go +++ b/go/arrow/flight/gen/flight/Flight.pb.go @@ -24,11 +24,12 @@ package flight import ( + reflect "reflect" + sync "sync" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" timestamppb "google.golang.org/protobuf/types/known/timestamppb" - reflect "reflect" - sync "sync" ) const ( @@ -38,7 +39,6 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// // The result of a cancel operation. // // This is used by CancelFlightInfoResult.status. 
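The hunks above all make the same mechanical substitution: the generic helpers and constraint sets move from the exec package into arrow, so exec.GetDataType/GetData/GetValues become arrow.GetDataType/GetData/GetValues, and exec.IntTypes/UintTypes/NumericTypes/FixedWidthTypes become arrow.IntType/UintType/NumericType/FixedWidthType. A minimal sketch of the post-rename API: arrow.GetDataType and arrow.GetData come straight from these hunks, while the module path and the resolveRunEndType helper are assumptions for illustration only.

    package main

    import (
        "fmt"

        "github.com/apache/arrow/go/v15/arrow" // module path assumed for this branch
    )

    // resolveRunEndType mirrors how the run-end-encode kernels above resolve a
    // DataType for their run-end parameter R; the helper itself is hypothetical,
    // only arrow.GetDataType is taken from the hunks.
    func resolveRunEndType[R int16 | int32 | int64]() arrow.DataType {
        return arrow.GetDataType[R]()
    }

    func main() {
        fmt.Println(resolveRunEndType[int32]()) // int32
    }
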
@@ -103,7 +103,6 @@ func (CancelStatus) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{0} } -// // Describes what type of descriptor is defined. type FlightDescriptor_DescriptorType int32 @@ -161,7 +160,6 @@ func (FlightDescriptor_DescriptorType) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{12, 0} } -// // The request that a client provides to a server on handshake. type HandshakeRequest struct { state protoimpl.MessageState @@ -281,7 +279,6 @@ func (x *HandshakeResponse) GetPayload() []byte { return nil } -// // A message for doing simple auth. type BasicAuth struct { state protoimpl.MessageState @@ -376,7 +373,6 @@ func (*Empty) Descriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{3} } -// // Describes an available action, including both the name used for execution // along with a short description of the purpose of the action. type ActionType struct { @@ -434,7 +430,6 @@ func (x *ActionType) GetDescription() string { return "" } -// // A service specific expression that can be used to return a limited set // of available Arrow Flight streams. type Criteria struct { @@ -484,7 +479,6 @@ func (x *Criteria) GetExpression() []byte { return nil } -// // An opaque action specific for the service. type Action struct { state protoimpl.MessageState @@ -541,7 +535,6 @@ func (x *Action) GetBody() []byte { return nil } -// // The request of the CancelFlightInfo action. // // The request should be stored in Action.body. @@ -592,7 +585,6 @@ func (x *CancelFlightInfoRequest) GetInfo() *FlightInfo { return nil } -// // The request of the RenewFlightEndpoint action. // // The request should be stored in Action.body. @@ -643,7 +635,6 @@ func (x *RenewFlightEndpointRequest) GetEndpoint() *FlightEndpoint { return nil } -// // An opaque result returned after executing an action. type Result struct { state protoimpl.MessageState @@ -692,7 +683,6 @@ func (x *Result) GetBody() []byte { return nil } -// // The result of the CancelFlightInfo action. // // The result should be stored in Result.body. @@ -743,7 +733,6 @@ func (x *CancelFlightInfoResult) GetStatus() CancelStatus { return CancelStatus_CANCEL_STATUS_UNSPECIFIED } -// // Wrap the result of a getSchema call type SchemaResult struct { state protoimpl.MessageState @@ -796,7 +785,6 @@ func (x *SchemaResult) GetSchema() []byte { return nil } -// // The name or tag for a Flight. May be used as a way to retrieve or generate // a flight or be used to expose a set of previously defined flights. type FlightDescriptor struct { @@ -868,7 +856,6 @@ func (x *FlightDescriptor) GetPath() []string { return nil } -// // The access coordinates for retrieval of a dataset. With a FlightInfo, a // consumer is able to determine how to retrieve a dataset. type FlightInfo struct { @@ -1000,7 +987,6 @@ func (x *FlightInfo) GetAppMetadata() []byte { return nil } -// // The information to process a long-running query. type PollInfo struct { state protoimpl.MessageState @@ -1100,7 +1086,6 @@ func (x *PollInfo) GetExpirationTime() *timestamppb.Timestamp { return nil } -// // A particular stream or split associated with a flight. type FlightEndpoint struct { state protoimpl.MessageState @@ -1196,7 +1181,6 @@ func (x *FlightEndpoint) GetAppMetadata() []byte { return nil } -// // A location where a Flight service will accept retrieval of a particular // stream given a ticket. 
type Location struct { @@ -1246,7 +1230,6 @@ func (x *Location) GetUri() string { return "" } -// // An opaque identifier that the service can use to retrieve a particular // portion of a stream. // @@ -1299,7 +1282,6 @@ func (x *Ticket) GetTicket() []byte { return nil } -// // A batch of Arrow data as part of a stream of batches. type FlightData struct { state protoimpl.MessageState @@ -1384,7 +1366,7 @@ func (x *FlightData) GetDataBody() []byte { return nil } -//* +// * // The response message associated with the submission of a DoPut. type PutResult struct { state protoimpl.MessageState diff --git a/go/arrow/flight/gen/flight/FlightSql.pb.go b/go/arrow/flight/gen/flight/FlightSql.pb.go index 279dc29c426..fb81a0dc9ed 100644 --- a/go/arrow/flight/gen/flight/FlightSql.pb.go +++ b/go/arrow/flight/gen/flight/FlightSql.pb.go @@ -24,11 +24,12 @@ package flight import ( + reflect "reflect" + sync "sync" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" descriptorpb "google.golang.org/protobuf/types/descriptorpb" - reflect "reflect" - sync "sync" ) const ( @@ -1702,7 +1703,7 @@ func (SqlSupportsConvert) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{16} } -//* +// * // The JDBC/ODBC-defined type of any object. // All the values here are the same as in the JDBC and ODBC specs. type XdbcDataType int32 @@ -1817,7 +1818,7 @@ func (XdbcDataType) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{17} } -//* +// * // Detailed subtype information for XDBC_TYPE_DATETIME and XDBC_TYPE_INTERVAL. type XdbcDatetimeSubcode int32 @@ -2293,22 +2294,23 @@ func (ActionCancelQueryResult_CancelResult) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{29, 0} } -// // Represents a metadata request. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the metadata request. // // The returned Arrow schema will be: // < -// info_name: uint32 not null, -// value: dense_union< -// string_value: utf8, -// bool_value: bool, -// bigint_value: int64, -// int32_bitmask: int32, -// string_list: list -// int32_to_int32_list_map: map> +// +// info_name: uint32 not null, +// value: dense_union< +// string_value: utf8, +// bool_value: bool, +// bigint_value: int64, +// int32_bitmask: int32, +// string_list: list +// int32_to_int32_list_map: map> +// // > // where there is one row per requested piece of metadata information. type CommandGetSqlInfo struct { @@ -2376,61 +2378,62 @@ func (x *CommandGetSqlInfo) GetInfo() []uint32 { return nil } -// // Represents a request to retrieve information about data type supported on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned schema will be: // < -// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), -// data_type: int32 not null (The SQL data type), -// column_size: int32 (The maximum size supported by that column. 
-// In case of exact numeric types, this represents the maximum precision. -// In case of string types, this represents the character length. -// In case of datetime data types, this represents the length in characters of the string representation. -// NULL is returned for data types where column size is not applicable.), -// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for -// data types where a literal prefix is not applicable.), -// literal_suffix: utf8 (Character or characters used to terminate a literal, -// NULL is returned for data types where a literal suffix is not applicable.), -// create_params: list -// (A list of keywords corresponding to which parameters can be used when creating -// a column for that specific type. -// NULL is returned if there are no parameters for the data type definition.), -// nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the -// Nullable enum.), -// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), -// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the -// Searchable enum.), -// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is -// not applicable to the data type or the data type is not numeric.), -// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), -// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute -// is not applicable to the data type or the data type is not numeric.), -// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL -// is returned if a localized name is not supported by the data source), -// minimum_scale: int32 (The minimum scale of the data type on the data source. -// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE -// columns both contain this value. NULL is returned if scale is not applicable.), -// maximum_scale: int32 (The maximum scale of the data type on the data source. -// NULL is returned if scale is not applicable.), -// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values -// as data_type value. Except for interval and datetime, which -// uses generic values. More info about those types can be -// obtained through datetime_subcode. The possible values can be seen -// in the XdbcDataType enum.), -// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains -// its sub types. For type different from interval and datetime, this value -// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), -// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains -// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For -// exact numeric types, this column contains the value 10 to indicate that -// column size specifies a number of decimal digits. Otherwise, this column is NULL.), -// interval_precision: int32 (If the data type is an interval data type, then this column contains the value -// of the interval leading precision. Otherwise, this column is NULL. This fields -// is only relevant to be used by ODBC). 
+// +// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), +// data_type: int32 not null (The SQL data type), +// column_size: int32 (The maximum size supported by that column. +// In case of exact numeric types, this represents the maximum precision. +// In case of string types, this represents the character length. +// In case of datetime data types, this represents the length in characters of the string representation. +// NULL is returned for data types where column size is not applicable.), +// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for +// data types where a literal prefix is not applicable.), +// literal_suffix: utf8 (Character or characters used to terminate a literal, +// NULL is returned for data types where a literal suffix is not applicable.), +// create_params: list +// (A list of keywords corresponding to which parameters can be used when creating +// a column for that specific type. +// NULL is returned if there are no parameters for the data type definition.), +// nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the +// Nullable enum.), +// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), +// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the +// Searchable enum.), +// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is +// not applicable to the data type or the data type is not numeric.), +// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), +// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute +// is not applicable to the data type or the data type is not numeric.), +// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL +// is returned if a localized name is not supported by the data source), +// minimum_scale: int32 (The minimum scale of the data type on the data source. +// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE +// columns both contain this value. NULL is returned if scale is not applicable.), +// maximum_scale: int32 (The maximum scale of the data type on the data source. +// NULL is returned if scale is not applicable.), +// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values +// as data_type value. Except for interval and datetime, which +// uses generic values. More info about those types can be +// obtained through datetime_subcode. The possible values can be seen +// in the XdbcDataType enum.), +// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains +// its sub types. For type different from interval and datetime, this value +// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), +// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains +// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For +// exact numeric types, this column contains the value 10 to indicate that +// column size specifies a number of decimal digits. Otherwise, this column is NULL.), +// interval_precision: int32 (If the data type is an interval data type, then this column contains the value +// of the interval leading precision. 
Otherwise, this column is NULL. This fields +// is only relevant to be used by ODBC). +// // > // The returned data should be ordered by data_type and then by type_name. type CommandGetXdbcTypeInfo struct { @@ -2482,16 +2485,17 @@ func (x *CommandGetXdbcTypeInfo) GetDataType() int32 { return 0 } -// // Represents a request to retrieve the list of catalogs on a Flight SQL enabled backend. // The definition of a catalog depends on vendor/implementation. It is usually the database itself // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8 not null +// +// catalog_name: utf8 not null +// // > // The returned data should be ordered by catalog_name. type CommandGetCatalogs struct { @@ -2532,17 +2536,18 @@ func (*CommandGetCatalogs) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{2} } -// // Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend. // The definition of a database schema depends on vendor/implementation. It is usually a collection of tables. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8 not null +// +// catalog_name: utf8, +// db_schema_name: utf8 not null +// // > // The returned data should be ordered by catalog_name, then db_schema_name. type CommandGetDbSchemas struct { @@ -2610,32 +2615,34 @@ func (x *CommandGetDbSchemas) GetDbSchemaFilterPattern() string { return "" } -// // Represents a request to retrieve the list of tables, and optionally their schemas, on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// table_type: utf8 not null, -// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, -// it is serialized as an IPC message.) +// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// table_type: utf8 not null, +// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, +// it is serialized as an IPC message.) +// // > // Fields on table_schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. 
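To make the CommandGetTables contract above concrete: a client fills in the filter fields and serializes the message as the FlightDescriptor command (Flight SQL wraps it in a protobuf Any first). A rough sketch against the generated package in this diff; the Go field names are inferred from the GetCatalog/GetTableTypes/... getters, and the proto3 optional fields are pointers, so treat the details as assumptions.

    package main

    import (
        "fmt"

        flightpb "github.com/apache/arrow/go/v15/arrow/flight/gen/flight" // path assumed
        "google.golang.org/protobuf/proto"
    )

    func main() {
        cmd := &flightpb.CommandGetTables{
            Catalog:                proto.String("main"),   // optional field -> *string
            DbSchemaFilterPattern:  proto.String("public"), // LIKE-style pattern
            TableNameFilterPattern: proto.String("%"),
            TableTypes:             []string{"TABLE", "VIEW"},
            IncludeSchema:          true, // also return each table's IPC-serialized schema
        }
        body, err := proto.Marshal(cmd)
        if err != nil {
            panic(err)
        }
        fmt.Printf("CommandGetTables payload: %d bytes\n", len(body))
    }
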
-// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// // The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. type CommandGetTables struct { state protoimpl.MessageState @@ -2737,17 +2744,18 @@ func (x *CommandGetTables) GetIncludeSchema() bool { return false } -// // Represents a request to retrieve the list of table types on a Flight SQL enabled backend. // The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. // TABLE, VIEW, and SYSTEM TABLE are commonly supported. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// table_type: utf8 not null +// +// table_type: utf8 not null +// // > // The returned data should be ordered by table_type. type CommandGetTableTypes struct { @@ -2788,20 +2796,21 @@ func (*CommandGetTableTypes) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{5} } -// // Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// column_name: utf8 not null, -// key_name: utf8, -// key_sequence: int32 not null +// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// column_name: utf8 not null, +// key_name: utf8, +// key_sequence: int32 not null +// // > // The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. 
type CommandGetPrimaryKeys struct { @@ -2876,28 +2885,29 @@ func (x *CommandGetPrimaryKeys) GetTable() string { return "" } -// // Represents a request to retrieve a description of the foreign key columns that reference the given table's // primary key columns (the foreign keys exported by a table) of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. @@ -2973,35 +2983,36 @@ func (x *CommandGetExportedKeys) GetTable() string { return "" } -// // Represents a request to retrieve the foreign keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. 
// update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetImportedKeys struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3074,37 +3085,38 @@ func (x *CommandGetImportedKeys) GetTable() string { return "" } -// // Represents a request to retrieve a description of the foreign key columns in the given foreign key table that // reference the primary key or the columns representing a unique constraint of the parent table (could be the same // or a different table) on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null +// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null +// // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetCrossReference struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3212,7 +3224,6 @@ func (x *CommandGetCrossReference) GetFkTable() string { return "" } -// // Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. type ActionCreatePreparedStatementRequest struct { state protoimpl.MessageState @@ -3272,7 +3283,6 @@ func (x *ActionCreatePreparedStatementRequest) GetTransactionId() []byte { return nil } -// // An embedded message describing a Substrait plan to execute. type SubstraitPlan struct { state protoimpl.MessageState @@ -3336,7 +3346,6 @@ func (x *SubstraitPlan) GetVersion() string { return "" } -// // Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. type ActionCreatePreparedSubstraitPlanRequest struct { state protoimpl.MessageState @@ -3396,7 +3405,6 @@ func (x *ActionCreatePreparedSubstraitPlanRequest) GetTransactionId() []byte { return nil } -// // Wrap the result of a "CreatePreparedStatement" or "CreatePreparedSubstraitPlan" action. 
// // The resultant PreparedStatement can be closed either: @@ -3472,7 +3480,6 @@ func (x *ActionCreatePreparedStatementResult) GetParameterSchema() []byte { return nil } -// // Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend. // Closes server resources associated with the prepared statement handle. type ActionClosePreparedStatementRequest struct { @@ -3523,7 +3530,6 @@ func (x *ActionClosePreparedStatementRequest) GetPreparedStatementHandle() []byt return nil } -// // Request message for the "BeginTransaction" action. // Begins a transaction. type ActionBeginTransactionRequest struct { @@ -3564,7 +3570,6 @@ func (*ActionBeginTransactionRequest) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{15} } -// // Request message for the "BeginSavepoint" action. // Creates a savepoint within a transaction. // @@ -3627,7 +3632,6 @@ func (x *ActionBeginSavepointRequest) GetName() string { return "" } -// // The result of a "BeginTransaction" action. // // The transaction can be manipulated with the "EndTransaction" action, or @@ -3683,7 +3687,6 @@ func (x *ActionBeginTransactionResult) GetTransactionId() []byte { return nil } -// // The result of a "BeginSavepoint" action. // // The transaction can be manipulated with the "EndSavepoint" action. @@ -3739,7 +3742,6 @@ func (x *ActionBeginSavepointResult) GetSavepointId() []byte { return nil } -// // Request message for the "EndTransaction" action. // // Commit (COMMIT) or rollback (ROLLBACK) the transaction. @@ -3803,7 +3805,6 @@ func (x *ActionEndTransactionRequest) GetAction() ActionEndTransactionRequest_En return ActionEndTransactionRequest_END_TRANSACTION_UNSPECIFIED } -// // Request message for the "EndSavepoint" action. // // Release (RELEASE) the savepoint or rollback (ROLLBACK) to the @@ -3869,22 +3870,21 @@ func (x *ActionEndSavepointRequest) GetAction() ActionEndSavepointRequest_EndSav return ActionEndSavepointRequest_END_SAVEPOINT_UNSPECIFIED } -// // Represents a SQL query. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. 
+// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. type CommandStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3942,23 +3942,22 @@ func (x *CommandStatementQuery) GetTransactionId() []byte { return nil } -// // Represents a Substrait plan. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. -// - DoPut: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. +// - DoPut: execute the query. type CommandStatementSubstraitPlan struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4016,7 +4015,7 @@ func (x *CommandStatementSubstraitPlan) GetTransactionId() []byte { return nil } -//* +// * // Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery. // This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. type TicketStatementQuery struct { @@ -4067,23 +4066,22 @@ func (x *TicketStatementQuery) GetStatementHandle() []byte { return nil } -// // Represents an instance of executing a prepared statement. 
Used in the command member of FlightDescriptor for // the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. -// - GetFlightInfo: execute the prepared statement instance. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. +// - GetFlightInfo: execute the prepared statement instance. type CommandPreparedStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4132,7 +4130,6 @@ func (x *CommandPreparedStatementQuery) GetPreparedStatementHandle() []byte { return nil } -// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included SQL update. type CommandStatementUpdate struct { @@ -4192,7 +4189,6 @@ func (x *CommandStatementUpdate) GetTransactionId() []byte { return nil } -// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included // prepared statement handle as an update. @@ -4244,7 +4240,6 @@ func (x *CommandPreparedStatementUpdate) GetPreparedStatementHandle() []byte { return nil } -// // Returned from the RPC call DoPut when a CommandStatementUpdate // CommandPreparedStatementUpdate was in the request, containing // results from the update. @@ -4297,7 +4292,6 @@ func (x *DoPutUpdateResult) GetRecordCount() int64 { return 0 } -// // Request message for the "CancelQuery" action. // // Explicitly cancel a running query. 
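One practical note on DoPutUpdateResult above: it travels back to the client inside the app_metadata of the PutResult written on the DoPut stream, so the affected-row count is recovered with a plain proto unmarshal. A minimal sketch, assuming the same generated package and module path as above; GetRecordCount comes from this diff, the round-trip in main is fabricated just to exercise the helper.

    package main

    import (
        "fmt"

        flightpb "github.com/apache/arrow/go/v15/arrow/flight/gen/flight" // path assumed
        "google.golang.org/protobuf/proto"
    )

    // decodeUpdateResult extracts the affected-row count from a DoPut response.
    func decodeUpdateResult(res *flightpb.PutResult) (int64, error) {
        var upd flightpb.DoPutUpdateResult
        if err := proto.Unmarshal(res.GetAppMetadata(), &upd); err != nil {
            return 0, err
        }
        return upd.GetRecordCount(), nil
    }

    func main() {
        meta, _ := proto.Marshal(&flightpb.DoPutUpdateResult{RecordCount: 42})
        n, err := decodeUpdateResult(&flightpb.PutResult{AppMetadata: meta})
        fmt.Println(n, err) // 42 <nil>
    }
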
@@ -4366,7 +4360,6 @@ func (x *ActionCancelQueryRequest) GetInfo() []byte { return nil } -// // The result of cancelling a query. // // The result should be wrapped in a google.protobuf.Any message. diff --git a/go/arrow/flight/gen/flight/Flight_grpc.pb.go b/go/arrow/flight/gen/flight/Flight_grpc.pb.go index 87d9abc5926..34fcd5d09c9 100644 --- a/go/arrow/flight/gen/flight/Flight_grpc.pb.go +++ b/go/arrow/flight/gen/flight/Flight_grpc.pb.go @@ -8,6 +8,7 @@ package flight import ( context "context" + grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index c9c8b390a86..3e1da64dcf8 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -42,7 +42,7 @@ type ( FlightEndpoint = flight.FlightEndpoint Location = flight.Location FlightInfo = flight.FlightInfo - PollInfo = flight.PollInfo + PollInfo = flight.PollInfo FlightData = flight.FlightData PutResult = flight.PutResult Ticket = flight.Ticket diff --git a/go/arrow/gen-flatbuffers.go b/go/arrow/gen-flatbuffers.go index 5c8eba4a247..720016e0bf1 100644 --- a/go/arrow/gen-flatbuffers.go +++ b/go/arrow/gen-flatbuffers.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ignore // +build ignore package main diff --git a/go/arrow/internal/debug/assert_off.go b/go/arrow/internal/debug/assert_off.go index 52b9a233169..1450ecc98a2 100644 --- a/go/arrow/internal/debug/assert_off.go +++ b/go/arrow/internal/debug/assert_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !assert // +build !assert package debug diff --git a/go/arrow/internal/debug/assert_on.go b/go/arrow/internal/debug/assert_on.go index 2aa5d6ace4c..4a57169b313 100644 --- a/go/arrow/internal/debug/assert_on.go +++ b/go/arrow/internal/debug/assert_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build assert // +build assert package debug diff --git a/go/arrow/internal/debug/doc.go b/go/arrow/internal/debug/doc.go index 3ee1783ca4b..094e427a22e 100644 --- a/go/arrow/internal/debug/doc.go +++ b/go/arrow/internal/debug/doc.go @@ -17,14 +17,12 @@ /* Package debug provides APIs for conditional runtime assertions and debug logging. - -Using Assert +# Using Assert To enable runtime assertions, build with the assert tag. When the assert tag is omitted, the code for the assertion will be omitted from the binary. - -Using Log +# Using Log To enable runtime debug logs, build with the debug tag. When the debug tag is omitted, the code for logging will be omitted from the binary. diff --git a/go/arrow/internal/debug/log_off.go b/go/arrow/internal/debug/log_off.go index 48da8e1ee94..760a5cdc0dc 100644 --- a/go/arrow/internal/debug/log_off.go +++ b/go/arrow/internal/debug/log_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !debug // +build !debug package debug diff --git a/go/arrow/internal/debug/log_on.go b/go/arrow/internal/debug/log_on.go index 99d0c8ae33f..2588e7d1069 100644 --- a/go/arrow/internal/debug/log_on.go +++ b/go/arrow/internal/debug/log_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
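A note on the //go:build lines being added throughout the debug package here: Go 1.17 introduced the //go:build syntax, and gofmt now emits it alongside the legacy // +build directive so that older toolchains still honor the constraint. The two spellings must agree; in the legacy form a space means OR and a comma means AND, while the new form writes || and && explicitly. For example, the combined constraint added to util.go below reads:

    //go:build debug || assert
    // +build debug assert

    package debug

    // Either tag enables this file, e.g.:
    //
    //	go test -tags assert ./go/arrow/internal/debug/...
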
+//go:build debug // +build debug package debug diff --git a/go/arrow/internal/debug/util.go b/go/arrow/internal/debug/util.go index 7bd3d5389e6..ea4eba7fb5c 100644 --- a/go/arrow/internal/debug/util.go +++ b/go/arrow/internal/debug/util.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug || assert // +build debug assert package debug diff --git a/go/arrow/internal/flatbuf/Binary.go b/go/arrow/internal/flatbuf/Binary.go index e8018e74c41..95e015595b5 100644 --- a/go/arrow/internal/flatbuf/Binary.go +++ b/go/arrow/internal/flatbuf/Binary.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Opaque binary data +// / Opaque binary data type Binary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/BinaryView.go b/go/arrow/internal/flatbuf/BinaryView.go index 09ca5e7db96..f6906674bdb 100644 --- a/go/arrow/internal/flatbuf/BinaryView.go +++ b/go/arrow/internal/flatbuf/BinaryView.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Binary, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Binary, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. type BinaryView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Block.go b/go/arrow/internal/flatbuf/Block.go index 57a697b1968..8e33d3e6415 100644 --- a/go/arrow/internal/flatbuf/Block.go +++ b/go/arrow/internal/flatbuf/Block.go @@ -35,31 +35,34 @@ func (rcv *Block) Table() flatbuffers.Table { return rcv._tab.Table } -/// Index to the start of the RecordBlock (note this is past the Message header) +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// Index to the start of the RecordBlock (note this is past the Message header) + +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// Length of the metadata +// / Length of the metadata func (rcv *Block) MetaDataLength() int32 { return rcv._tab.GetInt32(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// Length of the metadata + +// / Length of the metadata func (rcv *Block) MutateMetaDataLength(n int32) bool { return rcv._tab.MutateInt32(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). 
func (rcv *Block) BodyLength() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(16)) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). + +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). func (rcv *Block) MutateBodyLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(16), n) } diff --git a/go/arrow/internal/flatbuf/BodyCompression.go b/go/arrow/internal/flatbuf/BodyCompression.go index 6468e231352..c23c2919021 100644 --- a/go/arrow/internal/flatbuf/BodyCompression.go +++ b/go/arrow/internal/flatbuf/BodyCompression.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Optional compression for the memory buffers constituting IPC message -/// bodies. Intended for use with RecordBatch but could be used for other -/// message types +// / Optional compression for the memory buffers constituting IPC message +// / bodies. Intended for use with RecordBatch but could be used for other +// / message types type BodyCompression struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *BodyCompression) Table() flatbuffers.Table { return rcv._tab } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. func (rcv *BodyCompression) Codec() CompressionType { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *BodyCompression) Codec() CompressionType { return 0 } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. 
func (rcv *BodyCompression) MutateCodec(n CompressionType) bool { return rcv._tab.MutateInt8Slot(4, int8(n)) } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) Method() BodyCompressionMethod { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -70,7 +70,7 @@ func (rcv *BodyCompression) Method() BodyCompressionMethod { return 0 } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) MutateMethod(n BodyCompressionMethod) bool { return rcv._tab.MutateInt8Slot(6, int8(n)) } diff --git a/go/arrow/internal/flatbuf/BodyCompressionMethod.go b/go/arrow/internal/flatbuf/BodyCompressionMethod.go index 108ab3e07fb..bb7234b3989 100644 --- a/go/arrow/internal/flatbuf/BodyCompressionMethod.go +++ b/go/arrow/internal/flatbuf/BodyCompressionMethod.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// Provided for forward compatibility in case we need to support different -/// strategies for compressing the IPC message body (like whole-body -/// compression rather than buffer-level) in the future +// / Provided for forward compatibility in case we need to support different +// / strategies for compressing the IPC message body (like whole-body +// / compression rather than buffer-level) in the future type BodyCompressionMethod int8 const ( diff --git a/go/arrow/internal/flatbuf/Buffer.go b/go/arrow/internal/flatbuf/Buffer.go index eba8d99b28e..e650e06a570 100644 --- a/go/arrow/internal/flatbuf/Buffer.go +++ b/go/arrow/internal/flatbuf/Buffer.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Buffer represents a single contiguous memory segment +// / ---------------------------------------------------------------------- +// / A Buffer represents a single contiguous memory segment type Buffer struct { _tab flatbuffers.Struct } @@ -37,30 +37,32 @@ func (rcv *Buffer) Table() flatbuffers.Table { return rcv._tab.Table } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts + +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. 
func (rcv *Buffer) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. + +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. func (rcv *Buffer) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/Date.go b/go/arrow/internal/flatbuf/Date.go index 32983ec54cc..985a8f79955 100644 --- a/go/arrow/internal/flatbuf/Date.go +++ b/go/arrow/internal/flatbuf/Date.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Date is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: -/// -/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no -/// leap seconds), where the values are evenly divisible by 86400000 -/// * Days (32 bits) since the UNIX epoch +// / Date is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +// / +// / * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +// / leap seconds), where the values are evenly divisible by 86400000 +// / * Days (32 bits) since the UNIX epoch type Date struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Decimal.go b/go/arrow/internal/flatbuf/Decimal.go index c9de254d1dc..2fc9d5ad658 100644 --- a/go/arrow/internal/flatbuf/Decimal.go +++ b/go/arrow/internal/flatbuf/Decimal.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Exact decimal value represented as an integer value in two's -/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers -/// are used. The representation uses the endianness indicated -/// in the Schema. +// / Exact decimal value represented as an integer value in two's +// / complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +// / are used. The representation uses the endianness indicated +// / in the Schema. type Decimal struct { _tab flatbuffers.Table } @@ -46,7 +46,7 @@ func (rcv *Decimal) Table() flatbuffers.Table { return rcv._tab } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) Precision() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,12 +55,12 @@ func (rcv *Decimal) Precision() int32 { return 0 } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) MutatePrecision(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } -/// Number of digits after the decimal point "." +// / Number of digits after the decimal point "." func (rcv *Decimal) Scale() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -69,13 +69,13 @@ func (rcv *Decimal) Scale() int32 { return 0 } -/// Number of digits after the decimal point "." 
+// / Number of digits after the decimal point "." func (rcv *Decimal) MutateScale(n int32) bool { return rcv._tab.MutateInt32Slot(6, n) } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) BitWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,8 +84,8 @@ func (rcv *Decimal) BitWidth() int32 { return 128 } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) MutateBitWidth(n int32) bool { return rcv._tab.MutateInt32Slot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryBatch.go b/go/arrow/internal/flatbuf/DictionaryBatch.go index 25b5384e46a..999c5fda463 100644 --- a/go/arrow/internal/flatbuf/DictionaryBatch.go +++ b/go/arrow/internal/flatbuf/DictionaryBatch.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// For sending dictionary encoding information. Any Field can be -/// dictionary-encoded, but in this case none of its children may be -/// dictionary-encoded. -/// There is one vector / column per dictionary, but that vector / column -/// may be spread across multiple dictionary batches by using the isDelta -/// flag +// / For sending dictionary encoding information. Any Field can be +// / dictionary-encoded, but in this case none of its children may be +// / dictionary-encoded. +// / There is one vector / column per dictionary, but that vector / column +// / may be spread across multiple dictionary batches by using the isDelta +// / flag type DictionaryBatch struct { _tab flatbuffers.Table } @@ -73,9 +73,9 @@ func (rcv *DictionaryBatch) Data(obj *RecordBatch) *RecordBatch { return nil } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) IsDelta() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,9 +84,9 @@ func (rcv *DictionaryBatch) IsDelta() bool { return false } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) MutateIsDelta(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryEncoding.go b/go/arrow/internal/flatbuf/DictionaryEncoding.go index a9b09530b2a..44c3874219f 100644 --- a/go/arrow/internal/flatbuf/DictionaryEncoding.go +++ b/go/arrow/internal/flatbuf/DictionaryEncoding.go @@ -42,9 +42,9 @@ func (rcv *DictionaryEncoding) Table() flatbuffers.Table { return rcv._tab } -/// The known dictionary id in the application where this data is used. 
In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) Id() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -53,18 +53,18 @@ func (rcv *DictionaryEncoding) Id() int64 { return 0 } -/// The known dictionary id in the application where this data is used. In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) MutateId(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -78,15 +78,15 @@ func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { return nil } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical, applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) IsOrdered() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -95,10 +95,10 @@ func (rcv *DictionaryEncoding) IsOrdered() bool { return false } -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. 
In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical, applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) MutateIsOrdered(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryKind.go b/go/arrow/internal/flatbuf/DictionaryKind.go index 126ba5f7f6b..68251005156 100644 --- a/go/arrow/internal/flatbuf/DictionaryKind.go +++ b/go/arrow/internal/flatbuf/DictionaryKind.go @@ -20,11 +20,11 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Dictionary encoding metadata -/// Maintained for forwards compatibility, in the future -/// Dictionaries might be explicit maps between integers and values -/// allowing for non-contiguous index values +// / ---------------------------------------------------------------------- +// / Dictionary encoding metadata +// / Maintained for forwards compatibility, in the future +// / Dictionaries might be explicit maps between integers and values +// / allowing for non-contiguous index values type DictionaryKind int16 const ( diff --git a/go/arrow/internal/flatbuf/Endianness.go b/go/arrow/internal/flatbuf/Endianness.go index cefa2ff9c06..c9619b7b0d9 100644 --- a/go/arrow/internal/flatbuf/Endianness.go +++ b/go/arrow/internal/flatbuf/Endianness.go @@ -20,8 +20,8 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Endianness of the platform producing the data +// / ---------------------------------------------------------------------- +// / Endianness of the platform producing the data type Endianness int16 const ( diff --git a/go/arrow/internal/flatbuf/Feature.go b/go/arrow/internal/flatbuf/Feature.go index ae5a0398b60..2204c440ed4 100644 --- a/go/arrow/internal/flatbuf/Feature.go +++ b/go/arrow/internal/flatbuf/Feature.go @@ -20,35 +20,35 @@ package flatbuf import "strconv" -/// Represents Arrow Features that might not have full support -/// within implementations. This is intended to be used in -/// two scenarios: -/// 1. A mechanism for readers of Arrow Streams -/// and files to understand that the stream or file makes -/// use of a feature that isn't supported or unknown to -/// the implementation (and therefore can meet the Arrow -/// forward compatibility guarantees). -/// 2. A means of negotiating between a client and server -/// what features a stream is allowed to use. The enums -/// values here are intented to represent higher level -/// features, additional details maybe negotiated -/// with key-value pairs specific to the protocol. -/// -/// Enums added to this list should be assigned power-of-two values -/// to facilitate exchanging and comparing bitmaps for supported -/// features. +// / Represents Arrow Features that might not have full support +// / within implementations. This is intended to be used in +// / two scenarios: +// / 1. A mechanism for readers of Arrow Streams +// / and files to understand that the stream or file makes +// / use of a feature that isn't supported or unknown to +// / the implementation (and therefore can meet the Arrow +// / forward compatibility guarantees). +// / 2. 
A means of negotiating between a client and server +// / what features a stream is allowed to use. The enums +// / values here are intented to represent higher level +// / features, additional details maybe negotiated +// / with key-value pairs specific to the protocol. +// / +// / Enums added to this list should be assigned power-of-two values +// / to facilitate exchanging and comparing bitmaps for supported +// / features. type Feature int64 const ( /// Needed to make flatbuffers happy. - FeatureUNUSED Feature = 0 + FeatureUNUSED Feature = 0 /// The stream makes use of multiple full dictionaries with the /// same ID and assumes clients implement dictionary replacement /// correctly. FeatureDICTIONARY_REPLACEMENT Feature = 1 /// The stream makes use of compressed bodies as described /// in Message.fbs. - FeatureCOMPRESSED_BODY Feature = 2 + FeatureCOMPRESSED_BODY Feature = 2 ) var EnumNamesFeature = map[Feature]string{ diff --git a/go/arrow/internal/flatbuf/Field.go b/go/arrow/internal/flatbuf/Field.go index c03cf2f878b..8aed29bc481 100644 --- a/go/arrow/internal/flatbuf/Field.go +++ b/go/arrow/internal/flatbuf/Field.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A field represents a named column in a record / row batch or child of a -/// nested type. +// / ---------------------------------------------------------------------- +// / A field represents a named column in a record / row batch or child of a +// / nested type. type Field struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *Field) Table() flatbuffers.Table { return rcv._tab } -/// Name is not required, in i.e. a List +// / Name is not required, in i.e. a List func (rcv *Field) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,8 +54,8 @@ func (rcv *Field) Name() []byte { return nil } -/// Name is not required, in i.e. a List -/// Whether or not this field can contain nulls. Should be true in general. +// / Name is not required, in i.e. a List +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) Nullable() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -64,7 +64,7 @@ func (rcv *Field) Nullable() bool { return false } -/// Whether or not this field can contain nulls. Should be true in general. +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) MutateNullable(n bool) bool { return rcv._tab.MutateBoolSlot(6, n) } @@ -81,7 +81,7 @@ func (rcv *Field) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(8, byte(n)) } -/// This is the type of the decoded value if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. func (rcv *Field) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -91,8 +91,8 @@ func (rcv *Field) Type(obj *flatbuffers.Table) bool { return false } -/// This is the type of the decoded value if the field is dictionary encoded. -/// Present only if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. +// / Present only if the field is dictionary encoded. 
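(Aside for readers of this patch: the dictionary-encoding comments above map directly onto the Go API. A minimal sketch follows; the v15 module path and the "color" field name are illustrative assumptions, not part of this change.)

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v15/arrow"
)

func main() {
	// A dictionary-encoded string field: Type describes the decoded values,
	// while the dictionary contents travel separately in DictionaryBatch
	// messages keyed by the dictionary id.
	f := arrow.Field{
		Name:     "color", // illustrative name, not from this patch
		Nullable: true,
		Type: &arrow.DictionaryType{
			IndexType: arrow.PrimitiveTypes.Int32, // signed indices are recommended
			ValueType: arrow.BinaryTypes.String,   // the decoded value type
			Ordered:   false,                      // by default, order has no semantic meaning
		},
	}
	fmt.Println(f) // e.g. color: dictionary<values=utf8, indices=int32, ordered=false>
}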
func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -106,9 +106,9 @@ func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { return nil } -/// Present only if the field is dictionary encoded. -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. +// / Present only if the field is dictionary encoded. +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. func (rcv *Field) Children(obj *Field, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -129,9 +129,9 @@ func (rcv *Field) ChildrenLength() int { return 0 } -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. -/// User-defined metadata +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. +// / User-defined metadata func (rcv *Field) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -152,7 +152,7 @@ func (rcv *Field) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FieldStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/FieldNode.go b/go/arrow/internal/flatbuf/FieldNode.go index 606b30bfebb..0e258a3d2cd 100644 --- a/go/arrow/internal/flatbuf/FieldNode.go +++ b/go/arrow/internal/flatbuf/FieldNode.go @@ -22,15 +22,15 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for describing a table row batch (a collection of -/// equal-length Arrow arrays) -/// Metadata about a field at some level of a nested type tree (but not -/// its children). -/// -/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` -/// would have {length: 5, null_count: 2} for its List node, and {length: 6, -/// null_count: 0} for its Int16 node, as separate FieldNode structs +// / ---------------------------------------------------------------------- +// / Data structures for describing a table row batch (a collection of +// / equal-length Arrow arrays) +// / Metadata about a field at some level of a nested type tree (but not +// / its children). +// / +// / For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +// / would have {length: 5, null_count: 2} for its List node, and {length: 6, +// / null_count: 0} for its Int16 node, as separate FieldNode structs type FieldNode struct { _tab flatbuffers.Struct } @@ -44,26 +44,28 @@ func (rcv *FieldNode) Table() flatbuffers.Table { return rcv._tab.Table } -/// The number of value slots in the Arrow array at this level of a nested -/// tree +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The number of value slots in the Arrow array at this level of a nested -/// tree + +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The number of observed nulls. 
Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) NullCount() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The number of observed nulls. Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. + +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) MutateNullCount(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeBinary.go b/go/arrow/internal/flatbuf/FixedSizeBinary.go index 4e660d5077f..2725dfb90b9 100644 --- a/go/arrow/internal/flatbuf/FixedSizeBinary.go +++ b/go/arrow/internal/flatbuf/FixedSizeBinary.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeBinary) Table() flatbuffers.Table { return rcv._tab } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) ByteWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeBinary) ByteWidth() int32 { return 0 } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) MutateByteWidth(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeList.go b/go/arrow/internal/flatbuf/FixedSizeList.go index dabf5cc8581..534ca27f2fe 100644 --- a/go/arrow/internal/flatbuf/FixedSizeList.go +++ b/go/arrow/internal/flatbuf/FixedSizeList.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeList) Table() flatbuffers.Table { return rcv._tab } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) ListSize() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeList) ListSize() int32 { return 0 } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) MutateListSize(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/Footer.go b/go/arrow/internal/flatbuf/Footer.go index 65b0ff09546..d65af41e7f6 100644 --- a/go/arrow/internal/flatbuf/Footer.go +++ b/go/arrow/internal/flatbuf/Footer.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Arrow File metadata -/// +// / ---------------------------------------------------------------------- +// / Arrow File metadata +// / type Footer struct { _tab flatbuffers.Table } @@ -108,7 +108,7 @@ func (rcv *Footer) RecordBatchesLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func (rcv *Footer) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -129,7 +129,7 @@ func (rcv *Footer) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FooterStart(builder *flatbuffers.Builder) { builder.StartObject(5) 
} diff --git a/go/arrow/internal/flatbuf/KeyValue.go b/go/arrow/internal/flatbuf/KeyValue.go index c1b85318ecd..0cd5dc62923 100644 --- a/go/arrow/internal/flatbuf/KeyValue.go +++ b/go/arrow/internal/flatbuf/KeyValue.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// user defined key value pairs to add custom metadata to arrow -/// key namespacing is the responsibility of the user +// / ---------------------------------------------------------------------- +// / user defined key value pairs to add custom metadata to arrow +// / key namespacing is the responsibility of the user type KeyValue struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeBinary.go b/go/arrow/internal/flatbuf/LargeBinary.go index 2c3befcc16f..b25ecc41aff 100644 --- a/go/arrow/internal/flatbuf/LargeBinary.go +++ b/go/arrow/internal/flatbuf/LargeBinary.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Binary, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Binary, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeBinary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeList.go b/go/arrow/internal/flatbuf/LargeList.go index 92f22845874..d8bfb9c07df 100644 --- a/go/arrow/internal/flatbuf/LargeList.go +++ b/go/arrow/internal/flatbuf/LargeList.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as List, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as List, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeList struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeListView.go b/go/arrow/internal/flatbuf/LargeListView.go index 5b1df149cd1..4608c1dec53 100644 --- a/go/arrow/internal/flatbuf/LargeListView.go +++ b/go/arrow/internal/flatbuf/LargeListView.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent -/// extremely large data values. +// / Same as ListView, but with 64-bit offsets and sizes, allowing to represent +// / extremely large data values. type LargeListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeUtf8.go b/go/arrow/internal/flatbuf/LargeUtf8.go index e78b33e1100..4478fed856e 100644 --- a/go/arrow/internal/flatbuf/LargeUtf8.go +++ b/go/arrow/internal/flatbuf/LargeUtf8.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Utf8, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Utf8, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeUtf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/ListView.go b/go/arrow/internal/flatbuf/ListView.go index 46b1e0b3cbf..cde43cf5b68 100644 --- a/go/arrow/internal/flatbuf/ListView.go +++ b/go/arrow/internal/flatbuf/ListView.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Represents the same logical types that List can, but contains offsets and -/// sizes allowing for writes in any order and sharing of child values among -/// list values. 
+// / Represents the same logical types that List can, but contains offsets and +// / sizes allowing for writes in any order and sharing of child values among +// / list values. type ListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Map.go b/go/arrow/internal/flatbuf/Map.go index 8802aba1ebd..d4871e55819 100644 --- a/go/arrow/internal/flatbuf/Map.go +++ b/go/arrow/internal/flatbuf/Map.go @@ -22,31 +22,31 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A Map is a logical nested type that is represented as -/// -/// List> -/// -/// In this layout, the keys and values are each respectively contiguous. We do -/// not constrain the key and value types, so the application is responsible -/// for ensuring that the keys are hashable and unique. Whether the keys are sorted -/// may be set in the metadata for this field. -/// -/// In a field with Map type, the field has a child Struct field, which then -/// has two children: key type and the second the value type. The names of the -/// child fields may be respectively "entries", "key", and "value", but this is -/// not enforced. -/// -/// Map -/// ```text -/// - child[0] entries: Struct -/// - child[0] key: K -/// - child[1] value: V -/// ``` -/// Neither the "entries" field nor the "key" field may be nullable. -/// -/// The metadata is structured so that Arrow systems without special handling -/// for Map can make Map an alias for List. The "layout" attribute for the Map -/// field must have the same contents as a List. +// / A Map is a logical nested type that is represented as +// / +// / List> +// / +// / In this layout, the keys and values are each respectively contiguous. We do +// / not constrain the key and value types, so the application is responsible +// / for ensuring that the keys are hashable and unique. Whether the keys are sorted +// / may be set in the metadata for this field. +// / +// / In a field with Map type, the field has a child Struct field, which then +// / has two children: key type and the second the value type. The names of the +// / child fields may be respectively "entries", "key", and "value", but this is +// / not enforced. +// / +// / Map +// / ```text +// / - child[0] entries: Struct +// / - child[0] key: K +// / - child[1] value: V +// / ``` +// / Neither the "entries" field nor the "key" field may be nullable. +// / +// / The metadata is structured so that Arrow systems without special handling +// / for Map can make Map an alias for List. The "layout" attribute for the Map +// / field must have the same contents as a List. 
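(Aside: the Map layout described in the comment above corresponds one-to-one to arrow.MapOf in the Go package. A minimal sketch, assuming the v15 module path:)

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v15/arrow"
)

func main() {
	// Map<utf8, int32> is physically List<entries: Struct<key: utf8, value: int32>>.
	m := arrow.MapOf(arrow.BinaryTypes.String, arrow.PrimitiveTypes.Int32)
	m.KeysSorted = true // corresponds to the keysSorted flag above

	fmt.Println(m)            // e.g. map<utf8, int32, keys_sorted>
	fmt.Println(m.KeyType())  // utf8
	fmt.Println(m.ItemType()) // int32
}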
type Map struct { _tab flatbuffers.Table } @@ -67,7 +67,7 @@ func (rcv *Map) Table() flatbuffers.Table { return rcv._tab } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) KeysSorted() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -76,7 +76,7 @@ func (rcv *Map) KeysSorted() bool { return false } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) MutateKeysSorted(n bool) bool { return rcv._tab.MutateBoolSlot(4, n) } diff --git a/go/arrow/internal/flatbuf/MessageHeader.go b/go/arrow/internal/flatbuf/MessageHeader.go index c12fc105811..d7f9907c7a7 100644 --- a/go/arrow/internal/flatbuf/MessageHeader.go +++ b/go/arrow/internal/flatbuf/MessageHeader.go @@ -20,14 +20,14 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// The root Message type -/// This union enables us to easily send different message types without -/// redundant storage, and in the future we can easily add new message types. -/// -/// Arrow implementations do not need to implement all of the message types, -/// which may include experimental metadata types. For maximum compatibility, -/// it is best to send data using RecordBatch +// / ---------------------------------------------------------------------- +// / The root Message type +// / This union enables us to easily send different message types without +// / redundant storage, and in the future we can easily add new message types. +// / +// / Arrow implementations do not need to implement all of the message types, +// / which may include experimental metadata types. For maximum compatibility, +// / it is best to send data using RecordBatch type MessageHeader byte const ( diff --git a/go/arrow/internal/flatbuf/Null.go b/go/arrow/internal/flatbuf/Null.go index 3c3eb4bda36..3b93a1b6ee9 100644 --- a/go/arrow/internal/flatbuf/Null.go +++ b/go/arrow/internal/flatbuf/Null.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// These are stored in the flatbuffer in the Type union below +// / These are stored in the flatbuffer in the Type union below type Null struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/RecordBatch.go b/go/arrow/internal/flatbuf/RecordBatch.go index c50f4a6e868..52c72a8a20a 100644 --- a/go/arrow/internal/flatbuf/RecordBatch.go +++ b/go/arrow/internal/flatbuf/RecordBatch.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A data header describing the shared memory layout of a "record" or "row" -/// batch. Some systems call this a "row batch" internally and others a "record -/// batch". +// / A data header describing the shared memory layout of a "record" or "row" +// / batch. Some systems call this a "row batch" internally and others a "record +// / batch". type RecordBatch struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *RecordBatch) Table() flatbuffers.Table { return rcv._tab } -/// number of records / rows. The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) Length() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *RecordBatch) Length() int64 { return 0 } -/// number of records / rows. 
The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) MutateLength(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Nodes correspond to the pre-ordered flattened logical schema +// / Nodes correspond to the pre-ordered flattened logical schema func (rcv *RecordBatch) Nodes(obj *FieldNode, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -81,13 +81,13 @@ func (rcv *RecordBatch) NodesLength() int { return 0 } -/// Nodes correspond to the pre-ordered flattened logical schema -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap +// / Nodes correspond to the pre-ordered flattened logical schema +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap func (rcv *RecordBatch) Buffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -107,13 +107,13 @@ func (rcv *RecordBatch) BuffersLength() int { return 0 } -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap -/// Optional compression of the message body +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap +// / Optional compression of the message body func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -127,21 +127,21 @@ func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { return nil } -/// Optional compression of the message body -/// Some types such as Utf8View are represented using a variable number of buffers. -/// For each such Field in the pre-ordered flattened logical schema, there will be -/// an entry in variadicBufferCounts to indicate the number of number of variadic -/// buffers which belong to that Field in the current RecordBatch. -/// -/// For example, the schema -/// col1: Struct -/// col2: Utf8View -/// contains two Fields with variadic buffers so variadicBufferCounts will have -/// two entries, the first counting the variadic buffers of `col1.beta` and the -/// second counting `col2`'s. -/// -/// This field may be omitted if and only if the schema contains no Fields with -/// a variable number of buffers, such as BinaryView and Utf8View. +// / Optional compression of the message body +// / Some types such as Utf8View are represented using a variable number of buffers. 
+// / For each such Field in the pre-ordered flattened logical schema, there will be +// / an entry in variadicBufferCounts to indicate the number of number of variadic +// / buffers which belong to that Field in the current RecordBatch. +// / +// / For example, the schema +// / col1: Struct +// / col2: Utf8View +// / contains two Fields with variadic buffers so variadicBufferCounts will have +// / two entries, the first counting the variadic buffers of `col1.beta` and the +// / second counting `col2`'s. +// / +// / This field may be omitted if and only if the schema contains no Fields with +// / a variable number of buffers, such as BinaryView and Utf8View. func (rcv *RecordBatch) VariadicBufferCounts(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -159,20 +159,20 @@ func (rcv *RecordBatch) VariadicBufferCountsLength() int { return 0 } -/// Some types such as Utf8View are represented using a variable number of buffers. -/// For each such Field in the pre-ordered flattened logical schema, there will be -/// an entry in variadicBufferCounts to indicate the number of number of variadic -/// buffers which belong to that Field in the current RecordBatch. -/// -/// For example, the schema -/// col1: Struct -/// col2: Utf8View -/// contains two Fields with variadic buffers so variadicBufferCounts will have -/// two entries, the first counting the variadic buffers of `col1.beta` and the -/// second counting `col2`'s. -/// -/// This field may be omitted if and only if the schema contains no Fields with -/// a variable number of buffers, such as BinaryView and Utf8View. +// / Some types such as Utf8View are represented using a variable number of buffers. +// / For each such Field in the pre-ordered flattened logical schema, there will be +// / an entry in variadicBufferCounts to indicate the number of number of variadic +// / buffers which belong to that Field in the current RecordBatch. +// / +// / For example, the schema +// / col1: Struct +// / col2: Utf8View +// / contains two Fields with variadic buffers so variadicBufferCounts will have +// / two entries, the first counting the variadic buffers of `col1.beta` and the +// / second counting `col2`'s. +// / +// / This field may be omitted if and only if the schema contains no Fields with +// / a variable number of buffers, such as BinaryView and Utf8View. func (rcv *RecordBatch) MutateVariadicBufferCounts(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/RunEndEncoded.go b/go/arrow/internal/flatbuf/RunEndEncoded.go index fa414c1bf0e..b88460b2e22 100644 --- a/go/arrow/internal/flatbuf/RunEndEncoded.go +++ b/go/arrow/internal/flatbuf/RunEndEncoded.go @@ -22,11 +22,11 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Contains two child arrays, run_ends and values. -/// The run_ends child array must be a 16/32/64-bit integer array -/// which encodes the indices at which the run with the value in -/// each corresponding index in the values child array ends. -/// Like list/struct types, the value array can be of any type. +// / Contains two child arrays, run_ends and values. +// / The run_ends child array must be a 16/32/64-bit integer array +// / which encodes the indices at which the run with the value in +// / each corresponding index in the values child array ends. +// / Like list/struct types, the value array can be of any type. 
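(Aside: to make the run-end-encoded layout above concrete, logical values ["a", "a", "a", "b", "c", "c"] are stored as run_ends [3, 4, 6] plus values ["a", "b", "c"]. A minimal sketch of declaring the type, assuming arrow.RunEndEncodedOf as provided by recent versions of the Go module:)

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v15/arrow"
)

func main() {
	// run_ends must be a 16/32/64-bit integer child; values can be any type.
	// Logical ["a","a","a","b","c","c"] => run_ends [3,4,6], values ["a","b","c"].
	ree := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String)
	fmt.Println(ree) // e.g. run_end_encoded<run_ends: int32, values: utf8>
}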
type RunEndEncoded struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Schema.go b/go/arrow/internal/flatbuf/Schema.go index 4ee5ecc9e5e..ae5b248a766 100644 --- a/go/arrow/internal/flatbuf/Schema.go +++ b/go/arrow/internal/flatbuf/Schema.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Schema describes the columns in a row batch +// / ---------------------------------------------------------------------- +// / A Schema describes the columns in a row batch type Schema struct { _tab flatbuffers.Table } @@ -44,9 +44,9 @@ func (rcv *Schema) Table() flatbuffers.Table { return rcv._tab } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) Endianness() Endianness { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,9 +55,9 @@ func (rcv *Schema) Endianness() Endianness { return 0 } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) MutateEndianness(n Endianness) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } @@ -102,7 +102,7 @@ func (rcv *Schema) CustomMetadataLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) Features(j int) Feature { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -120,7 +120,7 @@ func (rcv *Schema) FeaturesLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) MutateFeatures(j int, n Feature) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go index de8217650b2..2477af10035 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse Row format, that is matrix-specific. +// / Compressed Sparse Row format, that is matrix-specific. type SparseMatrixIndexCSR struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSR) Table() flatbuffers.Table { return rcv._tab } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -57,29 +57,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from indptr[i] to indptr[i+1] in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. 
-/// -/// For example, let X be the following 6x4 matrix: -/// -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// -/// The array of non-zero values in X is: -/// -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// -/// And the indptr of X is: -/// -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. +// / The type of values in indptrBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from indptr[i] to indptr[i+1] in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / +// / The array of non-zero values in X is: +// / +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / +// / And the indptr of X is: +// / +// / indptr(X) = [0, 2, 3, 5, 5, 8, 10]. func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -93,29 +93,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer { return nil } -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from indptr[i] to indptr[i+1] in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// -/// The array of non-zero values in X is: -/// -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// -/// And the indptr of X is: -/// -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. -/// The type of values in indicesBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from indptr[i] to indptr[i+1] in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / +// / The array of non-zero values in X is: +// / +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / +// / And the indptr of X is: +// / +// / indptr(X) = [0, 2, 3, 5, 5, 8, 10]. +// / The type of values in indicesBuffer func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -129,16 +129,16 @@ func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// -/// Note that the indices are sorted in lexicographical order for each row. +// / The type of values in indicesBuffer +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. 
+// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -152,15 +152,15 @@ func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSRStart(builder *flatbuffers.Builder) { builder.StartObject(4) } diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go index c28cc5d082f..7f262deedbf 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse format, that is matrix-specific. +// / Compressed Sparse format, that is matrix-specific. type SparseMatrixIndexCSX struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSX) Table() flatbuffers.Table { return rcv._tab } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -52,12 +52,12 @@ func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { return 0 } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) MutateCompressedAxis(n SparseMatrixCompressedAxis) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -71,30 +71,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// ```text -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// ``` -/// The array of non-zero values in X is: -/// ```text -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// ``` -/// And the indptr of X is: -/// ```text -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. 
-/// ``` +// / The type of values in indptrBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / ```text +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / ``` +// / The array of non-zero values in X is: +// / ```text +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / ``` +// / And the indptr of X is: +// / ```text +// / indptr(X) = [0, 2, 3, 5, 5, 8, 10]. +// / ``` func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -108,30 +108,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer { return nil } -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// ```text -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// ``` -/// The array of non-zero values in X is: -/// ```text -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// ``` -/// And the indptr of X is: -/// ```text -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. -/// ``` -/// The type of values in indicesBuffer +// / indptrBuffer stores the location and size of indptr array that +// / represents the range of the rows. +// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. +// / The length of this array is 1 + (the number of rows), and the type +// / of index value is long. +// / +// / For example, let X be the following 6x4 matrix: +// / ```text +// / X := [[0, 1, 2, 0], +// / [0, 0, 3, 0], +// / [0, 4, 0, 5], +// / [0, 0, 0, 0], +// / [6, 0, 7, 8], +// / [0, 9, 0, 0]]. +// / ``` +// / The array of non-zero values in X is: +// / ```text +// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. +// / ``` +// / And the indptr of X is: +// / ```text +// / indptr(X) = [0, 2, 3, 5, 5, 8, 10]. +// / ``` +// / The type of values in indicesBuffer func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -145,16 +145,16 @@ func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// ```text -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// ``` -/// Note that the indices are sorted in lexicographical order for each row. +// / The type of values in indicesBuffer +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / ```text +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / ``` +// / Note that the indices are sorted in lexicographical order for each row. 
func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -168,15 +168,15 @@ func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// ```text -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// ``` -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / ```text +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / ``` +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSXStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/SparseTensor.go b/go/arrow/internal/flatbuf/SparseTensor.go index 6f3f55797d7..8f67e1fc08b 100644 --- a/go/arrow/internal/flatbuf/SparseTensor.go +++ b/go/arrow/internal/flatbuf/SparseTensor.go @@ -54,9 +54,9 @@ func (rcv *SparseTensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -66,10 +66,10 @@ func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. -/// The dimensions of the tensor, optionally named. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. +// / The dimensions of the tensor, optionally named. func (rcv *SparseTensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -90,8 +90,8 @@ func (rcv *SparseTensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named. -/// The number of non-zero values in a sparse tensor. +// / The dimensions of the tensor, optionally named. +// / The number of non-zero values in a sparse tensor. func (rcv *SparseTensor) NonZeroLength() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -100,7 +100,7 @@ func (rcv *SparseTensor) NonZeroLength() int64 { return 0 } -/// The number of non-zero values in a sparse tensor. +// / The number of non-zero values in a sparse tensor. 
func (rcv *SparseTensor) MutateNonZeroLength(n int64) bool { return rcv._tab.MutateInt64Slot(10, n) } @@ -117,7 +117,7 @@ func (rcv *SparseTensor) MutateSparseIndexType(n SparseTensorIndex) bool { return rcv._tab.MutateByteSlot(12, byte(n)) } -/// Sparse tensor index +// / Sparse tensor index func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -127,8 +127,8 @@ func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool { return false } -/// Sparse tensor index -/// The location and size of the tensor's data +// / Sparse tensor index +// / The location and size of the tensor's data func (rcv *SparseTensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -142,7 +142,7 @@ func (rcv *SparseTensor) Data(obj *Buffer) *Buffer { return nil } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func SparseTensorStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go index f8eee99fa69..bf1c218e2e4 100644 --- a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go +++ b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go @@ -22,38 +22,38 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// EXPERIMENTAL: Data structures for sparse tensors -/// Coordinate (COO) format of sparse tensor index. -/// -/// COO's index list are represented as a NxM matrix, -/// where N is the number of non-zero values, -/// and M is the number of dimensions of a sparse tensor. -/// -/// indicesBuffer stores the location and size of the data of this indices -/// matrix. The value type and the stride of the indices matrix is -/// specified in indicesType and indicesStrides fields. -/// -/// For example, let X be a 2x3x4x5 tensor, and it has the following -/// 6 non-zero values: -/// ```text -/// X[0, 1, 2, 0] := 1 -/// X[1, 1, 2, 3] := 2 -/// X[0, 2, 1, 0] := 3 -/// X[0, 1, 3, 0] := 4 -/// X[0, 1, 2, 1] := 5 -/// X[1, 2, 0, 4] := 6 -/// ``` -/// In COO format, the index matrix of X is the following 4x6 matrix: -/// ```text -/// [[0, 0, 0, 0, 1, 1], -/// [1, 1, 1, 2, 1, 2], -/// [2, 2, 3, 1, 2, 0], -/// [0, 1, 0, 0, 3, 4]] -/// ``` -/// When isCanonical is true, the indices is sorted in lexicographical order -/// (row-major order), and it does not have duplicated entries. Otherwise, -/// the indices may not be sorted, or may have duplicated entries. +// / ---------------------------------------------------------------------- +// / EXPERIMENTAL: Data structures for sparse tensors +// / Coordinate (COO) format of sparse tensor index. +// / +// / COO's index list are represented as a NxM matrix, +// / where N is the number of non-zero values, +// / and M is the number of dimensions of a sparse tensor. +// / +// / indicesBuffer stores the location and size of the data of this indices +// / matrix. The value type and the stride of the indices matrix is +// / specified in indicesType and indicesStrides fields. 
+// / +// / For example, let X be a 2x3x4x5 tensor, and it has the following +// / 6 non-zero values: +// / ```text +// / X[0, 1, 2, 0] := 1 +// / X[1, 1, 2, 3] := 2 +// / X[0, 2, 1, 0] := 3 +// / X[0, 1, 3, 0] := 4 +// / X[0, 1, 2, 1] := 5 +// / X[1, 2, 0, 4] := 6 +// / ``` +// / In COO format, the index matrix of X is the following 4x6 matrix: +// / ```text +// / [[0, 0, 0, 0, 1, 1], +// / [1, 1, 1, 2, 1, 2], +// / [2, 2, 3, 1, 2, 0], +// / [0, 1, 0, 0, 3, 4]] +// / ``` +// / When isCanonical is true, the indices is sorted in lexicographical order +// / (row-major order), and it does not have duplicated entries. Otherwise, +// / the indices may not be sorted, or may have duplicated entries. type SparseTensorIndexCOO struct { _tab flatbuffers.Table } @@ -74,7 +74,7 @@ func (rcv *SparseTensorIndexCOO) Table() flatbuffers.Table { return rcv._tab } -/// The type of values in indicesBuffer +// / The type of values in indicesBuffer func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffer -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / The type of values in indicesBuffer +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *SparseTensorIndexCOO) IndicesStrides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *SparseTensorIndexCOO) IndicesStridesLength() int { return 0 } -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool { return false } -/// The location and size of the indices matrix's data +// / The location and size of the indices matrix's data func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -133,12 +133,12 @@ func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// The location and size of the indices matrix's data -/// This flag is true if and only if the indices matrix is sorted in -/// row-major order, and does not have duplicated entries. -/// This sort order is the same as of Tensorflow's SparseTensor, -/// but it is inverse order of SciPy's canonical coo_matrix -/// (SciPy employs column-major order for its coo_matrix). +// / The location and size of the indices matrix's data +// / This flag is true if and only if the indices matrix is sorted in +// / row-major order, and does not have duplicated entries. +// / This sort order is the same as of Tensorflow's SparseTensor, +// / but it is inverse order of SciPy's canonical coo_matrix +// / (SciPy employs column-major order for its coo_matrix). 
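
As a concrete illustration of the COO layout described above, the following self-contained sketch (not part of this patch; all names are illustrative) assembles the index matrix from the lexicographically sorted non-zero coordinates of the documented 2x3x4x5 tensor:

package main

import "fmt"

func main() {
	// Non-zero coordinates of X, already sorted in lexicographical
	// (row-major) order with no duplicates, so isCanonical holds.
	coords := [][4]int64{
		{0, 1, 2, 0}, // X[0, 1, 2, 0] = 1
		{0, 1, 2, 1}, // X[0, 1, 2, 1] = 5
		{0, 1, 3, 0}, // X[0, 1, 3, 0] = 4
		{0, 2, 1, 0}, // X[0, 2, 1, 0] = 3
		{1, 1, 2, 3}, // X[1, 1, 2, 3] = 2
		{1, 2, 0, 4}, // X[1, 2, 0, 4] = 6
	}
	// One row per dimension, one column per non-zero value, matching
	// the 4x6 matrix shown above.
	var index [4][6]int64
	for n, c := range coords {
		for m, v := range c {
			index[m][n] = v
		}
	}
	for _, dim := range index {
		fmt.Println(dim)
	}
	// Output:
	// [0 0 0 0 1 1]
	// [1 1 1 2 1 2]
	// [2 2 3 1 2 0]
	// [0 1 0 0 3 4]
}
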
func (rcv *SparseTensorIndexCOO) IsCanonical() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -147,11 +147,11 @@ func (rcv *SparseTensorIndexCOO) IsCanonical() bool { return false } -/// This flag is true if and only if the indices matrix is sorted in -/// row-major order, and does not have duplicated entries. -/// This sort order is the same as of Tensorflow's SparseTensor, -/// but it is inverse order of SciPy's canonical coo_matrix -/// (SciPy employs column-major order for its coo_matrix). +// / This flag is true if and only if the indices matrix is sorted in +// / row-major order, and does not have duplicated entries. +// / This sort order is the same as of Tensorflow's SparseTensor, +// / but it is inverse order of SciPy's canonical coo_matrix +// / (SciPy employs column-major order for its coo_matrix). func (rcv *SparseTensorIndexCOO) MutateIsCanonical(n bool) bool { return rcv._tab.MutateBoolSlot(10, n) } diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go index a824c84ebfe..66226e0412c 100644 --- a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go +++ b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse Fiber (CSF) sparse tensor index. +// / Compressed Sparse Fiber (CSF) sparse tensor index. type SparseTensorIndexCSF struct { _tab flatbuffers.Table } @@ -43,37 +43,37 @@ func (rcv *SparseTensorIndexCSF) Table() flatbuffers.Table { return rcv._tab } -/// CSF is a generalization of compressed sparse row (CSR) index. -/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) -/// -/// CSF index recursively compresses each dimension of a tensor into a set -/// of prefix trees. Each path from a root to leaf forms one tensor -/// non-zero index. CSF is implemented with two arrays of buffers and one -/// arrays of integers. -/// -/// For example, let X be a 2x3x4x5 tensor and let it have the following -/// 8 non-zero values: -/// ```text -/// X[0, 0, 0, 1] := 1 -/// X[0, 0, 0, 2] := 2 -/// X[0, 1, 0, 0] := 3 -/// X[0, 1, 0, 2] := 4 -/// X[0, 1, 1, 0] := 5 -/// X[1, 1, 1, 0] := 6 -/// X[1, 1, 1, 1] := 7 -/// X[1, 1, 1, 2] := 8 -/// ``` -/// As a prefix tree this would be represented as: -/// ```text -/// 0 1 -/// / \ | -/// 0 1 1 -/// / / \ | -/// 0 0 1 1 -/// /| /| | /| | -/// 1 2 0 2 0 0 1 2 -/// ``` -/// The type of values in indptrBuffers +// / CSF is a generalization of compressed sparse row (CSR) index. +// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) +// / +// / CSF index recursively compresses each dimension of a tensor into a set +// / of prefix trees. Each path from a root to leaf forms one tensor +// / non-zero index. CSF is implemented with two arrays of buffers and one +// / arrays of integers. 
+// / +// / For example, let X be a 2x3x4x5 tensor and let it have the following +// / 8 non-zero values: +// / ```text +// / X[0, 0, 0, 1] := 1 +// / X[0, 0, 0, 2] := 2 +// / X[0, 1, 0, 0] := 3 +// / X[0, 1, 0, 2] := 4 +// / X[0, 1, 1, 0] := 5 +// / X[1, 1, 1, 0] := 6 +// / X[1, 1, 1, 1] := 7 +// / X[1, 1, 1, 2] := 8 +// / ``` +// / As a prefix tree this would be represented as: +// / ```text +// / 0 1 +// / / \ | +// / 0 1 1 +// / / / \ | +// / 0 0 1 1 +// / /| /| | /| | +// / 1 2 0 2 0 0 1 2 +// / ``` +// / The type of values in indptrBuffers func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -87,51 +87,51 @@ func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int { return nil } -/// CSF is a generalization of compressed sparse row (CSR) index. -/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) -/// -/// CSF index recursively compresses each dimension of a tensor into a set -/// of prefix trees. Each path from a root to leaf forms one tensor -/// non-zero index. CSF is implemented with two arrays of buffers and one -/// arrays of integers. -/// -/// For example, let X be a 2x3x4x5 tensor and let it have the following -/// 8 non-zero values: -/// ```text -/// X[0, 0, 0, 1] := 1 -/// X[0, 0, 0, 2] := 2 -/// X[0, 1, 0, 0] := 3 -/// X[0, 1, 0, 2] := 4 -/// X[0, 1, 1, 0] := 5 -/// X[1, 1, 1, 0] := 6 -/// X[1, 1, 1, 1] := 7 -/// X[1, 1, 1, 2] := 8 -/// ``` -/// As a prefix tree this would be represented as: -/// ```text -/// 0 1 -/// / \ | -/// 0 1 1 -/// / / \ | -/// 0 0 1 1 -/// /| /| | /| | -/// 1 2 0 2 0 0 1 2 -/// ``` -/// The type of values in indptrBuffers -/// indptrBuffers stores the sparsity structure. -/// Each two consecutive dimensions in a tensor correspond to a buffer in -/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in -/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -/// -/// For example, the indptrBuffers for the above X is: -/// ```text -/// indptrBuffer(X) = [ -/// [0, 2, 3], -/// [0, 1, 3, 4], -/// [0, 2, 4, 5, 8] -/// ]. -/// ``` +// / CSF is a generalization of compressed sparse row (CSR) index. +// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) +// / +// / CSF index recursively compresses each dimension of a tensor into a set +// / of prefix trees. Each path from a root to leaf forms one tensor +// / non-zero index. CSF is implemented with two arrays of buffers and one +// / arrays of integers. +// / +// / For example, let X be a 2x3x4x5 tensor and let it have the following +// / 8 non-zero values: +// / ```text +// / X[0, 0, 0, 1] := 1 +// / X[0, 0, 0, 2] := 2 +// / X[0, 1, 0, 0] := 3 +// / X[0, 1, 0, 2] := 4 +// / X[0, 1, 1, 0] := 5 +// / X[1, 1, 1, 0] := 6 +// / X[1, 1, 1, 1] := 7 +// / X[1, 1, 1, 2] := 8 +// / ``` +// / As a prefix tree this would be represented as: +// / ```text +// / 0 1 +// / / \ | +// / 0 1 1 +// / / / \ | +// / 0 0 1 1 +// / /| /| | /| | +// / 1 2 0 2 0 0 1 2 +// / ``` +// / The type of values in indptrBuffers +// / indptrBuffers stores the sparsity structure. +// / Each two consecutive dimensions in a tensor correspond to a buffer in +// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in +// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. 
+// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndptrBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -151,21 +151,21 @@ func (rcv *SparseTensorIndexCSF) IndptrBuffersLength() int { return 0 } -/// indptrBuffers stores the sparsity structure. -/// Each two consecutive dimensions in a tensor correspond to a buffer in -/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in -/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -/// -/// For example, the indptrBuffers for the above X is: -/// ```text -/// indptrBuffer(X) = [ -/// [0, 2, 3], -/// [0, 1, 3, 4], -/// [0, 2, 4, 5, 8] -/// ]. -/// ``` -/// The type of values in indicesBuffers +// / indptrBuffers stores the sparsity structure. +// / Each two consecutive dimensions in a tensor correspond to a buffer in +// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in +// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. +// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` +// / The type of values in indicesBuffers func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -179,18 +179,18 @@ func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffers -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` +// / The type of values in indicesBuffers +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndicesBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -210,23 +210,23 @@ func (rcv *SparseTensorIndexCSF) IndicesBuffersLength() int { return 0 } -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. 
+// / ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) AxisOrder(j int) int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -244,12 +244,12 @@ func (rcv *SparseTensorIndexCSF) AxisOrderLength() int { return 0 } -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) MutateAxisOrder(j int, n int32) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/Struct_.go b/go/arrow/internal/flatbuf/Struct_.go index 427e7060382..73752a17e00 100644 --- a/go/arrow/internal/flatbuf/Struct_.go +++ b/go/arrow/internal/flatbuf/Struct_.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct -/// (according to the physical memory layout). We used Struct_ here as -/// Struct is a reserved word in Flatbuffers +// / A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +// / (according to the physical memory layout). We used Struct_ here as +// / Struct is a reserved word in Flatbuffers type Struct_ struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Tensor.go b/go/arrow/internal/flatbuf/Tensor.go index 39d70e351e3..47bfe8067b5 100644 --- a/go/arrow/internal/flatbuf/Tensor.go +++ b/go/arrow/internal/flatbuf/Tensor.go @@ -54,8 +54,8 @@ func (rcv *Tensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -65,9 +65,9 @@ func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types -/// The dimensions of the tensor, optionally named +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types +// / The dimensions of the tensor, optionally named func (rcv *Tensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *Tensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / The dimensions of the tensor, optionally named +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). 
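
Since an omitted strides vector defaults to row-major (C-like) order, this short self-contained sketch (not part of this patch; rowMajorStrides is a hypothetical helper) shows how the default byte strides fall out of a shape and element width:

package main

import "fmt"

// rowMajorStrides computes the byte offsets needed to advance one value
// cell along each dimension when the data is laid out in row-major order.
func rowMajorStrides(shape []int64, elemSize int64) []int64 {
	strides := make([]int64, len(shape))
	stride := elemSize
	for i := len(shape) - 1; i >= 0; i-- {
		strides[i] = stride
		stride *= shape[i]
	}
	return strides
}

func main() {
	// A 2x3x4x5 tensor of 8-byte values.
	fmt.Println(rowMajorStrides([]int64{2, 3, 4, 5}, 8))
	// Output: [480 160 40 8]
}
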
func (rcv *Tensor) Strides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *Tensor) StridesLength() int { return 0 } -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *Tensor) MutateStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *Tensor) MutateStrides(j int, n int64) bool { return false } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func (rcv *Tensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -133,7 +133,7 @@ func (rcv *Tensor) Data(obj *Buffer) *Buffer { return nil } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func TensorStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/TensorDim.go b/go/arrow/internal/flatbuf/TensorDim.go index 14b82120887..c6413b6a8c0 100644 --- a/go/arrow/internal/flatbuf/TensorDim.go +++ b/go/arrow/internal/flatbuf/TensorDim.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for dense tensors -/// Shape data for a single axis in a tensor +// / ---------------------------------------------------------------------- +// / Data structures for dense tensors +// / Shape data for a single axis in a tensor type TensorDim struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *TensorDim) Table() flatbuffers.Table { return rcv._tab } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) Size() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,12 +54,12 @@ func (rcv *TensorDim) Size() int64 { return 0 } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) MutateSize(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Name of the dimension, optional +// / Name of the dimension, optional func (rcv *TensorDim) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -68,7 +68,7 @@ func (rcv *TensorDim) Name() []byte { return nil } -/// Name of the dimension, optional +// / Name of the dimension, optional func TensorDimStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Time.go b/go/arrow/internal/flatbuf/Time.go index 2fb6e4c110e..13038a6e332 100644 --- a/go/arrow/internal/flatbuf/Time.go +++ b/go/arrow/internal/flatbuf/Time.go @@ -22,20 +22,20 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Time is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since midnight, stored in either of four units: seconds, -/// milliseconds, microseconds or nanoseconds. -/// -/// The integer `bitWidth` depends on the `unit` and must be one of the following: -/// * SECOND and MILLISECOND: 32 bits -/// * MICROSECOND and NANOSECOND: 64 bits -/// -/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds -/// (exclusive), adjusted for the time unit (for example, up to 86400000 -/// exclusive for the MILLISECOND unit). -/// This definition doesn't allow for leap seconds. 
Time values from -/// measurements with leap seconds will need to be corrected when ingesting -/// into Arrow (for example by replacing the value 86400 with 86399). +// / Time is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since midnight, stored in either of four units: seconds, +// / milliseconds, microseconds or nanoseconds. +// / +// / The integer `bitWidth` depends on the `unit` and must be one of the following: +// / * SECOND and MILLISECOND: 32 bits +// / * MICROSECOND and NANOSECOND: 64 bits +// / +// / The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +// / (exclusive), adjusted for the time unit (for example, up to 86400000 +// / exclusive for the MILLISECOND unit). +// / This definition doesn't allow for leap seconds. Time values from +// / measurements with leap seconds will need to be corrected when ingesting +// / into Arrow (for example by replacing the value 86400 with 86399). type Time struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Timestamp.go b/go/arrow/internal/flatbuf/Timestamp.go index d0058e13e65..ce172bacdd3 100644 --- a/go/arrow/internal/flatbuf/Timestamp.go +++ b/go/arrow/internal/flatbuf/Timestamp.go @@ -22,111 +22,111 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Timestamp is a 64-bit signed integer representing an elapsed time since a -/// fixed epoch, stored in either of four units: seconds, milliseconds, -/// microseconds or nanoseconds, and is optionally annotated with a timezone. -/// -/// Timestamp values do not include any leap seconds (in other words, all -/// days are considered 86400 seconds long). -/// -/// Timestamps with a non-empty timezone -/// ------------------------------------ -/// -/// If a Timestamp column has a non-empty timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone -/// (the Unix epoch), regardless of the Timestamp's own timezone. -/// -/// Therefore, timestamp values with a non-empty timezone correspond to -/// physical points in time together with some additional information about -/// how the data was obtained and/or how to display it (the timezone). -/// -/// For example, the timestamp value 0 with the timezone string "Europe/Paris" -/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the -/// application may prefer to display it as "January 1st 1970, 01h00" in -/// the Europe/Paris timezone (which is the same physical point in time). -/// -/// One consequence is that timestamp values with a non-empty timezone -/// can be compared and ordered directly, since they all share the same -/// well-known point of reference (the Unix epoch). -/// -/// Timestamps with an unset / empty timezone -/// ----------------------------------------- -/// -/// If a Timestamp column has no timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. -/// -/// Therefore, timestamp values without a timezone cannot be meaningfully -/// interpreted as physical points in time, but only as calendar / clock -/// indications ("wall clock time") in an unspecified timezone. -/// -/// For example, the timestamp value 0 with an empty timezone string -/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there -/// is not enough information to interpret it as a well-defined physical -/// point in time. 
-/// -/// One consequence is that timestamp values without a timezone cannot -/// be reliably compared or ordered, since they may have different points of -/// reference. In particular, it is *not* possible to interpret an unset -/// or empty timezone as the same as "UTC". -/// -/// Conversion between timezones -/// ---------------------------- -/// -/// If a Timestamp column has a non-empty timezone, changing the timezone -/// to a different non-empty value is a metadata-only operation: -/// the timestamp values need not change as their point of reference remains -/// the same (the Unix epoch). -/// -/// However, if a Timestamp column has no timezone value, changing it to a -/// non-empty value requires to think about the desired semantics. -/// One possibility is to assume that the original timestamp values are -/// relative to the epoch of the timezone being set; timestamp values should -/// then adjusted to the Unix epoch (for example, changing the timezone from -/// empty to "Europe/Paris" would require converting the timestamp values -/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is -/// nevertheless correct). -/// -/// Guidelines for encoding data from external libraries -/// ---------------------------------------------------- -/// -/// Date & time libraries often have multiple different data types for temporal -/// data. In order to ease interoperability between different implementations the -/// Arrow project has some recommendations for encoding these types into a Timestamp -/// column. -/// -/// An "instant" represents a physical point in time that has no relevant timezone -/// (for example, astronomical data). To encode an instant, use a Timestamp with -/// the timezone string set to "UTC", and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// A "zoned date-time" represents a physical point in time annotated with an -/// informative timezone (for example, the timezone in which the data was -/// recorded). To encode a zoned date-time, use a Timestamp with the timezone -/// string set to the name of the timezone, and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// (There is some ambiguity between an instant and a zoned date-time with the -/// UTC timezone. Both of these are stored the same in Arrow. Typically, -/// this distinction does not matter. If it does, then an application should -/// use custom metadata or an extension type to distinguish between the two cases.) -/// -/// An "offset date-time" represents a physical point in time combined with an -/// explicit offset from UTC. To encode an offset date-time, use a Timestamp -/// with the timezone string set to the numeric timezone offset string -/// (e.g. "+03:00"), and make sure the Timestamp values are relative to -/// the UTC epoch (January 1st 1970, midnight). -/// -/// A "naive date-time" (also called "local date-time" in some libraries) -/// represents a wall clock time combined with a calendar date, but with -/// no indication of how to map this information to a physical point in time. -/// Naive date-times must be handled with care because of this missing -/// information, and also because daylight saving time (DST) may make -/// some values ambiguous or nonexistent. A naive date-time may be -/// stored as a struct with Date and Time fields. However, it may also be -/// encoded into a Timestamp column with an empty timezone. 
The timestamp -/// values should be computed "as if" the timezone of the date-time values -/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would -/// be encoded as timestamp value 0. +// / Timestamp is a 64-bit signed integer representing an elapsed time since a +// / fixed epoch, stored in either of four units: seconds, milliseconds, +// / microseconds or nanoseconds, and is optionally annotated with a timezone. +// / +// / Timestamp values do not include any leap seconds (in other words, all +// / days are considered 86400 seconds long). +// / +// / Timestamps with a non-empty timezone +// / ------------------------------------ +// / +// / If a Timestamp column has a non-empty timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone +// / (the Unix epoch), regardless of the Timestamp's own timezone. +// / +// / Therefore, timestamp values with a non-empty timezone correspond to +// / physical points in time together with some additional information about +// / how the data was obtained and/or how to display it (the timezone). +// / +// / For example, the timestamp value 0 with the timezone string "Europe/Paris" +// / corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the +// / application may prefer to display it as "January 1st 1970, 01h00" in +// / the Europe/Paris timezone (which is the same physical point in time). +// / +// / One consequence is that timestamp values with a non-empty timezone +// / can be compared and ordered directly, since they all share the same +// / well-known point of reference (the Unix epoch). +// / +// / Timestamps with an unset / empty timezone +// / ----------------------------------------- +// / +// / If a Timestamp column has no timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. +// / +// / Therefore, timestamp values without a timezone cannot be meaningfully +// / interpreted as physical points in time, but only as calendar / clock +// / indications ("wall clock time") in an unspecified timezone. +// / +// / For example, the timestamp value 0 with an empty timezone string +// / corresponds to "January 1st 1970, 00h00" in an unknown timezone: there +// / is not enough information to interpret it as a well-defined physical +// / point in time. +// / +// / One consequence is that timestamp values without a timezone cannot +// / be reliably compared or ordered, since they may have different points of +// / reference. In particular, it is *not* possible to interpret an unset +// / or empty timezone as the same as "UTC". +// / +// / Conversion between timezones +// / ---------------------------- +// / +// / If a Timestamp column has a non-empty timezone, changing the timezone +// / to a different non-empty value is a metadata-only operation: +// / the timestamp values need not change as their point of reference remains +// / the same (the Unix epoch). +// / +// / However, if a Timestamp column has no timezone value, changing it to a +// / non-empty value requires to think about the desired semantics. +// / One possibility is to assume that the original timestamp values are +// / relative to the epoch of the timezone being set; timestamp values should +// / then adjusted to the Unix epoch (for example, changing the timezone from +// / empty to "Europe/Paris" would require converting the timestamp values +// / from "Europe/Paris" to "UTC", which seems counter-intuitive but is +// / nevertheless correct). 
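
The timezone semantics spelled out above can be sanity-checked with Go's standard time package. A self-contained sketch, not part of this patch, assuming the host's tz database is available to time.LoadLocation:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Zoned date-time: 1970-01-01 01:00 in Europe/Paris (UTC+1 on that
	// date) is the Unix epoch, so its timestamp value is 0 regardless
	// of the timezone string stored on the column.
	paris, err := time.LoadLocation("Europe/Paris")
	if err != nil {
		panic(err)
	}
	fmt.Println(time.Date(1970, 1, 1, 1, 0, 0, 0, paris).Unix()) // 0

	// Naive date-time: encode the wall-clock reading "as if" it were
	// UTC, so "January 1st 1970, 00h00" also encodes as value 0 (with
	// an empty timezone string on the column).
	fmt.Println(time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC).Unix()) // 0
}
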
+// / +// / Guidelines for encoding data from external libraries +// / ---------------------------------------------------- +// / +// / Date & time libraries often have multiple different data types for temporal +// / data. In order to ease interoperability between different implementations the +// / Arrow project has some recommendations for encoding these types into a Timestamp +// / column. +// / +// / An "instant" represents a physical point in time that has no relevant timezone +// / (for example, astronomical data). To encode an instant, use a Timestamp with +// / the timezone string set to "UTC", and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / A "zoned date-time" represents a physical point in time annotated with an +// / informative timezone (for example, the timezone in which the data was +// / recorded). To encode a zoned date-time, use a Timestamp with the timezone +// / string set to the name of the timezone, and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / (There is some ambiguity between an instant and a zoned date-time with the +// / UTC timezone. Both of these are stored the same in Arrow. Typically, +// / this distinction does not matter. If it does, then an application should +// / use custom metadata or an extension type to distinguish between the two cases.) +// / +// / An "offset date-time" represents a physical point in time combined with an +// / explicit offset from UTC. To encode an offset date-time, use a Timestamp +// / with the timezone string set to the numeric timezone offset string +// / (e.g. "+03:00"), and make sure the Timestamp values are relative to +// / the UTC epoch (January 1st 1970, midnight). +// / +// / A "naive date-time" (also called "local date-time" in some libraries) +// / represents a wall clock time combined with a calendar date, but with +// / no indication of how to map this information to a physical point in time. +// / Naive date-times must be handled with care because of this missing +// / information, and also because daylight saving time (DST) may make +// / some values ambiguous or nonexistent. A naive date-time may be +// / stored as a struct with Date and Time fields. However, it may also be +// / encoded into a Timestamp column with an empty timezone. The timestamp +// / values should be computed "as if" the timezone of the date-time values +// / was UTC; for example, the naive date-time "January 1st 1970, 00h00" would +// / be encoded as timestamp value 0. type Timestamp struct { _tab flatbuffers.Table } @@ -159,16 +159,16 @@ func (rcv *Timestamp) MutateUnit(n TimeUnit) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". 
+// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func (rcv *Timestamp) Timezone() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -177,16 +177,16 @@ func (rcv *Timestamp) Timezone() []byte { return nil } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". +// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func TimestampStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Type.go b/go/arrow/internal/flatbuf/Type.go index ab2bce9c636..df8ba8650e1 100644 --- a/go/arrow/internal/flatbuf/Type.go +++ b/go/arrow/internal/flatbuf/Type.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Top-level Type value, enabling extensible type-specific metadata. We can -/// add new logical types to Type without breaking backwards compatibility +// / ---------------------------------------------------------------------- +// / Top-level Type value, enabling extensible type-specific metadata. 
We can +// / add new logical types to Type without breaking backwards compatibility type Type byte const ( diff --git a/go/arrow/internal/flatbuf/Union.go b/go/arrow/internal/flatbuf/Union.go index e34121d4757..0367fb3c1fb 100644 --- a/go/arrow/internal/flatbuf/Union.go +++ b/go/arrow/internal/flatbuf/Union.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A union is a complex type with children in Field -/// By default ids in the type vector refer to the offsets in the children -/// optionally typeIds provides an indirection between the child offset and the type id -/// for each child `typeIds[offset]` is the id used in the type vector +// / A union is a complex type with children in Field +// / By default ids in the type vector refer to the offsets in the children +// / optionally typeIds provides an indirection between the child offset and the type id +// / for each child `typeIds[offset]` is the id used in the type vector type Union struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8.go b/go/arrow/internal/flatbuf/Utf8.go index 4ff365a3750..cab4ce7743c 100644 --- a/go/arrow/internal/flatbuf/Utf8.go +++ b/go/arrow/internal/flatbuf/Utf8.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Unicode with UTF-8 encoding +// / Unicode with UTF-8 encoding type Utf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8View.go b/go/arrow/internal/flatbuf/Utf8View.go index 9cf82149019..f294126a618 100644 --- a/go/arrow/internal/flatbuf/Utf8View.go +++ b/go/arrow/internal/flatbuf/Utf8View.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Utf8, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Utf8, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. type Utf8View struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/testing/tools/bits.go b/go/arrow/internal/testing/tools/bits.go index c123573e2fa..ea6a5432e5c 100644 --- a/go/arrow/internal/testing/tools/bits.go +++ b/go/arrow/internal/testing/tools/bits.go @@ -22,7 +22,7 @@ import "math/bits" // The low bit of each nibble is tested, therefore integers should be written as 8-digit // hex numbers consisting of 1s or 0s. 
 //
-// IntsToBitsLSB(0x11001010) -> 0x53
+// IntsToBitsLSB(0x11001010) -> 0x53
 func IntsToBitsLSB(v ...int32) []byte {
 	res := make([]byte, 0, len(v))
 	for _, b := range v {
diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go
index 0251b08c09b..db4208dc8d3 100644
--- a/go/arrow/ipc/cmd/arrow-cat/main.go
+++ b/go/arrow/ipc/cmd/arrow-cat/main.go
@@ -18,40 +18,40 @@
 //
 // Examples:
 //
-// $> arrow-cat ./testdata/primitives.data
-// version: V4
-// record 1/3...
-// col[0] "bools": [true (null) (null) false true]
-// col[1] "int8s": [-1 (null) (null) -4 -5]
-// col[2] "int16s": [-1 (null) (null) -4 -5]
-// col[3] "int32s": [-1 (null) (null) -4 -5]
-// col[4] "int64s": [-1 (null) (null) -4 -5]
-// col[5] "uint8s": [1 (null) (null) 4 5]
-// col[6] "uint16s": [1 (null) (null) 4 5]
-// col[7] "uint32s": [1 (null) (null) 4 5]
-// col[8] "uint64s": [1 (null) (null) 4 5]
-// col[9] "float32s": [1 (null) (null) 4 5]
-// col[10] "float64s": [1 (null) (null) 4 5]
-// record 2/3...
-// col[0] "bools": [true (null) (null) false true]
-// [...]
+// $> arrow-cat ./testdata/primitives.data
+// version: V4
+// record 1/3...
+// col[0] "bools": [true (null) (null) false true]
+// col[1] "int8s": [-1 (null) (null) -4 -5]
+// col[2] "int16s": [-1 (null) (null) -4 -5]
+// col[3] "int32s": [-1 (null) (null) -4 -5]
+// col[4] "int64s": [-1 (null) (null) -4 -5]
+// col[5] "uint8s": [1 (null) (null) 4 5]
+// col[6] "uint16s": [1 (null) (null) 4 5]
+// col[7] "uint32s": [1 (null) (null) 4 5]
+// col[8] "uint64s": [1 (null) (null) 4 5]
+// col[9] "float32s": [1 (null) (null) 4 5]
+// col[10] "float64s": [1 (null) (null) 4 5]
+// record 2/3...
+// col[0] "bools": [true (null) (null) false true]
+// [...]
 //
-// $> gen-arrow-stream | arrow-cat
-// record 1...
-// col[0] "bools": [true (null) (null) false true]
-// col[1] "int8s": [-1 (null) (null) -4 -5]
-// col[2] "int16s": [-1 (null) (null) -4 -5]
-// col[3] "int32s": [-1 (null) (null) -4 -5]
-// col[4] "int64s": [-1 (null) (null) -4 -5]
-// col[5] "uint8s": [1 (null) (null) 4 5]
-// col[6] "uint16s": [1 (null) (null) 4 5]
-// col[7] "uint32s": [1 (null) (null) 4 5]
-// col[8] "uint64s": [1 (null) (null) 4 5]
-// col[9] "float32s": [1 (null) (null) 4 5]
-// col[10] "float64s": [1 (null) (null) 4 5]
-// record 2...
-// col[0] "bools": [true (null) (null) false true]
-// [...]
+// $> gen-arrow-stream | arrow-cat
+// record 1...
+// col[0] "bools": [true (null) (null) false true]
+// col[1] "int8s": [-1 (null) (null) -4 -5]
+// col[2] "int16s": [-1 (null) (null) -4 -5]
+// col[3] "int32s": [-1 (null) (null) -4 -5]
+// col[4] "int64s": [-1 (null) (null) -4 -5]
+// col[5] "uint8s": [1 (null) (null) 4 5]
+// col[6] "uint16s": [1 (null) (null) 4 5]
+// col[7] "uint32s": [1 (null) (null) 4 5]
+// col[8] "uint64s": [1 (null) (null) 4 5]
+// col[9] "float32s": [1 (null) (null) 4 5]
+// col[10] "float64s": [1 (null) (null) 4 5]
+// record 2...
+// col[0] "bools": [true (null) (null) false true]
+// [...]
package main import ( diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 4230ae24499..49865be96cd 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -18,38 +18,38 @@ // // Examples: // -// $> arrow-ls ./testdata/primitives.data -// version: V4 -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> arrow-ls ./testdata/primitives.data +// version: V4 +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 // -// $> gen-arrow-stream | arrow-ls -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> gen-arrow-stream | arrow-ls +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 package main import ( diff --git a/go/arrow/math/math_amd64.go b/go/arrow/math/math_amd64.go index 44301dc2415..2397eef718d 100644 --- a/go/arrow/math/math_amd64.go +++ b/go/arrow/math/math_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_arm64.go b/go/arrow/math/math_arm64.go index 014664b0463..b150eb061f9 100644 --- a/go/arrow/math/math_arm64.go +++ b/go/arrow/math/math_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math @@ -25,8 +26,8 @@ import ( func init() { if cpu.ARM64.HasASIMD { initNEON() - } else { - initGo() + } else { + initGo() } } diff --git a/go/arrow/math/math_noasm.go b/go/arrow/math/math_noasm.go index 0fa924d90aa..5527ebf8018 100644 --- a/go/arrow/math/math_noasm.go +++ b/go/arrow/math/math_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build noasm // +build noasm package math diff --git a/go/arrow/math/math_ppc64le.go b/go/arrow/math/math_ppc64le.go index 3daeac7efaf..85c8f2fe2e7 100644 --- a/go/arrow/math/math_ppc64le.go +++ b/go/arrow/math/math_ppc64le.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_s390x.go b/go/arrow/math/math_s390x.go index 3daeac7efaf..85c8f2fe2e7 100644 --- a/go/arrow/math/math_s390x.go +++ b/go/arrow/math/math_s390x.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index af25d1899a6..a6a2f417989 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc +//go:build cgo && ccalloc +// +build cgo,ccalloc package memory diff --git a/go/arrow/memory/cgo_allocator_defaults.go b/go/arrow/memory/cgo_allocator_defaults.go index 501431a0e1e..0a2e9a342d3 100644 --- a/go/arrow/memory/cgo_allocator_defaults.go +++ b/go/arrow/memory/cgo_allocator_defaults.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build !cclog +//go:build cgo && ccalloc && !cclog +// +build cgo,ccalloc,!cclog package memory diff --git a/go/arrow/memory/cgo_allocator_logging.go b/go/arrow/memory/cgo_allocator_logging.go index 01ad6b39480..fe2e3a940ce 100644 --- a/go/arrow/memory/cgo_allocator_logging.go +++ b/go/arrow/memory/cgo_allocator_logging.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build cclog +//go:build cgo && ccalloc && cclog +// +build cgo,ccalloc,cclog package memory diff --git a/go/arrow/memory/memory_amd64.go b/go/arrow/memory/memory_amd64.go index 58356d64825..895ddc07cf8 100644 --- a/go/arrow/memory/memory_amd64.go +++ b/go/arrow/memory/memory_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_arm64.go b/go/arrow/memory/memory_arm64.go index 3db5d110131..52603349585 100755 --- a/go/arrow/memory/memory_arm64.go +++ b/go/arrow/memory/memory_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_avx2_amd64.go b/go/arrow/memory/memory_avx2_amd64.go index 2bd851ea532..39fb3a5f769 100644 --- a/go/arrow/memory/memory_avx2_amd64.go +++ b/go/arrow/memory/memory_avx2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_js_wasm.go b/go/arrow/memory/memory_js_wasm.go index 9b94d99ff33..5cc0c84d39e 100644 --- a/go/arrow/memory/memory_js_wasm.go +++ b/go/arrow/memory/memory_js_wasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build wasm // +build wasm package memory diff --git a/go/arrow/memory/memory_neon_arm64.go b/go/arrow/memory/memory_neon_arm64.go index 6cb0400c9c5..806ca575f22 100755 --- a/go/arrow/memory/memory_neon_arm64.go +++ b/go/arrow/memory/memory_neon_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_noasm.go b/go/arrow/memory/memory_noasm.go index bf8846fa2e0..44f19c091c7 100644 --- a/go/arrow/memory/memory_noasm.go +++ b/go/arrow/memory/memory_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package memory diff --git a/go/arrow/memory/memory_sse4_amd64.go b/go/arrow/memory/memory_sse4_amd64.go index 716c0d2704a..1711a1ee3ea 100644 --- a/go/arrow/memory/memory_sse4_amd64.go +++ b/go/arrow/memory/memory_sse4_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 9528c2b08fd..ce8af89bdc3 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -19,14 +19,121 @@ package arrow import ( "reflect" "unsafe" + + "github.com/apache/arrow/go/v15/arrow/decimal128" + "github.com/apache/arrow/go/v15/arrow/decimal256" + "github.com/apache/arrow/go/v15/arrow/float16" + "golang.org/x/exp/constraints" ) -// CastFromBytesTo[T] reinterprets the slice b to a slice of type T. +// IntType is a type constraint for raw values represented as signed +// integer types by We aren't just using constraints.Signed +// because we don't want to include the raw `int` type here whose size +// changes based on the architecture (int32 on 32-bit architectures and +// int64 on 64-bit architectures). // -// NOTE: len(b) must be a multiple of T's size. -func CastFromBytesTo[T interface{}](b []byte) []T { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - ptr := (*T)(unsafe.Pointer(h.Data)) - size := int(unsafe.Sizeof(*ptr)) - return unsafe.Slice(ptr, cap(b)/size)[:len(b)/size] +// This will also cover types like MonthInterval or the time types +// as their underlying types are int32 and int64 which will get covered +// by using the ~ +type IntType interface { + ~int8 | ~int16 | ~int32 | ~int64 +} + +// UintType is a type constraint for raw values represented as unsigned +// integer types by We aren't just using constraints.Unsigned +// because we don't want to include the raw `uint` type here whose size +// changes based on the architecture (uint32 on 32-bit architectures and +// uint64 on 64-bit architectures). 
diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go
index 9528c2b08fd..ce8af89bdc3 100644
--- a/go/arrow/type_traits.go
+++ b/go/arrow/type_traits.go
@@ -19,14 +19,121 @@ package arrow
 
 import (
 	"reflect"
 	"unsafe"
+
+	"github.com/apache/arrow/go/v15/arrow/decimal128"
+	"github.com/apache/arrow/go/v15/arrow/decimal256"
+	"github.com/apache/arrow/go/v15/arrow/float16"
+	"golang.org/x/exp/constraints"
 )
 
-// CastFromBytesTo[T] reinterprets the slice b to a slice of type T.
+// IntType is a type constraint for raw values represented as signed
+// integer types by Arrow. We aren't just using constraints.Signed
+// because we don't want to include the raw `int` type here whose size
+// changes based on the architecture (int32 on 32-bit architectures and
+// int64 on 64-bit architectures).
 //
-// NOTE: len(b) must be a multiple of T's size.
-func CastFromBytesTo[T interface{}](b []byte) []T {
-	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
-	ptr := (*T)(unsafe.Pointer(h.Data))
-	size := int(unsafe.Sizeof(*ptr))
-	return unsafe.Slice(ptr, cap(b)/size)[:len(b)/size]
+// This will also cover types like MonthInterval or the time types,
+// as their underlying types are int32 and int64, which get covered
+// by using the ~ operator.
+type IntType interface {
+	~int8 | ~int16 | ~int32 | ~int64
+}
+
+// UintType is a type constraint for raw values represented as unsigned
+// integer types by Arrow. We aren't just using constraints.Unsigned
+// because we don't want to include the raw `uint` type here whose size
+// changes based on the architecture (uint32 on 32-bit architectures and
+// uint64 on 64-bit architectures). We also don't want to include uintptr.
+type UintType interface {
+	~uint8 | ~uint16 | ~uint32 | ~uint64
+}
+
+// FloatType is a type constraint for raw values representing
+// floating point values in Arrow. This consists of constraints.Float and
+// float16.Num.
+type FloatType interface {
+	float16.Num | constraints.Float
+}
+
+// NumericType is a type constraint for just signed/unsigned integers
+// and float32/float64.
+type NumericType interface {
+	IntType | UintType | constraints.Float
+}
+
+// FixedWidthType is a type constraint for raw values in Arrow that
+// can be represented as fixed-width byte slices. Specifically this is for
+// using Go generics to easily re-type a byte slice to a properly-typed
+// slice. Booleans are excluded here since they are represented by Arrow
+// as a bitmap and thus the buffer can't just be reinterpreted as a []bool.
+type FixedWidthType interface {
+	IntType | UintType |
+		FloatType | decimal128.Num | decimal256.Num |
+		DayTimeInterval | MonthDayNanoInterval
+}
+
+type TemporalType interface {
+	Date32 | Date64 | Time32 | Time64 |
+		Timestamp | Duration | DayTimeInterval |
+		MonthInterval | MonthDayNanoInterval
+}
+
+func GetValues[T FixedWidthType](data ArrayData, i int) []T {
+	if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 {
+		return nil
+	}
+	ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len())
+	return ret[data.Offset():]
+}
+
+func GetOffsets[T int32 | int64](data ArrayData, i int) []T {
+	ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()+1)
+	return ret[data.Offset():]
+}
+
+func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte {
+	var z T
+	return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*int(unsafe.Sizeof(z)))
+}
+
+func GetData[T FixedWidthType | ViewHeader](in []byte) []T {
+	var z T
+	return unsafe.Slice((*T)(unsafe.Pointer(&in[0])), len(in)/int(unsafe.Sizeof(z)))
+}
+
+var typMap = map[reflect.Type]DataType{
+	reflect.TypeOf(false): FixedWidthTypes.Boolean,
+	reflect.TypeOf(int8(0)): PrimitiveTypes.Int8,
+	reflect.TypeOf(int16(0)): PrimitiveTypes.Int16,
+	reflect.TypeOf(int32(0)): PrimitiveTypes.Int32,
+	reflect.TypeOf(int64(0)): PrimitiveTypes.Int64,
+	reflect.TypeOf(uint8(0)): PrimitiveTypes.Uint8,
+	reflect.TypeOf(uint16(0)): PrimitiveTypes.Uint16,
+	reflect.TypeOf(uint32(0)): PrimitiveTypes.Uint32,
+	reflect.TypeOf(uint64(0)): PrimitiveTypes.Uint64,
+	reflect.TypeOf(float32(0)): PrimitiveTypes.Float32,
+	reflect.TypeOf(float64(0)): PrimitiveTypes.Float64,
+	reflect.TypeOf(string("")): BinaryTypes.String,
+	reflect.TypeOf(Date32(0)): FixedWidthTypes.Date32,
+	reflect.TypeOf(Date64(0)): FixedWidthTypes.Date64,
+	reflect.TypeOf(true): FixedWidthTypes.Boolean,
+	reflect.TypeOf(float16.Num{}): FixedWidthTypes.Float16,
+	reflect.TypeOf([]byte{}): BinaryTypes.Binary,
+}
+
+// GetDataType returns the appropriate DataType for the given type T,
+// only for non-parametric types. This uses a map and reflection internally
+// so don't call this in a tight loop; instead call it once and then use
+// a closure with the result.
+func GetDataType[T NumericType | bool | string | []byte | float16.Num]() DataType {
+	var z T
+	return typMap[reflect.TypeOf(z)]
+}
+
+// GetType returns the appropriate Type for the given type T, only for
+// non-parametric types. This uses a map and reflection internally so
+// don't call this in a tight loop; instead call it once and then use
+// a closure with the result.
+func GetType[T NumericType | bool | string]() Type { + var z T + return typMap[reflect.TypeOf(z)].ID() } diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 3ec28fddc02..093d5d06db2 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal128" @@ -47,14 +46,12 @@ func (decimal128Traits) PutValue(b []byte, v decimal128.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { - return CastFromBytesTo[decimal128.Num](b) + return GetData[decimal128.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (decimal128Traits) CastToBytes(b []decimal128.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal128SizeBytes)[:len(b)*Decimal128SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index a5ecc014e9f..174dd621924 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal256" @@ -44,13 +43,11 @@ func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { // CastFromBytes reinterprets the slice b to a slice of decimal256 func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { - return CastFromBytesTo[decimal256.Num](b) + return GetData[decimal256.Num](b) } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal256SizeBytes)[:len(b)*Decimal256SizeBytes] + return GetBytes(b) } func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index 487a4db0cc4..5369ad352f8 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -46,14 +45,12 @@ func (float16Traits) PutValue(b []byte, v float16.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (float16Traits) CastFromBytes(b []byte) []float16.Num { - return CastFromBytesTo[float16.Num](b) + return GetData[float16.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float16Traits) CastToBytes(b []float16.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float16SizeBytes)[:len(b)*Float16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index aa994cbe86c..ca530a72323 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -57,14 +56,12 @@ func (monthTraits) PutValue(b []byte, v MonthInterval) { // // NOTE: len(b) must be a multiple of MonthIntervalSizeBytes. func (monthTraits) CastFromBytes(b []byte) []MonthInterval { - return CastFromBytesTo[MonthInterval](b) + return GetData[MonthInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (monthTraits) CastToBytes(b []MonthInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthIntervalSizeBytes)[:len(b)*MonthIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -92,14 +89,12 @@ func (daytimeTraits) PutValue(b []byte, v DayTimeInterval) { // // NOTE: len(b) must be a multiple of DayTimeIntervalSizeBytes. func (daytimeTraits) CastFromBytes(b []byte) []DayTimeInterval { - return CastFromBytesTo[DayTimeInterval](b) + return GetData[DayTimeInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (daytimeTraits) CastToBytes(b []DayTimeInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DayTimeIntervalSizeBytes)[:len(b)*DayTimeIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -128,14 +123,12 @@ func (monthDayNanoTraits) PutValue(b []byte, v MonthDayNanoInterval) { // // NOTE: len(b) must be a multiple of MonthDayNanoIntervalSizeBytes. func (monthDayNanoTraits) CastFromBytes(b []byte) []MonthDayNanoInterval { - return CastFromBytesTo[MonthDayNanoInterval](b) + return GetData[MonthDayNanoInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthDayNanoTraits) CastToBytes(b []MonthDayNanoInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthDayNanoIntervalSizeBytes)[:len(b)*MonthDayNanoIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 94e79a4c4ce..06412466032 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -20,7 +20,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -65,14 +64,12 @@ func (int64Traits) PutValue(b []byte, v int64) { // // NOTE: len(b) must be a multiple of Int64SizeBytes. func (int64Traits) CastFromBytes(b []byte) []int64 { - return CastFromBytesTo[int64](b) + return GetData[int64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int64Traits) CastToBytes(b []int64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int64SizeBytes)[:len(b)*Int64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -99,14 +96,12 @@ func (uint64Traits) PutValue(b []byte, v uint64) { // // NOTE: len(b) must be a multiple of Uint64SizeBytes. func (uint64Traits) CastFromBytes(b []byte) []uint64 { - return CastFromBytesTo[uint64](b) + return GetData[uint64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint64Traits) CastToBytes(b []uint64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint64SizeBytes)[:len(b)*Uint64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -133,14 +128,12 @@ func (float64Traits) PutValue(b []byte, v float64) { // // NOTE: len(b) must be a multiple of Float64SizeBytes. func (float64Traits) CastFromBytes(b []byte) []float64 { - return CastFromBytesTo[float64](b) + return GetData[float64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (float64Traits) CastToBytes(b []float64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float64SizeBytes)[:len(b)*Float64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -167,14 +160,12 @@ func (int32Traits) PutValue(b []byte, v int32) { // // NOTE: len(b) must be a multiple of Int32SizeBytes. func (int32Traits) CastFromBytes(b []byte) []int32 { - return CastFromBytesTo[int32](b) + return GetData[int32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int32Traits) CastToBytes(b []int32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int32SizeBytes)[:len(b)*Int32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -201,14 +192,12 @@ func (uint32Traits) PutValue(b []byte, v uint32) { // // NOTE: len(b) must be a multiple of Uint32SizeBytes. func (uint32Traits) CastFromBytes(b []byte) []uint32 { - return CastFromBytesTo[uint32](b) + return GetData[uint32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint32Traits) CastToBytes(b []uint32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint32SizeBytes)[:len(b)*Uint32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -235,14 +224,12 @@ func (float32Traits) PutValue(b []byte, v float32) { // // NOTE: len(b) must be a multiple of Float32SizeBytes. func (float32Traits) CastFromBytes(b []byte) []float32 { - return CastFromBytesTo[float32](b) + return GetData[float32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float32Traits) CastToBytes(b []float32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float32SizeBytes)[:len(b)*Float32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -269,14 +256,12 @@ func (int16Traits) PutValue(b []byte, v int16) { // // NOTE: len(b) must be a multiple of Int16SizeBytes. func (int16Traits) CastFromBytes(b []byte) []int16 { - return CastFromBytesTo[int16](b) + return GetData[int16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int16Traits) CastToBytes(b []int16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int16SizeBytes)[:len(b)*Int16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -303,14 +288,12 @@ func (uint16Traits) PutValue(b []byte, v uint16) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (uint16Traits) CastFromBytes(b []byte) []uint16 { - return CastFromBytesTo[uint16](b) + return GetData[uint16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint16Traits) CastToBytes(b []uint16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint16SizeBytes)[:len(b)*Uint16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -337,14 +320,12 @@ func (int8Traits) PutValue(b []byte, v int8) { // // NOTE: len(b) must be a multiple of Int8SizeBytes. func (int8Traits) CastFromBytes(b []byte) []int8 { - return CastFromBytesTo[int8](b) + return GetData[int8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (int8Traits) CastToBytes(b []int8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int8SizeBytes)[:len(b)*Int8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -371,14 +352,12 @@ func (uint8Traits) PutValue(b []byte, v uint8) { // // NOTE: len(b) must be a multiple of Uint8SizeBytes. func (uint8Traits) CastFromBytes(b []byte) []uint8 { - return CastFromBytesTo[uint8](b) + return GetData[uint8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint8Traits) CastToBytes(b []uint8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint8SizeBytes)[:len(b)*Uint8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -405,14 +384,12 @@ func (time32Traits) PutValue(b []byte, v Time32) { // // NOTE: len(b) must be a multiple of Time32SizeBytes. func (time32Traits) CastFromBytes(b []byte) []Time32 { - return CastFromBytesTo[Time32](b) + return GetData[Time32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time32Traits) CastToBytes(b []Time32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time32SizeBytes)[:len(b)*Time32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -439,14 +416,12 @@ func (time64Traits) PutValue(b []byte, v Time64) { // // NOTE: len(b) must be a multiple of Time64SizeBytes. func (time64Traits) CastFromBytes(b []byte) []Time64 { - return CastFromBytesTo[Time64](b) + return GetData[Time64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time64Traits) CastToBytes(b []Time64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time64SizeBytes)[:len(b)*Time64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -473,14 +448,12 @@ func (date32Traits) PutValue(b []byte, v Date32) { // // NOTE: len(b) must be a multiple of Date32SizeBytes. func (date32Traits) CastFromBytes(b []byte) []Date32 { - return CastFromBytesTo[Date32](b) + return GetData[Date32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date32Traits) CastToBytes(b []Date32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date32SizeBytes)[:len(b)*Date32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -507,14 +480,12 @@ func (date64Traits) PutValue(b []byte, v Date64) { // // NOTE: len(b) must be a multiple of Date64SizeBytes. func (date64Traits) CastFromBytes(b []byte) []Date64 { - return CastFromBytesTo[Date64](b) + return GetData[Date64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date64Traits) CastToBytes(b []Date64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date64SizeBytes)[:len(b)*Date64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -541,14 +512,12 @@ func (durationTraits) PutValue(b []byte, v Duration) { // // NOTE: len(b) must be a multiple of DurationSizeBytes. func (durationTraits) CastFromBytes(b []byte) []Duration { - return CastFromBytesTo[Duration](b) + return GetData[Duration](b) } // CastToBytes reinterprets the slice b to a slice of bytes. 
func (durationTraits) CastToBytes(b []Duration) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DurationSizeBytes)[:len(b)*DurationSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index e53e8eacbc8..e98f59528c6 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -18,7 +18,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -66,14 +65,12 @@ func ({{.name}}Traits) PutValue(b []byte, v {{.Type}}) { // // NOTE: len(b) must be a multiple of {{.Name}}SizeBytes. func ({{.name}}Traits) CastFromBytes(b []byte) []{{.Type}} { - return CastFromBytesTo[{{.Type}}](b) + return GetData[{{.Type}}](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func ({{.name}}Traits) CastToBytes(b []{{.Type}}) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*{{.Name}}SizeBytes)[:len(b)*{{.Name}}SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index 70c38fdc5da..8e9970a719f 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -43,14 +42,12 @@ func (timestampTraits) PutValue(b []byte, v Timestamp) { // // NOTE: len(b) must be a multiple of TimestampSizeBytes. func (timestampTraits) CastFromBytes(b []byte) []Timestamp { - return CastFromBytesTo[Timestamp](b) + return GetData[Timestamp](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (timestampTraits) CastToBytes(b []Timestamp) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*TimestampSizeBytes)[:len(b)*TimestampSizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
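Every CastFromBytes/CastToBytes body above collapses into the two generic helpers introduced earlier, replacing the per-type reflect.SliceHeader boilerplate. As a rough standalone sketch of the reinterpretation they perform (assumed example code, not part of the patch; the hypothetical helper here mirrors GetData):

package main

import (
	"fmt"
	"unsafe"
)

// reinterpret views a byte slice as a []T without copying, assuming
// len(b) is a non-zero multiple of T's size.
func reinterpret[T any](b []byte) []T {
	var z T
	return unsafe.Slice((*T)(unsafe.Pointer(&b[0])), len(b)/int(unsafe.Sizeof(z)))
}

func main() {
	raw := []byte{1, 0, 0, 0, 2, 0, 0, 0} // two int32 values
	fmt.Println(reinterpret[int32](raw))  // [1 2] on little-endian machines
}

Note that taking &b[0] panics on an empty slice; the later commit in this series titled "GetData/GetBytes with empty slices" avoids exactly that edge case by reading the data pointer out of the slice header instead.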
diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index 6d65765e27f..be3f15fed69 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -39,13 +38,11 @@ func (viewHeaderTraits) PutValue(b []byte, v ViewHeader) { } func (viewHeaderTraits) CastFromBytes(b []byte) (res []ViewHeader) { - return CastFromBytesTo[ViewHeader](b) + return GetData[ViewHeader](b) } func (viewHeaderTraits) CastToBytes(b []ViewHeader) (res []byte) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*ViewHeaderSizeBytes)[:len(b)*ViewHeaderSizeBytes] + return GetBytes(b) } func (viewHeaderTraits) Copy(dst, src []ViewHeader) { copy(dst, src) } From 44d8f73633106cd2b8faa52d6903141f2341965f Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 11:26:31 -0500 Subject: [PATCH 10/28] fmt --- .../kernels/basic_arithmetic_noasm.go | 1 + .../kernels/scalar_comparison_noasm.go | 2 +- go/arrow/type_traits.go | 34 +++++++++---------- go/arrow/type_traits_decimal128.go | 2 +- go/arrow/type_traits_decimal256.go | 2 +- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go index d6841c57c7b..2c1559fe0f0 100644 --- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go +++ b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go @@ -19,6 +19,7 @@ package kernels import ( + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/exp/constraints" ) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index e1a22b9b9f9..b36524baa12 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -18,7 +18,7 @@ package kernels -import "github.com/apache/arrow/go/v15/arrow/compute/exec" +import "github.com/apache/arrow/go/v15/arrow" func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index ce8af89bdc3..35cb4014c4e 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -102,23 +102,23 @@ func GetData[T FixedWidthType | ViewHeader](in []byte) []T { } var typMap = map[reflect.Type]DataType{ - reflect.TypeOf(false): FixedWidthTypes.Boolean, - reflect.TypeOf(int8(0)): PrimitiveTypes.Int8, - reflect.TypeOf(int16(0)): PrimitiveTypes.Int16, - reflect.TypeOf(int32(0)): PrimitiveTypes.Int32, - reflect.TypeOf(int64(0)): PrimitiveTypes.Int64, - reflect.TypeOf(uint8(0)): PrimitiveTypes.Uint8, - reflect.TypeOf(uint16(0)): PrimitiveTypes.Uint16, - reflect.TypeOf(uint32(0)): PrimitiveTypes.Uint32, - reflect.TypeOf(uint64(0)): PrimitiveTypes.Uint64, - reflect.TypeOf(float32(0)): PrimitiveTypes.Float32, - reflect.TypeOf(float64(0)): PrimitiveTypes.Float64, - reflect.TypeOf(string("")): BinaryTypes.String, - reflect.TypeOf(Date32(0)): FixedWidthTypes.Date32, - reflect.TypeOf(Date64(0)): FixedWidthTypes.Date64, - reflect.TypeOf(true): FixedWidthTypes.Boolean, - reflect.TypeOf(float16.Num{}): FixedWidthTypes.Float16, - reflect.TypeOf([]byte{}): BinaryTypes.Binary, + reflect.TypeOf(false): FixedWidthTypes.Boolean, + 
reflect.TypeOf(int8(0)): PrimitiveTypes.Int8, + reflect.TypeOf(int16(0)): PrimitiveTypes.Int16, + reflect.TypeOf(int32(0)): PrimitiveTypes.Int32, + reflect.TypeOf(int64(0)): PrimitiveTypes.Int64, + reflect.TypeOf(uint8(0)): PrimitiveTypes.Uint8, + reflect.TypeOf(uint16(0)): PrimitiveTypes.Uint16, + reflect.TypeOf(uint32(0)): PrimitiveTypes.Uint32, + reflect.TypeOf(uint64(0)): PrimitiveTypes.Uint64, + reflect.TypeOf(float32(0)): PrimitiveTypes.Float32, + reflect.TypeOf(float64(0)): PrimitiveTypes.Float64, + reflect.TypeOf(string("")): BinaryTypes.String, + reflect.TypeOf(Date32(0)): FixedWidthTypes.Date32, + reflect.TypeOf(Date64(0)): FixedWidthTypes.Date64, + reflect.TypeOf(true): FixedWidthTypes.Boolean, + reflect.TypeOf(float16.Num{}): FixedWidthTypes.Float16, + reflect.TypeOf([]byte{}): BinaryTypes.Binary, } // GetDataType returns the appropriate DataType for the given type T diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index 093d5d06db2..d600ba29c11 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -51,7 +51,7 @@ func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { // CastToBytes reinterprets the slice b to a slice of bytes. func (decimal128Traits) CastToBytes(b []decimal128.Num) []byte { - return GetBytes(b) + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index 174dd621924..fded46a0a52 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -47,7 +47,7 @@ func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { - return GetBytes(b) + return GetBytes(b) } func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } From 8e4a09cd29bb06d57bdba7178f742d870631b7f6 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 12:09:41 -0500 Subject: [PATCH 11/28] GetOffsets --- go/arrow/compute/vector_hash_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index 1ceed8d1fc0..badd3e68a35 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -106,7 +106,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t *testing.T, input, exp createSlice := func(v arrow.Array) [][]byte { var ( - offsets = exec.GetOffsets[OffsetType](v.Data(), 1) + offsets = arrow.GetOffsets[OffsetType](v.Data(), 1) data = v.Data().Buffers()[2].Bytes() out = make([][]byte, v.Len()) ) From 250597137c471f48594e3c2fe66aa562f0ea4316 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 12 Dec 2023 12:18:54 -0500 Subject: [PATCH 12/28] unused import --- go/arrow/compute/vector_hash_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index badd3e68a35..c37db584805 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/compute" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "github.com/apache/arrow/go/v15/arrow/decimal128" "github.com/apache/arrow/go/v15/arrow/decimal256" "github.com/apache/arrow/go/v15/arrow/memory" From 70058686bafd99a8a96cf08523a4c9a8d34282e2 Mon Sep 17 00:00:00 2001 From: 
Benjamin Kietzman Date: Wed, 13 Dec 2023 12:21:35 -0500 Subject: [PATCH 13/28] GetData/GetBytes with empty slices --- go/arrow/type_traits.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 35cb4014c4e..7c460f7c98b 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -78,27 +78,30 @@ type TemporalType interface { MonthInterval | MonthDayNanoInterval } +func getSlice[Out, T interface{}](b []T, l int) []Out { + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), l) +} + func GetValues[T FixedWidthType](data ArrayData, i int) []T { if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { return nil } - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()) - return ret[data.Offset():] + return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len())[data.Offset():] } func GetOffsets[T int32 | int64](data ArrayData, i int) []T { - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()+1) - return ret[data.Offset():] + return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len()+1)[data.Offset():] } func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { var z T - return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*int(unsafe.Sizeof(z))) + return getSlice[byte](in, len(in)*int(unsafe.Sizeof(z))) } func GetData[T FixedWidthType | ViewHeader](in []byte) []T { var z T - return unsafe.Slice((*T)(unsafe.Pointer(&in[0])), len(in)/int(unsafe.Sizeof(z))) + return getSlice[T](in, len(in)/int(unsafe.Sizeof(z))) } var typMap = map[reflect.Type]DataType{ From 0678158fd316d3faee0f6c1998d109f021eb9708 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 13 Dec 2023 12:49:56 -0500 Subject: [PATCH 14/28] fmt --- go/arrow/type_traits.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 7c460f7c98b..566f1617567 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -80,28 +80,28 @@ type TemporalType interface { func getSlice[Out, T interface{}](b []T, l int) []Out { h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), l) + return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), l) } func GetValues[T FixedWidthType](data ArrayData, i int) []T { if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { return nil } - return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len())[data.Offset():] + return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len())[data.Offset():] } func GetOffsets[T int32 | int64](data ArrayData, i int) []T { - return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len()+1)[data.Offset():] + return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len()+1)[data.Offset():] } func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { var z T - return getSlice[byte](in, len(in)*int(unsafe.Sizeof(z))) + return getSlice[byte](in, len(in)*int(unsafe.Sizeof(z))) } func GetData[T FixedWidthType | ViewHeader](in []byte) []T { var z T - return getSlice[T](in, len(in)/int(unsafe.Sizeof(z))) + return getSlice[T](in, len(in)/int(unsafe.Sizeof(z))) } var typMap = map[reflect.Type]DataType{ From 14fe8ec85bb3ba8a8cc23e705b7d9dd3e2dee45a Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 14:32:40 -0500 Subject: 
[PATCH 15/28] ensure slices have full capacity --- go/arrow/type_traits.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 566f1617567..73485f40ca1 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -78,30 +78,35 @@ type TemporalType interface { MonthInterval | MonthDayNanoInterval } -func getSlice[Out, T interface{}](b []T, l int) []Out { +func sliceAs[Out, T interface{}](b []T) []Out { + len_bytes := len(b) * int(unsafe.Sizeof(b[0])) + cap_bytes := cap(b) * int(unsafe.Sizeof(b[0])) + + var z Out + len_out := len_bytes / int(unsafe.Sizeof(z)) + cap_out := cap_bytes / int(unsafe.Sizeof(z)) + h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), l) + return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), cap_out)[:len_out] } func GetValues[T FixedWidthType](data ArrayData, i int) []T { if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { return nil } - return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len())[data.Offset():] + return sliceAs[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()] } func GetOffsets[T int32 | int64](data ArrayData, i int) []T { - return getSlice[T](data.Buffers()[i].Bytes(), data.Offset()+data.Len()+1)[data.Offset():] + return sliceAs[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()+1] } func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { - var z T - return getSlice[byte](in, len(in)*int(unsafe.Sizeof(z))) + return sliceAs[byte](in) } func GetData[T FixedWidthType | ViewHeader](in []byte) []T { - var z T - return getSlice[T](in, len(in)/int(unsafe.Sizeof(z))) + return sliceAs[T](in) } var typMap = map[reflect.Type]DataType{ From 12dd7ea04c9025e760e359188a6fa747151dca24 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 14:33:17 -0500 Subject: [PATCH 16/28] rename JSON fields --- go/arrow/internal/arrjson/arrjson_test.go | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 31f3cb238ec..164210cbc23 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -6165,7 +6165,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6187,7 +6187,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6199,7 +6199,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" @@ -6221,7 +6221,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "5" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] }, @@ -6238,7 +6238,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6260,7 +6260,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35353535" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6272,20 +6272,20 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6298,7 
+6298,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "5555" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] } @@ -6317,20 +6317,20 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "31C3A931C3A9" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6343,7 +6343,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "3535" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] }, @@ -6357,7 +6357,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "1é1é" @@ -6379,7 +6379,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "55" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] } From 062dade5af0dd3d54af0ea55779948ea9bea179d Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 14:36:06 -0500 Subject: [PATCH 17/28] replace null skipping in minOffset/maxEnd --- go/arrow/array/list.go | 90 ++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 21 deletions(-) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 653222c145b..22fb4a2cee1 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -1417,19 +1417,40 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { // input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 // input.Len() > 0 func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { - inputOffset := input.Offset() - offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():] + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) + } + + // It's very likely that the first non-null non-empty list-view starts at + // offset 0 of the child array. i := 0 - minOffset := offsets[i] // safe because input.Len() > 0 + for i < input.Len() && (isNull(i) || sizes[i] == 0) { + i += 1 + } + if i >= input.Len() { + return 0 + } + minOffset := offsets[i] + if minOffset == 0 { + // early exit: offset 0 found already + return 0 + } - for i += 1; i < input.Len(); i += 1 { - if minOffset == 0 { - // Fast path: the minimum offset is frequently 0 (the start of the child array), - // and frequently a view which has this offset will be near the start of the array. - return 0 + // Slow path: scan the buffers entirely. 
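+	// Null and empty list-views may carry arbitrary offsets, so only
+	// non-null views with a non-zero size are allowed to lower the
+	// minimum in the scan below.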
+ i += 1 + for ; i < input.Len(); i += 1 { + if isNull(i) { + continue } - if offset := offsets[i]; offset < minOffset { + offset := offsets[i] + if offset < minOffset && sizes[i] > 0 { minOffset = offset } } @@ -1441,25 +1462,52 @@ func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { // Pre-conditions: // // input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 -// input.Len() > 0 +// input.Len() > 0 && input.NullN() != input.Len() func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() + var bitmap []byte + if input.Buffers()[0] != nil { + bitmap = input.Buffers()[0].Bytes() + } offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:] - maxLegalOffset := Offset(input.Children()[0].Len()) - - i := input.Len() - 1 - maxEnd := offsets[i] + sizes[i] // safe because input.Len() > 0 + isNull := func(i int) bool { + return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) + } - for i -= 1; i >= 0; i -= 1 { - if maxEnd == maxLegalOffset { - // Fast path: the maximum offset+size is frequently exactly the end of the child array, - // and frequently a view which has this offset+size will be near the end of the array. - return maxEnd + i := input.Len() - 1 // safe because input.Len() > 0 + for i != 0 && (isNull(i) || sizes[i] == 0) { + i -= 1 + } + offset := offsets[i] + size := sizes[i] + if i == 0 { + if isNull(i) || sizes[i] == 0 { + return 0 + } else { + return offset + size } - if end := offsets[i] + sizes[i]; end > maxEnd { - maxEnd = end + } + + values := input.Children()[0] + maxEnd := offsets[i] + sizes[i] + if maxEnd == Offset(values.Len()) { + // Early-exit: maximum possible view-end found already. + return maxEnd + } + + // Slow path: scan the buffers entirely. 
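+	// Symmetrically, only non-null views with a non-zero size can grow
+	// maxEnd, and the loop returns early once a view ends exactly at
+	// the end of the child array.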
+ for ; i >= 0; i -= 1 { + offset := offsets[i] + size := sizes[i] + if size > 0 && !isNull(i) { + if offset+size > maxEnd { + maxEnd = offset + size + if maxEnd == Offset(values.Len()) { + return maxEnd + } + } } } return maxEnd From 623c9f98506218a871b99e1f6ad1d6b8c7fe7792 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 14:36:29 -0500 Subject: [PATCH 18/28] remove usage of minOffset/maxEnd from ipc writer --- go/arrow/ipc/writer.go | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index e9d59f0e35e..386c809c706 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -748,29 +748,17 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { case *arrow.ListViewType, *arrow.LargeListViewType: data := arr.Data() arr := arr.(array.VarLenListLike) - offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - rngOff, rngLen := array.RangeOfValuesUsed(arr) - voffsets := w.getValueOffsetsAtBaseValue(arr, rngOff) - p.body = append(p.body, voffsets) - + voffsets := arr.Data().Buffers()[1] + voffsets.Retain() vsizes := data.Buffers()[2] - if vsizes != nil { - if data.Offset() != 0 || vsizes.Len() > offsetTraits.BytesRequired(arr.Len()) { - beg := offsetTraits.BytesRequired(data.Offset()) - end := beg + offsetTraits.BytesRequired(data.Len()) - vsizes = memory.NewBufferBytes(vsizes.Bytes()[beg:end]) - } else { - vsizes.Retain() - } - } + vsizes.Retain() + p.body = append(p.body, voffsets) p.body = append(p.body, vsizes) w.depth-- var ( - values = arr.ListValues() - mustRelease = false - values_offset = int64(rngOff) - values_end = int64(rngOff + rngLen) + values = arr.ListValues() + mustRelease = false ) defer func() { if mustRelease { @@ -778,11 +766,6 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { } }() - if arr.Len() > 0 && values_end < int64(values.Len()) { - // must also slice the values - values = array.NewSlice(values, values_offset, values_end) - mustRelease = true - } err := w.visit(p, values) if err != nil { From 6f1422066d77f06b39978187d80a13fc4270bced Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 16:16:55 -0500 Subject: [PATCH 19/28] guard for nil buffers --- go/arrow/ipc/writer.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 386c809c706..99444694790 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -749,22 +749,20 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { data := arr.Data() arr := arr.(array.VarLenListLike) voffsets := arr.Data().Buffers()[1] - voffsets.Retain() + if voffsets != nil { + voffsets.Retain() + } vsizes := data.Buffers()[2] - vsizes.Retain() + if vsizes != nil { + vsizes.Retain() + } p.body = append(p.body, voffsets) p.body = append(p.body, vsizes) w.depth-- var ( - values = arr.ListValues() - mustRelease = false + values = arr.ListValues() ) - defer func() { - if mustRelease { - values.Release() - } - }() err := w.visit(p, values) From 6d3c69bcd90cb3beafda661267ecc77d6baccd1a Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 16:19:51 -0500 Subject: [PATCH 20/28] remove unused function --- go/arrow/ipc/writer.go | 57 ------------------------------------------ 1 file changed, 57 deletions(-) diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 99444694790..98d06ce7dbf 100644 --- a/go/arrow/ipc/writer.go 
+++ b/go/arrow/ipc/writer.go @@ -863,63 +863,6 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer return voffsets } -// Truncates the offsets if needed and shifts the values if minOffset > 0. -// The offsets returned are corrected assuming the child values are truncated -// and now start at minOffset. -// -// This function only works on offset buffers of ListViews and LargeListViews. -// TODO(felipecrv): Unify this with getZeroBasedValueOffsets. -func (w *recordEncoder) getValueOffsetsAtBaseValue(arr arrow.Array, minOffset int) *memory.Buffer { - data := arr.Data() - voffsets := data.Buffers()[1] - offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - offsetBytesNeeded := offsetTraits.BytesRequired(data.Len()) - - if voffsets == nil || voffsets.Len() == 0 { - return nil - } - - needsTruncate := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() - needsShift := minOffset > 0 - - if needsTruncate || needsShift { - shiftedOffsets := memory.NewResizableBuffer(w.mem) - shiftedOffsets.Resize(offsetBytesNeeded) - - switch arr.DataType().Layout().Buffers[1].ByteWidth { - case 8: - dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] - - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int64(minOffset) - } - } else { - copy(dest, offsets) - } - default: - debug.Assert(arr.DataType().Layout().Buffers[1].ByteWidth == 4, "invalid offset bytewidth") - dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] - - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int32(minOffset) - } - } else { - copy(dest, offsets) - } - } - - voffsets = shiftedOffsets - } else { - voffsets.Retain() - } - - return voffsets -} - func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer { // this case sucks. 
Because the offsets are different for each // child array, when we have a sliced array, we need to re-base From 952001f7efea4e90cf3164c44fa01394be512b76 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 16:52:01 -0500 Subject: [PATCH 21/28] get list_view's offset buffer size right --- go/arrow/cdata/cdata.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 1ad630e84ad..66494ac55cd 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -762,14 +762,14 @@ func (imp *cimporter) importListViewLike() (err error) { defer nulls.Release() } - if offsets, err = imp.importOffsetsBuffer(1, offsetSize); err != nil { + if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { return } if offsets != nil { defer offsets.Release() } - if sizes, err = imp.importOffsetsBuffer(2, offsetSize); err != nil { + if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { return } if sizes != nil { From 97edc75fd457060050f0a9be5e78f2d395f74234 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 14 Dec 2023 16:56:11 -0500 Subject: [PATCH 22/28] revert autoformat noise --- go/arrow/flight/gen/flight/Flight.pb.go | 26 +- go/arrow/flight/gen/flight/FlightSql.pb.go | 443 +++++++++--------- go/arrow/flight/gen/flight/Flight_grpc.pb.go | 1 - go/arrow/gen-flatbuffers.go | 1 - go/arrow/internal/debug/assert_off.go | 1 - go/arrow/internal/debug/assert_on.go | 1 - go/arrow/internal/debug/doc.go | 6 +- go/arrow/internal/debug/log_off.go | 1 - go/arrow/internal/debug/log_on.go | 1 - go/arrow/internal/debug/util.go | 1 - go/arrow/internal/flatbuf/Binary.go | 2 +- go/arrow/internal/flatbuf/BinaryView.go | 14 +- go/arrow/internal/flatbuf/Block.go | 19 +- go/arrow/internal/flatbuf/BodyCompression.go | 18 +- .../internal/flatbuf/BodyCompressionMethod.go | 6 +- go/arrow/internal/flatbuf/Buffer.go | 34 +- go/arrow/internal/flatbuf/Date.go | 12 +- go/arrow/internal/flatbuf/Decimal.go | 24 +- go/arrow/internal/flatbuf/DictionaryBatch.go | 24 +- .../internal/flatbuf/DictionaryEncoding.go | 48 +- go/arrow/internal/flatbuf/DictionaryKind.go | 10 +- go/arrow/internal/flatbuf/Endianness.go | 4 +- go/arrow/internal/flatbuf/Feature.go | 38 +- go/arrow/internal/flatbuf/Field.go | 34 +- go/arrow/internal/flatbuf/FieldNode.go | 40 +- go/arrow/internal/flatbuf/FixedSizeBinary.go | 4 +- go/arrow/internal/flatbuf/FixedSizeList.go | 4 +- go/arrow/internal/flatbuf/Footer.go | 10 +- go/arrow/internal/flatbuf/KeyValue.go | 6 +- go/arrow/internal/flatbuf/LargeBinary.go | 4 +- go/arrow/internal/flatbuf/LargeList.go | 4 +- go/arrow/internal/flatbuf/LargeListView.go | 4 +- go/arrow/internal/flatbuf/LargeUtf8.go | 4 +- go/arrow/internal/flatbuf/ListView.go | 6 +- go/arrow/internal/flatbuf/Map.go | 54 +-- go/arrow/internal/flatbuf/MessageHeader.go | 16 +- go/arrow/internal/flatbuf/Null.go | 2 +- go/arrow/internal/flatbuf/RecordBatch.go | 102 ++-- go/arrow/internal/flatbuf/RunEndEncoded.go | 10 +- go/arrow/internal/flatbuf/Schema.go | 20 +- .../internal/flatbuf/SparseMatrixIndexCSR.go | 134 +++--- .../internal/flatbuf/SparseMatrixIndexCSX.go | 142 +++--- go/arrow/internal/flatbuf/SparseTensor.go | 28 +- .../internal/flatbuf/SparseTensorIndexCOO.go | 100 ++-- .../internal/flatbuf/SparseTensorIndexCSF.go | 254 +++++----- go/arrow/internal/flatbuf/Struct_.go | 6 +- go/arrow/internal/flatbuf/Tensor.go | 24 +- go/arrow/internal/flatbuf/TensorDim.go | 14 +- go/arrow/internal/flatbuf/Time.go | 28 +- 
go/arrow/internal/flatbuf/Timestamp.go | 250 +++++----- go/arrow/internal/flatbuf/Type.go | 6 +- go/arrow/internal/flatbuf/Union.go | 8 +- go/arrow/internal/flatbuf/Utf8.go | 2 +- go/arrow/internal/flatbuf/Utf8View.go | 14 +- go/arrow/ipc/cmd/arrow-cat/main.go | 66 +-- go/arrow/ipc/cmd/arrow-ls/main.go | 62 +-- go/arrow/math/math_amd64.go | 1 - go/arrow/math/math_arm64.go | 5 +- go/arrow/math/math_noasm.go | 1 - go/arrow/math/math_ppc64le.go | 1 - go/arrow/math/math_s390x.go | 1 - go/arrow/memory/cgo_allocator.go | 4 +- go/arrow/memory/cgo_allocator_defaults.go | 5 +- go/arrow/memory/cgo_allocator_logging.go | 5 +- go/arrow/memory/memory_amd64.go | 1 - go/arrow/memory/memory_arm64.go | 1 - go/arrow/memory/memory_avx2_amd64.go | 1 - go/arrow/memory/memory_js_wasm.go | 1 - go/arrow/memory/memory_neon_arm64.go | 1 - go/arrow/memory/memory_noasm.go | 1 - go/arrow/memory/memory_sse4_amd64.go | 1 - 71 files changed, 1115 insertions(+), 1112 deletions(-) diff --git a/go/arrow/flight/gen/flight/Flight.pb.go b/go/arrow/flight/gen/flight/Flight.pb.go index 745de90cd99..0438bca28be 100644 --- a/go/arrow/flight/gen/flight/Flight.pb.go +++ b/go/arrow/flight/gen/flight/Flight.pb.go @@ -24,12 +24,11 @@ package flight import ( - reflect "reflect" - sync "sync" - protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" timestamppb "google.golang.org/protobuf/types/known/timestamppb" + reflect "reflect" + sync "sync" ) const ( @@ -39,6 +38,7 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) +// // The result of a cancel operation. // // This is used by CancelFlightInfoResult.status. @@ -103,6 +103,7 @@ func (CancelStatus) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{0} } +// // Describes what type of descriptor is defined. type FlightDescriptor_DescriptorType int32 @@ -160,6 +161,7 @@ func (FlightDescriptor_DescriptorType) EnumDescriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{12, 0} } +// // The request that a client provides to a server on handshake. type HandshakeRequest struct { state protoimpl.MessageState @@ -279,6 +281,7 @@ func (x *HandshakeResponse) GetPayload() []byte { return nil } +// // A message for doing simple auth. type BasicAuth struct { state protoimpl.MessageState @@ -373,6 +376,7 @@ func (*Empty) Descriptor() ([]byte, []int) { return file_Flight_proto_rawDescGZIP(), []int{3} } +// // Describes an available action, including both the name used for execution // along with a short description of the purpose of the action. type ActionType struct { @@ -430,6 +434,7 @@ func (x *ActionType) GetDescription() string { return "" } +// // A service specific expression that can be used to return a limited set // of available Arrow Flight streams. type Criteria struct { @@ -479,6 +484,7 @@ func (x *Criteria) GetExpression() []byte { return nil } +// // An opaque action specific for the service. type Action struct { state protoimpl.MessageState @@ -535,6 +541,7 @@ func (x *Action) GetBody() []byte { return nil } +// // The request of the CancelFlightInfo action. // // The request should be stored in Action.body. @@ -585,6 +592,7 @@ func (x *CancelFlightInfoRequest) GetInfo() *FlightInfo { return nil } +// // The request of the RenewFlightEndpoint action. // // The request should be stored in Action.body. 
@@ -635,6 +643,7 @@ func (x *RenewFlightEndpointRequest) GetEndpoint() *FlightEndpoint { return nil } +// // An opaque result returned after executing an action. type Result struct { state protoimpl.MessageState @@ -683,6 +692,7 @@ func (x *Result) GetBody() []byte { return nil } +// // The result of the CancelFlightInfo action. // // The result should be stored in Result.body. @@ -733,6 +743,7 @@ func (x *CancelFlightInfoResult) GetStatus() CancelStatus { return CancelStatus_CANCEL_STATUS_UNSPECIFIED } +// // Wrap the result of a getSchema call type SchemaResult struct { state protoimpl.MessageState @@ -785,6 +796,7 @@ func (x *SchemaResult) GetSchema() []byte { return nil } +// // The name or tag for a Flight. May be used as a way to retrieve or generate // a flight or be used to expose a set of previously defined flights. type FlightDescriptor struct { @@ -856,6 +868,7 @@ func (x *FlightDescriptor) GetPath() []string { return nil } +// // The access coordinates for retrieval of a dataset. With a FlightInfo, a // consumer is able to determine how to retrieve a dataset. type FlightInfo struct { @@ -987,6 +1000,7 @@ func (x *FlightInfo) GetAppMetadata() []byte { return nil } +// // The information to process a long-running query. type PollInfo struct { state protoimpl.MessageState @@ -1086,6 +1100,7 @@ func (x *PollInfo) GetExpirationTime() *timestamppb.Timestamp { return nil } +// // A particular stream or split associated with a flight. type FlightEndpoint struct { state protoimpl.MessageState @@ -1181,6 +1196,7 @@ func (x *FlightEndpoint) GetAppMetadata() []byte { return nil } +// // A location where a Flight service will accept retrieval of a particular // stream given a ticket. type Location struct { @@ -1230,6 +1246,7 @@ func (x *Location) GetUri() string { return "" } +// // An opaque identifier that the service can use to retrieve a particular // portion of a stream. // @@ -1282,6 +1299,7 @@ func (x *Ticket) GetTicket() []byte { return nil } +// // A batch of Arrow data as part of a stream of batches. type FlightData struct { state protoimpl.MessageState @@ -1366,7 +1384,7 @@ func (x *FlightData) GetDataBody() []byte { return nil } -// * +//* // The response message associated with the submission of a DoPut. type PutResult struct { state protoimpl.MessageState diff --git a/go/arrow/flight/gen/flight/FlightSql.pb.go b/go/arrow/flight/gen/flight/FlightSql.pb.go index fb81a0dc9ed..279dc29c426 100644 --- a/go/arrow/flight/gen/flight/FlightSql.pb.go +++ b/go/arrow/flight/gen/flight/FlightSql.pb.go @@ -24,12 +24,11 @@ package flight import ( - reflect "reflect" - sync "sync" - protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" descriptorpb "google.golang.org/protobuf/types/descriptorpb" + reflect "reflect" + sync "sync" ) const ( @@ -1703,7 +1702,7 @@ func (SqlSupportsConvert) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{16} } -// * +//* // The JDBC/ODBC-defined type of any object. // All the values here are the same as in the JDBC and ODBC specs. type XdbcDataType int32 @@ -1818,7 +1817,7 @@ func (XdbcDataType) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{17} } -// * +//* // Detailed subtype information for XDBC_TYPE_DATETIME and XDBC_TYPE_INTERVAL. 
type XdbcDatetimeSubcode int32 @@ -2294,23 +2293,22 @@ func (ActionCancelQueryResult_CancelResult) EnumDescriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{29, 0} } +// // Represents a metadata request. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the metadata request. // // The returned Arrow schema will be: // < -// -// info_name: uint32 not null, -// value: dense_union< -// string_value: utf8, -// bool_value: bool, -// bigint_value: int64, -// int32_bitmask: int32, -// string_list: list -// int32_to_int32_list_map: map> -// +// info_name: uint32 not null, +// value: dense_union< +// string_value: utf8, +// bool_value: bool, +// bigint_value: int64, +// int32_bitmask: int32, +// string_list: list +// int32_to_int32_list_map: map> // > // where there is one row per requested piece of metadata information. type CommandGetSqlInfo struct { @@ -2378,62 +2376,61 @@ func (x *CommandGetSqlInfo) GetInfo() []uint32 { return nil } +// // Represents a request to retrieve information about data type supported on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned schema will be: // < -// -// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), -// data_type: int32 not null (The SQL data type), -// column_size: int32 (The maximum size supported by that column. -// In case of exact numeric types, this represents the maximum precision. -// In case of string types, this represents the character length. -// In case of datetime data types, this represents the length in characters of the string representation. -// NULL is returned for data types where column size is not applicable.), -// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for -// data types where a literal prefix is not applicable.), -// literal_suffix: utf8 (Character or characters used to terminate a literal, -// NULL is returned for data types where a literal suffix is not applicable.), -// create_params: list -// (A list of keywords corresponding to which parameters can be used when creating -// a column for that specific type. -// NULL is returned if there are no parameters for the data type definition.), -// nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the -// Nullable enum.), -// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), -// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the -// Searchable enum.), -// unsigned_attribute: bool (Shows if the data type is unsigned. NULL is returned if the attribute is -// not applicable to the data type or the data type is not numeric.), -// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), -// auto_increment: bool (Shows if the data type is auto incremental. 
NULL is returned if the attribute -// is not applicable to the data type or the data type is not numeric.), -// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL -// is returned if a localized name is not supported by the data source), -// minimum_scale: int32 (The minimum scale of the data type on the data source. -// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE -// columns both contain this value. NULL is returned if scale is not applicable.), -// maximum_scale: int32 (The maximum scale of the data type on the data source. -// NULL is returned if scale is not applicable.), -// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values -// as data_type value. Except for interval and datetime, which -// uses generic values. More info about those types can be -// obtained through datetime_subcode. The possible values can be seen -// in the XdbcDataType enum.), -// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains -// its sub types. For type different from interval and datetime, this value -// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), -// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains -// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For -// exact numeric types, this column contains the value 10 to indicate that -// column size specifies a number of decimal digits. Otherwise, this column is NULL.), -// interval_precision: int32 (If the data type is an interval data type, then this column contains the value -// of the interval leading precision. Otherwise, this column is NULL. This fields -// is only relevant to be used by ODBC). -// +// type_name: utf8 not null (The name of the data type, for example: VARCHAR, INTEGER, etc), +// data_type: int32 not null (The SQL data type), +// column_size: int32 (The maximum size supported by that column. +// In case of exact numeric types, this represents the maximum precision. +// In case of string types, this represents the character length. +// In case of datetime data types, this represents the length in characters of the string representation. +// NULL is returned for data types where column size is not applicable.), +// literal_prefix: utf8 (Character or characters used to prefix a literal, NULL is returned for +// data types where a literal prefix is not applicable.), +// literal_suffix: utf8 (Character or characters used to terminate a literal, +// NULL is returned for data types where a literal suffix is not applicable.), +// create_params: list +// (A list of keywords corresponding to which parameters can be used when creating +// a column for that specific type. +// NULL is returned if there are no parameters for the data type definition.), +// nullable: int32 not null (Shows if the data type accepts a NULL value. The possible values can be seen in the +// Nullable enum.), +// case_sensitive: bool not null (Shows if a character data type is case-sensitive in collations and comparisons), +// searchable: int32 not null (Shows how the data type is used in a WHERE clause. The possible values can be seen in the +// Searchable enum.), +// unsigned_attribute: bool (Shows if the data type is unsigned. 
NULL is returned if the attribute is +// not applicable to the data type or the data type is not numeric.), +// fixed_prec_scale: bool not null (Shows if the data type has predefined fixed precision and scale.), +// auto_increment: bool (Shows if the data type is auto incremental. NULL is returned if the attribute +// is not applicable to the data type or the data type is not numeric.), +// local_type_name: utf8 (Localized version of the data source-dependent name of the data type. NULL +// is returned if a localized name is not supported by the data source), +// minimum_scale: int32 (The minimum scale of the data type on the data source. +// If a data type has a fixed scale, the MINIMUM_SCALE and MAXIMUM_SCALE +// columns both contain this value. NULL is returned if scale is not applicable.), +// maximum_scale: int32 (The maximum scale of the data type on the data source. +// NULL is returned if scale is not applicable.), +// sql_data_type: int32 not null (The value of the SQL DATA TYPE which has the same values +// as data_type value. Except for interval and datetime, which +// uses generic values. More info about those types can be +// obtained through datetime_subcode. The possible values can be seen +// in the XdbcDataType enum.), +// datetime_subcode: int32 (Only used when the SQL DATA TYPE is interval or datetime. It contains +// its sub types. For type different from interval and datetime, this value +// is NULL. The possible values can be seen in the XdbcDatetimeSubcode enum.), +// num_prec_radix: int32 (If the data type is an approximate numeric type, this column contains +// the value 2 to indicate that COLUMN_SIZE specifies a number of bits. For +// exact numeric types, this column contains the value 10 to indicate that +// column size specifies a number of decimal digits. Otherwise, this column is NULL.), +// interval_precision: int32 (If the data type is an interval data type, then this column contains the value +// of the interval leading precision. Otherwise, this column is NULL. This fields +// is only relevant to be used by ODBC). // > // The returned data should be ordered by data_type and then by type_name. type CommandGetXdbcTypeInfo struct { @@ -2485,17 +2482,16 @@ func (x *CommandGetXdbcTypeInfo) GetDataType() int32 { return 0 } +// // Represents a request to retrieve the list of catalogs on a Flight SQL enabled backend. // The definition of a catalog depends on vendor/implementation. It is usually the database itself // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// catalog_name: utf8 not null -// +// catalog_name: utf8 not null // > // The returned data should be ordered by catalog_name. type CommandGetCatalogs struct { @@ -2536,18 +2532,17 @@ func (*CommandGetCatalogs) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{2} } +// // Represents a request to retrieve the list of database schemas on a Flight SQL enabled backend. // The definition of a database schema depends on vendor/implementation. It is usually a collection of tables. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. 
+// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// catalog_name: utf8, -// db_schema_name: utf8 not null -// +// catalog_name: utf8, +// db_schema_name: utf8 not null // > // The returned data should be ordered by catalog_name, then db_schema_name. type CommandGetDbSchemas struct { @@ -2615,34 +2610,32 @@ func (x *CommandGetDbSchemas) GetDbSchemaFilterPattern() string { return "" } +// // Represents a request to retrieve the list of tables, and optionally their schemas, on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// table_type: utf8 not null, -// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, -// it is serialized as an IPC message.) -// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// table_type: utf8 not null, +// [optional] table_schema: bytes not null (schema of the table as described in Schema.fbs::Schema, +// it is serialized as an IPC message.) // > // Fields on table_schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. // The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. 
type CommandGetTables struct { state protoimpl.MessageState @@ -2744,18 +2737,17 @@ func (x *CommandGetTables) GetIncludeSchema() bool { return false } +// // Represents a request to retrieve the list of table types on a Flight SQL enabled backend. // The table types depend on vendor/implementation. It is usually used to separate tables from views or system tables. // TABLE, VIEW, and SYSTEM TABLE are commonly supported. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// table_type: utf8 not null -// +// table_type: utf8 not null // > // The returned data should be ordered by table_type. type CommandGetTableTypes struct { @@ -2796,21 +2788,20 @@ func (*CommandGetTableTypes) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{5} } +// // Represents a request to retrieve the primary keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// catalog_name: utf8, -// db_schema_name: utf8, -// table_name: utf8 not null, -// column_name: utf8 not null, -// key_name: utf8, -// key_sequence: int32 not null -// +// catalog_name: utf8, +// db_schema_name: utf8, +// table_name: utf8 not null, +// column_name: utf8 not null, +// key_name: utf8, +// key_sequence: int32 not null // > // The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. type CommandGetPrimaryKeys struct { @@ -2885,29 +2876,28 @@ func (x *CommandGetPrimaryKeys) GetTable() string { return "" } +// // Represents a request to retrieve a description of the foreign key columns that reference the given table's // primary key columns (the foreign keys exported by a table) of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. 
// // The returned Arrow schema will be: // < -// -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null -// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null // > // The returned data should be ordered by fk_catalog_name, fk_db_schema_name, fk_table_name, fk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. @@ -2983,36 +2973,35 @@ func (x *CommandGetExportedKeys) GetTable() string { return "" } +// // Represents a request to retrieve the foreign keys of a table on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. +// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null -// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetImportedKeys struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3085,38 +3074,37 @@ func (x *CommandGetImportedKeys) GetTable() string { return "" } +// // Represents a request to retrieve a description of the foreign key columns in the given foreign key table that // reference the primary key or the columns representing a unique constraint of the parent table (could be the same // or a different table) on a Flight SQL enabled backend. // Used in the command member of FlightDescriptor for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// - GetFlightInfo: execute the catalog metadata request. +// - GetSchema: return the Arrow schema of the query. 
+// - GetFlightInfo: execute the catalog metadata request. // // The returned Arrow schema will be: // < -// -// pk_catalog_name: utf8, -// pk_db_schema_name: utf8, -// pk_table_name: utf8 not null, -// pk_column_name: utf8 not null, -// fk_catalog_name: utf8, -// fk_db_schema_name: utf8, -// fk_table_name: utf8 not null, -// fk_column_name: utf8 not null, -// key_sequence: int32 not null, -// fk_key_name: utf8, -// pk_key_name: utf8, -// update_rule: uint8 not null, -// delete_rule: uint8 not null -// +// pk_catalog_name: utf8, +// pk_db_schema_name: utf8, +// pk_table_name: utf8 not null, +// pk_column_name: utf8 not null, +// fk_catalog_name: utf8, +// fk_db_schema_name: utf8, +// fk_table_name: utf8 not null, +// fk_column_name: utf8 not null, +// key_sequence: int32 not null, +// fk_key_name: utf8, +// pk_key_name: utf8, +// update_rule: uint8 not null, +// delete_rule: uint8 not null // > // The returned data should be ordered by pk_catalog_name, pk_db_schema_name, pk_table_name, pk_key_name, then key_sequence. // update_rule and delete_rule returns a byte that is equivalent to actions: -// - 0 = CASCADE -// - 1 = RESTRICT -// - 2 = SET NULL -// - 3 = NO ACTION -// - 4 = SET DEFAULT +// - 0 = CASCADE +// - 1 = RESTRICT +// - 2 = SET NULL +// - 3 = NO ACTION +// - 4 = SET DEFAULT type CommandGetCrossReference struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3224,6 +3212,7 @@ func (x *CommandGetCrossReference) GetFkTable() string { return "" } +// // Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. type ActionCreatePreparedStatementRequest struct { state protoimpl.MessageState @@ -3283,6 +3272,7 @@ func (x *ActionCreatePreparedStatementRequest) GetTransactionId() []byte { return nil } +// // An embedded message describing a Substrait plan to execute. type SubstraitPlan struct { state protoimpl.MessageState @@ -3346,6 +3336,7 @@ func (x *SubstraitPlan) GetVersion() string { return "" } +// // Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. type ActionCreatePreparedSubstraitPlanRequest struct { state protoimpl.MessageState @@ -3405,6 +3396,7 @@ func (x *ActionCreatePreparedSubstraitPlanRequest) GetTransactionId() []byte { return nil } +// // Wrap the result of a "CreatePreparedStatement" or "CreatePreparedSubstraitPlan" action. // // The resultant PreparedStatement can be closed either: @@ -3480,6 +3472,7 @@ func (x *ActionCreatePreparedStatementResult) GetParameterSchema() []byte { return nil } +// // Request message for the "ClosePreparedStatement" action on a Flight SQL enabled backend. // Closes server resources associated with the prepared statement handle. type ActionClosePreparedStatementRequest struct { @@ -3530,6 +3523,7 @@ func (x *ActionClosePreparedStatementRequest) GetPreparedStatementHandle() []byt return nil } +// // Request message for the "BeginTransaction" action. // Begins a transaction. type ActionBeginTransactionRequest struct { @@ -3570,6 +3564,7 @@ func (*ActionBeginTransactionRequest) Descriptor() ([]byte, []int) { return file_FlightSql_proto_rawDescGZIP(), []int{15} } +// // Request message for the "BeginSavepoint" action. // Creates a savepoint within a transaction. // @@ -3632,6 +3627,7 @@ func (x *ActionBeginSavepointRequest) GetName() string { return "" } +// // The result of a "BeginTransaction" action. 
// // The transaction can be manipulated with the "EndTransaction" action, or @@ -3687,6 +3683,7 @@ func (x *ActionBeginTransactionResult) GetTransactionId() []byte { return nil } +// // The result of a "BeginSavepoint" action. // // The transaction can be manipulated with the "EndSavepoint" action. @@ -3742,6 +3739,7 @@ func (x *ActionBeginSavepointResult) GetSavepointId() []byte { return nil } +// // Request message for the "EndTransaction" action. // // Commit (COMMIT) or rollback (ROLLBACK) the transaction. @@ -3805,6 +3803,7 @@ func (x *ActionEndTransactionRequest) GetAction() ActionEndTransactionRequest_En return ActionEndTransactionRequest_END_TRANSACTION_UNSPECIFIED } +// // Request message for the "EndSavepoint" action. // // Release (RELEASE) the savepoint or rollback (ROLLBACK) to the @@ -3870,21 +3869,22 @@ func (x *ActionEndSavepointRequest) GetAction() ActionEndSavepointRequest_EndSav return ActionEndSavepointRequest_END_SAVEPOINT_UNSPECIFIED } +// // Represents a SQL query. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. type CommandStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -3942,22 +3942,23 @@ func (x *CommandStatementQuery) GetTransactionId() []byte { return nil } +// // Represents a Substrait plan. Used in the command member of FlightDescriptor // for the following RPC calls: -// - GetSchema: return the Arrow schema of the query. 
-// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - GetFlightInfo: execute the query. -// - DoPut: execute the query. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - GetFlightInfo: execute the query. +// - DoPut: execute the query. type CommandStatementSubstraitPlan struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4015,7 +4016,7 @@ func (x *CommandStatementSubstraitPlan) GetTransactionId() []byte { return nil } -// * +//* // Represents a ticket resulting from GetFlightInfo with a CommandStatementQuery. // This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. type TicketStatementQuery struct { @@ -4066,22 +4067,23 @@ func (x *TicketStatementQuery) GetStatementHandle() []byte { return nil } +// // Represents an instance of executing a prepared statement. Used in the command member of FlightDescriptor for // the following RPC calls: -// - GetSchema: return the Arrow schema of the query. -// Fields on this schema may contain the following metadata: -// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name -// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name -// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name -// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. -// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size -// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable -// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. -// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. 
-// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. -// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. -// - GetFlightInfo: execute the prepared statement instance. +// - GetSchema: return the Arrow schema of the query. +// Fields on this schema may contain the following metadata: +// - ARROW:FLIGHT:SQL:CATALOG_NAME - Table's catalog name +// - ARROW:FLIGHT:SQL:DB_SCHEMA_NAME - Database schema name +// - ARROW:FLIGHT:SQL:TABLE_NAME - Table name +// - ARROW:FLIGHT:SQL:TYPE_NAME - The data source-specific name for the data type of the column. +// - ARROW:FLIGHT:SQL:PRECISION - Column precision/size +// - ARROW:FLIGHT:SQL:SCALE - Column scale/decimal digits if applicable +// - ARROW:FLIGHT:SQL:IS_AUTO_INCREMENT - "1" indicates if the column is auto incremented, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. +// - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. +// - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. +// - GetFlightInfo: execute the prepared statement instance. type CommandPreparedStatementQuery struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -4130,6 +4132,7 @@ func (x *CommandPreparedStatementQuery) GetPreparedStatementHandle() []byte { return nil } +// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included SQL update. type CommandStatementUpdate struct { @@ -4189,6 +4192,7 @@ func (x *CommandStatementUpdate) GetTransactionId() []byte { return nil } +// // Represents a SQL update query. Used in the command member of FlightDescriptor // for the RPC call DoPut to cause the server to execute the included // prepared statement handle as an update. @@ -4240,6 +4244,7 @@ func (x *CommandPreparedStatementUpdate) GetPreparedStatementHandle() []byte { return nil } +// // Returned from the RPC call DoPut when a CommandStatementUpdate // CommandPreparedStatementUpdate was in the request, containing // results from the update. @@ -4292,6 +4297,7 @@ func (x *DoPutUpdateResult) GetRecordCount() int64 { return 0 } +// // Request message for the "CancelQuery" action. // // Explicitly cancel a running query. @@ -4360,6 +4366,7 @@ func (x *ActionCancelQueryRequest) GetInfo() []byte { return nil } +// // The result of cancelling a query. // // The result should be wrapped in a google.protobuf.Any message. diff --git a/go/arrow/flight/gen/flight/Flight_grpc.pb.go b/go/arrow/flight/gen/flight/Flight_grpc.pb.go index 34fcd5d09c9..87d9abc5926 100644 --- a/go/arrow/flight/gen/flight/Flight_grpc.pb.go +++ b/go/arrow/flight/gen/flight/Flight_grpc.pb.go @@ -8,7 +8,6 @@ package flight import ( context "context" - grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" diff --git a/go/arrow/gen-flatbuffers.go b/go/arrow/gen-flatbuffers.go index 720016e0bf1..5c8eba4a247 100644 --- a/go/arrow/gen-flatbuffers.go +++ b/go/arrow/gen-flatbuffers.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build ignore // +build ignore package main diff --git a/go/arrow/internal/debug/assert_off.go b/go/arrow/internal/debug/assert_off.go index 1450ecc98a2..52b9a233169 100644 --- a/go/arrow/internal/debug/assert_off.go +++ b/go/arrow/internal/debug/assert_off.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !assert // +build !assert package debug diff --git a/go/arrow/internal/debug/assert_on.go b/go/arrow/internal/debug/assert_on.go index 4a57169b313..2aa5d6ace4c 100644 --- a/go/arrow/internal/debug/assert_on.go +++ b/go/arrow/internal/debug/assert_on.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build assert // +build assert package debug diff --git a/go/arrow/internal/debug/doc.go b/go/arrow/internal/debug/doc.go index 094e427a22e..3ee1783ca4b 100644 --- a/go/arrow/internal/debug/doc.go +++ b/go/arrow/internal/debug/doc.go @@ -17,12 +17,14 @@ /* Package debug provides APIs for conditional runtime assertions and debug logging. -# Using Assert + +Using Assert To enable runtime assertions, build with the assert tag. When the assert tag is omitted, the code for the assertion will be omitted from the binary. -# Using Log + +Using Log To enable runtime debug logs, build with the debug tag. When the debug tag is omitted, the code for logging will be omitted from the binary. diff --git a/go/arrow/internal/debug/log_off.go b/go/arrow/internal/debug/log_off.go index 760a5cdc0dc..48da8e1ee94 100644 --- a/go/arrow/internal/debug/log_off.go +++ b/go/arrow/internal/debug/log_off.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !debug // +build !debug package debug diff --git a/go/arrow/internal/debug/log_on.go b/go/arrow/internal/debug/log_on.go index 2588e7d1069..99d0c8ae33f 100644 --- a/go/arrow/internal/debug/log_on.go +++ b/go/arrow/internal/debug/log_on.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build debug // +build debug package debug diff --git a/go/arrow/internal/debug/util.go b/go/arrow/internal/debug/util.go index ea4eba7fb5c..7bd3d5389e6 100644 --- a/go/arrow/internal/debug/util.go +++ b/go/arrow/internal/debug/util.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build debug || assert // +build debug assert package debug diff --git a/go/arrow/internal/flatbuf/Binary.go b/go/arrow/internal/flatbuf/Binary.go index 95e015595b5..e8018e74c41 100644 --- a/go/arrow/internal/flatbuf/Binary.go +++ b/go/arrow/internal/flatbuf/Binary.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Opaque binary data +/// Opaque binary data type Binary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/BinaryView.go b/go/arrow/internal/flatbuf/BinaryView.go index f6906674bdb..09ca5e7db96 100644 --- a/go/arrow/internal/flatbuf/BinaryView.go +++ b/go/arrow/internal/flatbuf/BinaryView.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Logically the same as Binary, but the internal representation uses a view -// / struct that contains the string length and either the string's entire data -// / inline (for small strings) or an inlined prefix, an index of another buffer, -// / and an offset pointing to a slice in that buffer (for non-small strings). -// / -// / Since it uses a variable number of data buffers, each Field with this type -// / must have a corresponding entry in `variadicBufferCounts`. +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. type BinaryView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Block.go b/go/arrow/internal/flatbuf/Block.go index 8e33d3e6415..57a697b1968 100644 --- a/go/arrow/internal/flatbuf/Block.go +++ b/go/arrow/internal/flatbuf/Block.go @@ -35,34 +35,31 @@ func (rcv *Block) Table() flatbuffers.Table { return rcv._tab.Table } -// / Index to the start of the RecordBlock (note this is past the Message header) +/// Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } - -// / Index to the start of the RecordBlock (note this is past the Message header) +/// Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -// / Length of the metadata +/// Length of the metadata func (rcv *Block) MetaDataLength() int32 { return rcv._tab.GetInt32(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } - -// / Length of the metadata +/// Length of the metadata func (rcv *Block) MutateMetaDataLength(n int32) bool { return rcv._tab.MutateInt32(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } -// / Length of the data (this is aligned so there can be a gap between this and -// / the metadata). +/// Length of the data (this is aligned so there can be a gap between this and +/// the metadata). func (rcv *Block) BodyLength() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(16)) } - -// / Length of the data (this is aligned so there can be a gap between this and -// / the metadata). +/// Length of the data (this is aligned so there can be a gap between this and +/// the metadata). 
func (rcv *Block) MutateBodyLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(16), n) } diff --git a/go/arrow/internal/flatbuf/BodyCompression.go b/go/arrow/internal/flatbuf/BodyCompression.go index c23c2919021..6468e231352 100644 --- a/go/arrow/internal/flatbuf/BodyCompression.go +++ b/go/arrow/internal/flatbuf/BodyCompression.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Optional compression for the memory buffers constituting IPC message -// / bodies. Intended for use with RecordBatch but could be used for other -// / message types +/// Optional compression for the memory buffers constituting IPC message +/// bodies. Intended for use with RecordBatch but could be used for other +/// message types type BodyCompression struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *BodyCompression) Table() flatbuffers.Table { return rcv._tab } -// / Compressor library. -// / For LZ4_FRAME, each compressed buffer must consist of a single frame. +/// Compressor library. +/// For LZ4_FRAME, each compressed buffer must consist of a single frame. func (rcv *BodyCompression) Codec() CompressionType { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *BodyCompression) Codec() CompressionType { return 0 } -// / Compressor library. -// / For LZ4_FRAME, each compressed buffer must consist of a single frame. +/// Compressor library. +/// For LZ4_FRAME, each compressed buffer must consist of a single frame. func (rcv *BodyCompression) MutateCodec(n CompressionType) bool { return rcv._tab.MutateInt8Slot(4, int8(n)) } -// / Indicates the way the record batch body was compressed +/// Indicates the way the record batch body was compressed func (rcv *BodyCompression) Method() BodyCompressionMethod { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -70,7 +70,7 @@ func (rcv *BodyCompression) Method() BodyCompressionMethod { return 0 } -// / Indicates the way the record batch body was compressed +/// Indicates the way the record batch body was compressed func (rcv *BodyCompression) MutateMethod(n BodyCompressionMethod) bool { return rcv._tab.MutateInt8Slot(6, int8(n)) } diff --git a/go/arrow/internal/flatbuf/BodyCompressionMethod.go b/go/arrow/internal/flatbuf/BodyCompressionMethod.go index bb7234b3989..108ab3e07fb 100644 --- a/go/arrow/internal/flatbuf/BodyCompressionMethod.go +++ b/go/arrow/internal/flatbuf/BodyCompressionMethod.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -// / Provided for forward compatibility in case we need to support different -// / strategies for compressing the IPC message body (like whole-body -// / compression rather than buffer-level) in the future +/// Provided for forward compatibility in case we need to support different +/// strategies for compressing the IPC message body (like whole-body +/// compression rather than buffer-level) in the future type BodyCompressionMethod int8 const ( diff --git a/go/arrow/internal/flatbuf/Buffer.go b/go/arrow/internal/flatbuf/Buffer.go index e650e06a570..eba8d99b28e 100644 --- a/go/arrow/internal/flatbuf/Buffer.go +++ b/go/arrow/internal/flatbuf/Buffer.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / A Buffer represents a single contiguous memory segment +/// ---------------------------------------------------------------------- +/// A Buffer represents a single contiguous memory segment type Buffer 
struct { _tab flatbuffers.Struct } @@ -37,32 +37,30 @@ func (rcv *Buffer) Table() flatbuffers.Table { return rcv._tab.Table } -// / The relative offset into the shared memory page where the bytes for this -// / buffer starts +/// The relative offset into the shared memory page where the bytes for this +/// buffer starts func (rcv *Buffer) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } - -// / The relative offset into the shared memory page where the bytes for this -// / buffer starts +/// The relative offset into the shared memory page where the bytes for this +/// buffer starts func (rcv *Buffer) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -// / The absolute length (in bytes) of the memory buffer. The memory is found -// / from offset (inclusive) to offset + length (non-inclusive). When building -// / messages using the encapsulated IPC message, padding bytes may be written -// / after a buffer, but such padding bytes do not need to be accounted for in -// / the size here. +/// The absolute length (in bytes) of the memory buffer. The memory is found +/// from offset (inclusive) to offset + length (non-inclusive). When building +/// messages using the encapsulated IPC message, padding bytes may be written +/// after a buffer, but such padding bytes do not need to be accounted for in +/// the size here. func (rcv *Buffer) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } - -// / The absolute length (in bytes) of the memory buffer. The memory is found -// / from offset (inclusive) to offset + length (non-inclusive). When building -// / messages using the encapsulated IPC message, padding bytes may be written -// / after a buffer, but such padding bytes do not need to be accounted for in -// / the size here. +/// The absolute length (in bytes) of the memory buffer. The memory is found +/// from offset (inclusive) to offset + length (non-inclusive). When building +/// messages using the encapsulated IPC message, padding bytes may be written +/// after a buffer, but such padding bytes do not need to be accounted for in +/// the size here. 
func (rcv *Buffer) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/Date.go b/go/arrow/internal/flatbuf/Date.go index 985a8f79955..32983ec54cc 100644 --- a/go/arrow/internal/flatbuf/Date.go +++ b/go/arrow/internal/flatbuf/Date.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Date is either a 32-bit or 64-bit signed integer type representing an -// / elapsed time since UNIX epoch (1970-01-01), stored in either of two units: -// / -// / * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no -// / leap seconds), where the values are evenly divisible by 86400000 -// / * Days (32 bits) since the UNIX epoch +/// Date is either a 32-bit or 64-bit signed integer type representing an +/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +/// +/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +/// leap seconds), where the values are evenly divisible by 86400000 +/// * Days (32 bits) since the UNIX epoch type Date struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Decimal.go b/go/arrow/internal/flatbuf/Decimal.go index 2fc9d5ad658..c9de254d1dc 100644 --- a/go/arrow/internal/flatbuf/Decimal.go +++ b/go/arrow/internal/flatbuf/Decimal.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Exact decimal value represented as an integer value in two's -// / complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers -// / are used. The representation uses the endianness indicated -// / in the Schema. +/// Exact decimal value represented as an integer value in two's +/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +/// are used. The representation uses the endianness indicated +/// in the Schema. type Decimal struct { _tab flatbuffers.Table } @@ -46,7 +46,7 @@ func (rcv *Decimal) Table() flatbuffers.Table { return rcv._tab } -// / Total number of decimal digits +/// Total number of decimal digits func (rcv *Decimal) Precision() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,12 +55,12 @@ func (rcv *Decimal) Precision() int32 { return 0 } -// / Total number of decimal digits +/// Total number of decimal digits func (rcv *Decimal) MutatePrecision(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } -// / Number of digits after the decimal point "." +/// Number of digits after the decimal point "." func (rcv *Decimal) Scale() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -69,13 +69,13 @@ func (rcv *Decimal) Scale() int32 { return 0 } -// / Number of digits after the decimal point "." +/// Number of digits after the decimal point "." func (rcv *Decimal) MutateScale(n int32) bool { return rcv._tab.MutateInt32Slot(6, n) } -// / Number of bits per value. The only accepted widths are 128 and 256. -// / We use bitWidth for consistency with Int::bitWidth. +/// Number of bits per value. The only accepted widths are 128 and 256. +/// We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) BitWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,8 +84,8 @@ func (rcv *Decimal) BitWidth() int32 { return 128 } -// / Number of bits per value. The only accepted widths are 128 and 256. -// / We use bitWidth for consistency with Int::bitWidth. +/// Number of bits per value. The only accepted widths are 128 and 256. 
+/// We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) MutateBitWidth(n int32) bool { return rcv._tab.MutateInt32Slot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryBatch.go b/go/arrow/internal/flatbuf/DictionaryBatch.go index 999c5fda463..25b5384e46a 100644 --- a/go/arrow/internal/flatbuf/DictionaryBatch.go +++ b/go/arrow/internal/flatbuf/DictionaryBatch.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / For sending dictionary encoding information. Any Field can be -// / dictionary-encoded, but in this case none of its children may be -// / dictionary-encoded. -// / There is one vector / column per dictionary, but that vector / column -// / may be spread across multiple dictionary batches by using the isDelta -// / flag +/// For sending dictionary encoding information. Any Field can be +/// dictionary-encoded, but in this case none of its children may be +/// dictionary-encoded. +/// There is one vector / column per dictionary, but that vector / column +/// may be spread across multiple dictionary batches by using the isDelta +/// flag type DictionaryBatch struct { _tab flatbuffers.Table } @@ -73,9 +73,9 @@ func (rcv *DictionaryBatch) Data(obj *RecordBatch) *RecordBatch { return nil } -// / If isDelta is true the values in the dictionary are to be appended to a -// / dictionary with the indicated id. If isDelta is false this dictionary -// / should replace the existing dictionary. +/// If isDelta is true the values in the dictionary are to be appended to a +/// dictionary with the indicated id. If isDelta is false this dictionary +/// should replace the existing dictionary. func (rcv *DictionaryBatch) IsDelta() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,9 +84,9 @@ func (rcv *DictionaryBatch) IsDelta() bool { return false } -// / If isDelta is true the values in the dictionary are to be appended to a -// / dictionary with the indicated id. If isDelta is false this dictionary -// / should replace the existing dictionary. +/// If isDelta is true the values in the dictionary are to be appended to a +/// dictionary with the indicated id. If isDelta is false this dictionary +/// should replace the existing dictionary. func (rcv *DictionaryBatch) MutateIsDelta(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryEncoding.go b/go/arrow/internal/flatbuf/DictionaryEncoding.go index 44c3874219f..a9b09530b2a 100644 --- a/go/arrow/internal/flatbuf/DictionaryEncoding.go +++ b/go/arrow/internal/flatbuf/DictionaryEncoding.go @@ -42,9 +42,9 @@ func (rcv *DictionaryEncoding) Table() flatbuffers.Table { return rcv._tab } -// / The known dictionary id in the application where this data is used. In -// / the file or streaming formats, the dictionary ids are found in the -// / DictionaryBatch messages +/// The known dictionary id in the application where this data is used. In +/// the file or streaming formats, the dictionary ids are found in the +/// DictionaryBatch messages func (rcv *DictionaryEncoding) Id() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -53,18 +53,18 @@ func (rcv *DictionaryEncoding) Id() int64 { return 0 } -// / The known dictionary id in the application where this data is used. In -// / the file or streaming formats, the dictionary ids are found in the -// / DictionaryBatch messages +/// The known dictionary id in the application where this data is used. 
In +/// the file or streaming formats, the dictionary ids are found in the +/// DictionaryBatch messages func (rcv *DictionaryEncoding) MutateId(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -// / The dictionary indices are constrained to be non-negative integers. If -// / this field is null, the indices must be signed int32. To maximize -// / cross-language compatibility and performance, implementations are -// / recommended to prefer signed integer types over unsigned integer types -// / and to avoid uint64 indices unless they are required by an application. +/// The dictionary indices are constrained to be non-negative integers. If +/// this field is null, the indices must be signed int32. To maximize +/// cross-language compatibility and performance, implementations are +/// recommended to prefer signed integer types over unsigned integer types +/// and to avoid uint64 indices unless they are required by an application. func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -78,15 +78,15 @@ func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { return nil } -// / The dictionary indices are constrained to be non-negative integers. If -// / this field is null, the indices must be signed int32. To maximize -// / cross-language compatibility and performance, implementations are -// / recommended to prefer signed integer types over unsigned integer types -// / and to avoid uint64 indices unless they are required by an application. -// / By default, dictionaries are not ordered, or the order does not have -// / semantic meaning. In some statistical, applications, dictionary-encoding -// / is used to represent ordered categorical data, and we provide a way to -// / preserve that metadata here +/// The dictionary indices are constrained to be non-negative integers. If +/// this field is null, the indices must be signed int32. To maximize +/// cross-language compatibility and performance, implementations are +/// recommended to prefer signed integer types over unsigned integer types +/// and to avoid uint64 indices unless they are required by an application. +/// By default, dictionaries are not ordered, or the order does not have +/// semantic meaning. In some statistical, applications, dictionary-encoding +/// is used to represent ordered categorical data, and we provide a way to +/// preserve that metadata here func (rcv *DictionaryEncoding) IsOrdered() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -95,10 +95,10 @@ func (rcv *DictionaryEncoding) IsOrdered() bool { return false } -// / By default, dictionaries are not ordered, or the order does not have -// / semantic meaning. In some statistical, applications, dictionary-encoding -// / is used to represent ordered categorical data, and we provide a way to -// / preserve that metadata here +/// By default, dictionaries are not ordered, or the order does not have +/// semantic meaning. 
In some statistical, applications, dictionary-encoding +/// is used to represent ordered categorical data, and we provide a way to +/// preserve that metadata here func (rcv *DictionaryEncoding) MutateIsOrdered(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryKind.go b/go/arrow/internal/flatbuf/DictionaryKind.go index 68251005156..126ba5f7f6b 100644 --- a/go/arrow/internal/flatbuf/DictionaryKind.go +++ b/go/arrow/internal/flatbuf/DictionaryKind.go @@ -20,11 +20,11 @@ package flatbuf import "strconv" -// / ---------------------------------------------------------------------- -// / Dictionary encoding metadata -// / Maintained for forwards compatibility, in the future -// / Dictionaries might be explicit maps between integers and values -// / allowing for non-contiguous index values +/// ---------------------------------------------------------------------- +/// Dictionary encoding metadata +/// Maintained for forwards compatibility, in the future +/// Dictionaries might be explicit maps between integers and values +/// allowing for non-contiguous index values type DictionaryKind int16 const ( diff --git a/go/arrow/internal/flatbuf/Endianness.go b/go/arrow/internal/flatbuf/Endianness.go index c9619b7b0d9..cefa2ff9c06 100644 --- a/go/arrow/internal/flatbuf/Endianness.go +++ b/go/arrow/internal/flatbuf/Endianness.go @@ -20,8 +20,8 @@ package flatbuf import "strconv" -// / ---------------------------------------------------------------------- -// / Endianness of the platform producing the data +/// ---------------------------------------------------------------------- +/// Endianness of the platform producing the data type Endianness int16 const ( diff --git a/go/arrow/internal/flatbuf/Feature.go b/go/arrow/internal/flatbuf/Feature.go index 2204c440ed4..ae5a0398b60 100644 --- a/go/arrow/internal/flatbuf/Feature.go +++ b/go/arrow/internal/flatbuf/Feature.go @@ -20,35 +20,35 @@ package flatbuf import "strconv" -// / Represents Arrow Features that might not have full support -// / within implementations. This is intended to be used in -// / two scenarios: -// / 1. A mechanism for readers of Arrow Streams -// / and files to understand that the stream or file makes -// / use of a feature that isn't supported or unknown to -// / the implementation (and therefore can meet the Arrow -// / forward compatibility guarantees). -// / 2. A means of negotiating between a client and server -// / what features a stream is allowed to use. The enums -// / values here are intented to represent higher level -// / features, additional details maybe negotiated -// / with key-value pairs specific to the protocol. -// / -// / Enums added to this list should be assigned power-of-two values -// / to facilitate exchanging and comparing bitmaps for supported -// / features. +/// Represents Arrow Features that might not have full support +/// within implementations. This is intended to be used in +/// two scenarios: +/// 1. A mechanism for readers of Arrow Streams +/// and files to understand that the stream or file makes +/// use of a feature that isn't supported or unknown to +/// the implementation (and therefore can meet the Arrow +/// forward compatibility guarantees). +/// 2. A means of negotiating between a client and server +/// what features a stream is allowed to use. The enums +/// values here are intented to represent higher level +/// features, additional details maybe negotiated +/// with key-value pairs specific to the protocol. 
+/// +/// Enums added to this list should be assigned power-of-two values +/// to facilitate exchanging and comparing bitmaps for supported +/// features. type Feature int64 const ( /// Needed to make flatbuffers happy. - FeatureUNUSED Feature = 0 + FeatureUNUSED Feature = 0 /// The stream makes use of multiple full dictionaries with the /// same ID and assumes clients implement dictionary replacement /// correctly. FeatureDICTIONARY_REPLACEMENT Feature = 1 /// The stream makes use of compressed bodies as described /// in Message.fbs. - FeatureCOMPRESSED_BODY Feature = 2 + FeatureCOMPRESSED_BODY Feature = 2 ) var EnumNamesFeature = map[Feature]string{ diff --git a/go/arrow/internal/flatbuf/Field.go b/go/arrow/internal/flatbuf/Field.go index 8aed29bc481..c03cf2f878b 100644 --- a/go/arrow/internal/flatbuf/Field.go +++ b/go/arrow/internal/flatbuf/Field.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / A field represents a named column in a record / row batch or child of a -// / nested type. +/// ---------------------------------------------------------------------- +/// A field represents a named column in a record / row batch or child of a +/// nested type. type Field struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *Field) Table() flatbuffers.Table { return rcv._tab } -// / Name is not required, in i.e. a List +/// Name is not required, in i.e. a List func (rcv *Field) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,8 +54,8 @@ func (rcv *Field) Name() []byte { return nil } -// / Name is not required, in i.e. a List -// / Whether or not this field can contain nulls. Should be true in general. +/// Name is not required, in i.e. a List +/// Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) Nullable() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -64,7 +64,7 @@ func (rcv *Field) Nullable() bool { return false } -// / Whether or not this field can contain nulls. Should be true in general. +/// Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) MutateNullable(n bool) bool { return rcv._tab.MutateBoolSlot(6, n) } @@ -81,7 +81,7 @@ func (rcv *Field) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(8, byte(n)) } -// / This is the type of the decoded value if the field is dictionary encoded. +/// This is the type of the decoded value if the field is dictionary encoded. func (rcv *Field) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -91,8 +91,8 @@ func (rcv *Field) Type(obj *flatbuffers.Table) bool { return false } -// / This is the type of the decoded value if the field is dictionary encoded. -// / Present only if the field is dictionary encoded. +/// This is the type of the decoded value if the field is dictionary encoded. +/// Present only if the field is dictionary encoded. func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -106,9 +106,9 @@ func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { return nil } -// / Present only if the field is dictionary encoded. -// / children apply only to nested data types like Struct, List and Union. For -// / primitive types children will have length 0. +/// Present only if the field is dictionary encoded. 
+/// children apply only to nested data types like Struct, List and Union. For +/// primitive types children will have length 0. func (rcv *Field) Children(obj *Field, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -129,9 +129,9 @@ func (rcv *Field) ChildrenLength() int { return 0 } -// / children apply only to nested data types like Struct, List and Union. For -// / primitive types children will have length 0. -// / User-defined metadata +/// children apply only to nested data types like Struct, List and Union. For +/// primitive types children will have length 0. +/// User-defined metadata func (rcv *Field) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -152,7 +152,7 @@ func (rcv *Field) CustomMetadataLength() int { return 0 } -// / User-defined metadata +/// User-defined metadata func FieldStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/FieldNode.go b/go/arrow/internal/flatbuf/FieldNode.go index 0e258a3d2cd..606b30bfebb 100644 --- a/go/arrow/internal/flatbuf/FieldNode.go +++ b/go/arrow/internal/flatbuf/FieldNode.go @@ -22,15 +22,15 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / Data structures for describing a table row batch (a collection of -// / equal-length Arrow arrays) -// / Metadata about a field at some level of a nested type tree (but not -// / its children). -// / -// / For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` -// / would have {length: 5, null_count: 2} for its List node, and {length: 6, -// / null_count: 0} for its Int16 node, as separate FieldNode structs +/// ---------------------------------------------------------------------- +/// Data structures for describing a table row batch (a collection of +/// equal-length Arrow arrays) +/// Metadata about a field at some level of a nested type tree (but not +/// its children). +/// +/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +/// would have {length: 5, null_count: 2} for its List node, and {length: 6, +/// null_count: 0} for its Int16 node, as separate FieldNode structs type FieldNode struct { _tab flatbuffers.Struct } @@ -44,28 +44,26 @@ func (rcv *FieldNode) Table() flatbuffers.Table { return rcv._tab.Table } -// / The number of value slots in the Arrow array at this level of a nested -// / tree +/// The number of value slots in the Arrow array at this level of a nested +/// tree func (rcv *FieldNode) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } - -// / The number of value slots in the Arrow array at this level of a nested -// / tree +/// The number of value slots in the Arrow array at this level of a nested +/// tree func (rcv *FieldNode) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -// / The number of observed nulls. Fields with null_count == 0 may choose not -// / to write their physical validity bitmap out as a materialized buffer, -// / instead setting the length of the bitmap buffer to 0. +/// The number of observed nulls. Fields with null_count == 0 may choose not +/// to write their physical validity bitmap out as a materialized buffer, +/// instead setting the length of the bitmap buffer to 0. 
func (rcv *FieldNode) NullCount() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } - -// / The number of observed nulls. Fields with null_count == 0 may choose not -// / to write their physical validity bitmap out as a materialized buffer, -// / instead setting the length of the bitmap buffer to 0. +/// The number of observed nulls. Fields with null_count == 0 may choose not +/// to write their physical validity bitmap out as a materialized buffer, +/// instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) MutateNullCount(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeBinary.go b/go/arrow/internal/flatbuf/FixedSizeBinary.go index 2725dfb90b9..4e660d5077f 100644 --- a/go/arrow/internal/flatbuf/FixedSizeBinary.go +++ b/go/arrow/internal/flatbuf/FixedSizeBinary.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeBinary) Table() flatbuffers.Table { return rcv._tab } -// / Number of bytes per value +/// Number of bytes per value func (rcv *FixedSizeBinary) ByteWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeBinary) ByteWidth() int32 { return 0 } -// / Number of bytes per value +/// Number of bytes per value func (rcv *FixedSizeBinary) MutateByteWidth(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeList.go b/go/arrow/internal/flatbuf/FixedSizeList.go index 534ca27f2fe..dabf5cc8581 100644 --- a/go/arrow/internal/flatbuf/FixedSizeList.go +++ b/go/arrow/internal/flatbuf/FixedSizeList.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeList) Table() flatbuffers.Table { return rcv._tab } -// / Number of list items per value +/// Number of list items per value func (rcv *FixedSizeList) ListSize() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeList) ListSize() int32 { return 0 } -// / Number of list items per value +/// Number of list items per value func (rcv *FixedSizeList) MutateListSize(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/Footer.go b/go/arrow/internal/flatbuf/Footer.go index d65af41e7f6..65b0ff09546 100644 --- a/go/arrow/internal/flatbuf/Footer.go +++ b/go/arrow/internal/flatbuf/Footer.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / Arrow File metadata -// / +/// ---------------------------------------------------------------------- +/// Arrow File metadata +/// type Footer struct { _tab flatbuffers.Table } @@ -108,7 +108,7 @@ func (rcv *Footer) RecordBatchesLength() int { return 0 } -// / User-defined metadata +/// User-defined metadata func (rcv *Footer) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -129,7 +129,7 @@ func (rcv *Footer) CustomMetadataLength() int { return 0 } -// / User-defined metadata +/// User-defined metadata func FooterStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/KeyValue.go b/go/arrow/internal/flatbuf/KeyValue.go index 0cd5dc62923..c1b85318ecd 100644 --- a/go/arrow/internal/flatbuf/KeyValue.go +++ b/go/arrow/internal/flatbuf/KeyValue.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / user defined 
key value pairs to add custom metadata to arrow -// / key namespacing is the responsibility of the user +/// ---------------------------------------------------------------------- +/// user defined key value pairs to add custom metadata to arrow +/// key namespacing is the responsibility of the user type KeyValue struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeBinary.go b/go/arrow/internal/flatbuf/LargeBinary.go index b25ecc41aff..2c3befcc16f 100644 --- a/go/arrow/internal/flatbuf/LargeBinary.go +++ b/go/arrow/internal/flatbuf/LargeBinary.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Same as Binary, but with 64-bit offsets, allowing to represent -// / extremely large data values. +/// Same as Binary, but with 64-bit offsets, allowing to represent +/// extremely large data values. type LargeBinary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeList.go b/go/arrow/internal/flatbuf/LargeList.go index d8bfb9c07df..92f22845874 100644 --- a/go/arrow/internal/flatbuf/LargeList.go +++ b/go/arrow/internal/flatbuf/LargeList.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Same as List, but with 64-bit offsets, allowing to represent -// / extremely large data values. +/// Same as List, but with 64-bit offsets, allowing to represent +/// extremely large data values. type LargeList struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeListView.go b/go/arrow/internal/flatbuf/LargeListView.go index 4608c1dec53..5b1df149cd1 100644 --- a/go/arrow/internal/flatbuf/LargeListView.go +++ b/go/arrow/internal/flatbuf/LargeListView.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Same as ListView, but with 64-bit offsets and sizes, allowing to represent -// / extremely large data values. +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. type LargeListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeUtf8.go b/go/arrow/internal/flatbuf/LargeUtf8.go index 4478fed856e..e78b33e1100 100644 --- a/go/arrow/internal/flatbuf/LargeUtf8.go +++ b/go/arrow/internal/flatbuf/LargeUtf8.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Same as Utf8, but with 64-bit offsets, allowing to represent -// / extremely large data values. +/// Same as Utf8, but with 64-bit offsets, allowing to represent +/// extremely large data values. type LargeUtf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/ListView.go b/go/arrow/internal/flatbuf/ListView.go index cde43cf5b68..46b1e0b3cbf 100644 --- a/go/arrow/internal/flatbuf/ListView.go +++ b/go/arrow/internal/flatbuf/ListView.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Represents the same logical types that List can, but contains offsets and -// / sizes allowing for writes in any order and sharing of child values among -// / list values. +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. 
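Because ListView is newer than List, a small illustration of the comment above may help: each list slot carries an independent (offset, size) pair into the child array, so slots can be written in any order and may share child values. A self-contained sketch, with plain Go slices standing in for Arrow buffers:

```go
package main

import "fmt"

func main() {
	// Child values shared by every list slot.
	values := []int32{4, 5, 6, 1, 2, 3}
	// Unlike List offsets, these need not be monotonic, and ranges may overlap.
	offsets := []int32{3, 0, 3}
	sizes := []int32{3, 2, 2}

	for i := range offsets {
		list := values[offsets[i] : offsets[i]+sizes[i]]
		fmt.Println(i, list)
	}
	// Output:
	// 0 [1 2 3]
	// 1 [4 5]
	// 2 [1 2]  <- shares child values with slot 0
}
```

Classic List offsets must increase monotonically, so slot 2 could not reuse slot 0's values without copying; the extra sizes buffer is what buys ListView that freedom.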
type ListView struct {
	_tab flatbuffers.Table
}
diff --git a/go/arrow/internal/flatbuf/Map.go b/go/arrow/internal/flatbuf/Map.go
index d4871e55819..8802aba1ebd 100644
--- a/go/arrow/internal/flatbuf/Map.go
+++ b/go/arrow/internal/flatbuf/Map.go
@@ -22,31 +22,31 @@ import (
	flatbuffers "github.com/google/flatbuffers/go"
)

-// / A Map is a logical nested type that is represented as
-// /
-// / List<entries: Struct<key: K, value: V>>
-// /
-// / In this layout, the keys and values are each respectively contiguous. We do
-// / not constrain the key and value types, so the application is responsible
-// / for ensuring that the keys are hashable and unique. Whether the keys are sorted
-// / may be set in the metadata for this field.
-// /
-// / In a field with Map type, the field has a child Struct field, which then
-// / has two children: the first the key type and the second the value type. The names of the
-// / child fields may be respectively "entries", "key", and "value", but this is
-// / not enforced.
-// /
-// / Map
-// / ```text
-// /   - child[0] entries: Struct
-// /     - child[0] key: K
-// /     - child[1] value: V
-// / ```
-// / Neither the "entries" field nor the "key" field may be nullable.
-// /
-// / The metadata is structured so that Arrow systems without special handling
-// / for Map can make Map an alias for List. The "layout" attribute for the Map
-// / field must have the same contents as a List.
+/// A Map is a logical nested type that is represented as
+///
+/// List<entries: Struct<key: K, value: V>>
+///
+/// In this layout, the keys and values are each respectively contiguous. We do
+/// not constrain the key and value types, so the application is responsible
+/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
+/// may be set in the metadata for this field.
+///
+/// In a field with Map type, the field has a child Struct field, which then
+/// has two children: the first the key type and the second the value type. The names of the
+/// child fields may be respectively "entries", "key", and "value", but this is
+/// not enforced.
+///
+/// Map
+/// ```text
+///   - child[0] entries: Struct
+///     - child[0] key: K
+///     - child[1] value: V
+/// ```
+/// Neither the "entries" field nor the "key" field may be nullable.
+///
+/// The metadata is structured so that Arrow systems without special handling
+/// for Map can make Map an alias for List. The "layout" attribute for the Map
+/// field must have the same contents as a List.
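To make the Map layout above concrete, here is a sketch of the equivalent "list of key-value Structs" shape in plain Go; the type names (entry, mapValue) are illustrative only, and keysSorted corresponds to each entry list being sorted by key:

```go
package main

import (
	"fmt"
	"sort"
)

// entry mirrors the child Struct<key, value> of a Map's "entries" field.
// Neither "entries" nor "key" may be nullable, so both fields are plain values.
type entry struct {
	key   string
	value int32
}

// mapValue mirrors one Map slot: a list of entries.
type mapValue []entry

func main() {
	m := mapValue{{"a", 1}, {"b", 2}}
	// keysSorted == true asserts each entry list is sorted by key.
	keysSorted := sort.SliceIsSorted(m, func(i, j int) bool { return m[i].key < m[j].key })
	fmt.Println(m, keysSorted) // [{a 1} {b 2}] true
}
```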
type Map struct { _tab flatbuffers.Table } @@ -67,7 +67,7 @@ func (rcv *Map) Table() flatbuffers.Table { return rcv._tab } -// / Set to true if the keys within each value are sorted +/// Set to true if the keys within each value are sorted func (rcv *Map) KeysSorted() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -76,7 +76,7 @@ func (rcv *Map) KeysSorted() bool { return false } -// / Set to true if the keys within each value are sorted +/// Set to true if the keys within each value are sorted func (rcv *Map) MutateKeysSorted(n bool) bool { return rcv._tab.MutateBoolSlot(4, n) } diff --git a/go/arrow/internal/flatbuf/MessageHeader.go b/go/arrow/internal/flatbuf/MessageHeader.go index d7f9907c7a7..c12fc105811 100644 --- a/go/arrow/internal/flatbuf/MessageHeader.go +++ b/go/arrow/internal/flatbuf/MessageHeader.go @@ -20,14 +20,14 @@ package flatbuf import "strconv" -// / ---------------------------------------------------------------------- -// / The root Message type -// / This union enables us to easily send different message types without -// / redundant storage, and in the future we can easily add new message types. -// / -// / Arrow implementations do not need to implement all of the message types, -// / which may include experimental metadata types. For maximum compatibility, -// / it is best to send data using RecordBatch +/// ---------------------------------------------------------------------- +/// The root Message type +/// This union enables us to easily send different message types without +/// redundant storage, and in the future we can easily add new message types. +/// +/// Arrow implementations do not need to implement all of the message types, +/// which may include experimental metadata types. For maximum compatibility, +/// it is best to send data using RecordBatch type MessageHeader byte const ( diff --git a/go/arrow/internal/flatbuf/Null.go b/go/arrow/internal/flatbuf/Null.go index 3b93a1b6ee9..3c3eb4bda36 100644 --- a/go/arrow/internal/flatbuf/Null.go +++ b/go/arrow/internal/flatbuf/Null.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / These are stored in the flatbuffer in the Type union below +/// These are stored in the flatbuffer in the Type union below type Null struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/RecordBatch.go b/go/arrow/internal/flatbuf/RecordBatch.go index 52c72a8a20a..c50f4a6e868 100644 --- a/go/arrow/internal/flatbuf/RecordBatch.go +++ b/go/arrow/internal/flatbuf/RecordBatch.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / A data header describing the shared memory layout of a "record" or "row" -// / batch. Some systems call this a "row batch" internally and others a "record -// / batch". +/// A data header describing the shared memory layout of a "record" or "row" +/// batch. Some systems call this a "row batch" internally and others a "record +/// batch". type RecordBatch struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *RecordBatch) Table() flatbuffers.Table { return rcv._tab } -// / number of records / rows. The arrays in the batch should all have this -// / length +/// number of records / rows. The arrays in the batch should all have this +/// length func (rcv *RecordBatch) Length() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *RecordBatch) Length() int64 { return 0 } -// / number of records / rows. 
The arrays in the batch should all have this
-// / length
+/// number of records / rows. The arrays in the batch should all have this
+/// length
func (rcv *RecordBatch) MutateLength(n int64) bool {
	return rcv._tab.MutateInt64Slot(4, n)
}

-// / Nodes correspond to the pre-ordered flattened logical schema
+/// Nodes correspond to the pre-ordered flattened logical schema
func (rcv *RecordBatch) Nodes(obj *FieldNode, j int) bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
	if o != 0 {
@@ -81,13 +81,13 @@ func (rcv *RecordBatch) NodesLength() int {
	return 0
}

-// / Nodes correspond to the pre-ordered flattened logical schema
-// / Buffers correspond to the pre-ordered flattened buffer tree
-// /
-// / The number of buffers appended to this list depends on the schema. For
-// / example, most primitive arrays will have 2 buffers, 1 for the validity
-// / bitmap and 1 for the values. For struct arrays, there will only be a
-// / single buffer for the validity (nulls) bitmap
+/// Nodes correspond to the pre-ordered flattened logical schema
+/// Buffers correspond to the pre-ordered flattened buffer tree
+///
+/// The number of buffers appended to this list depends on the schema. For
+/// example, most primitive arrays will have 2 buffers, 1 for the validity
+/// bitmap and 1 for the values. For struct arrays, there will only be a
+/// single buffer for the validity (nulls) bitmap
func (rcv *RecordBatch) Buffers(obj *Buffer, j int) bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
	if o != 0 {
@@ -107,13 +107,13 @@ func (rcv *RecordBatch) BuffersLength() int {
	return 0
}

-// / Buffers correspond to the pre-ordered flattened buffer tree
-// /
-// / The number of buffers appended to this list depends on the schema. For
-// / example, most primitive arrays will have 2 buffers, 1 for the validity
-// / bitmap and 1 for the values. For struct arrays, there will only be a
-// / single buffer for the validity (nulls) bitmap
-// / Optional compression of the message body
+/// Buffers correspond to the pre-ordered flattened buffer tree
+///
+/// The number of buffers appended to this list depends on the schema. For
+/// example, most primitive arrays will have 2 buffers, 1 for the validity
+/// bitmap and 1 for the values. For struct arrays, there will only be a
+/// single buffer for the validity (nulls) bitmap
+/// Optional compression of the message body
func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
	if o != 0 {
@@ -127,21 +127,21 @@ func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression {
	return nil
}

-// / Optional compression of the message body
-// / Some types such as Utf8View are represented using a variable number of buffers.
-// / For each such Field in the pre-ordered flattened logical schema, there will be
-// / an entry in variadicBufferCounts to indicate the number of variadic
-// / buffers which belong to that Field in the current RecordBatch.
-// /
-// / For example, the schema
-// / col1: Struct<alpha: Int32, beta: BinaryView>
-// / col2: Utf8View
-// / contains two Fields with variadic buffers, so variadicBufferCounts will have
-// / two entries, the first counting the variadic buffers of `col1.beta` and the
-// / second counting `col2`'s.
-// /
-// / This field may be omitted if and only if the schema contains no Fields with
-// / a variable number of buffers, such as BinaryView and Utf8View.
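Since this patch series wires up StringView/BinaryView in Go, the buffer accounting described above deserves a concrete sketch. The fixed buffer counts below follow the layout rules in the comment; the variadic counts are made up for illustration, since they vary per batch:

```go
package main

import "fmt"

func main() {
	// Hypothetical buffer accounting for the example schema above:
	//   col1: Struct<alpha: Int32, beta: BinaryView>
	//   col2: Utf8View
	// Fixed buffers per field, in pre-ordered flattened schema order.
	fixed := []struct {
		field string
		n     int
	}{
		{"col1 (Struct)", 1},          // validity bitmap only
		{"col1.alpha (Int32)", 2},     // validity + values
		{"col1.beta (BinaryView)", 2}, // validity + views
		{"col2 (Utf8View)", 2},        // validity + views
	}
	for _, f := range fixed {
		fmt.Println(f.field, f.n)
	}
	// One entry per view-typed field: here col1.beta carries three variadic
	// data buffers and col2 carries two (illustrative values only).
	variadicBufferCounts := []int64{3, 2}
	fmt.Println(variadicBufferCounts)
}
```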
+/// Optional compression of the message body
+/// Some types such as Utf8View are represented using a variable number of buffers.
+/// For each such Field in the pre-ordered flattened logical schema, there will be
+/// an entry in variadicBufferCounts to indicate the number of variadic
+/// buffers which belong to that Field in the current RecordBatch.
+///
+/// For example, the schema
+/// col1: Struct<alpha: Int32, beta: BinaryView>
+/// col2: Utf8View
+/// contains two Fields with variadic buffers, so variadicBufferCounts will have
+/// two entries, the first counting the variadic buffers of `col1.beta` and the
+/// second counting `col2`'s.
+///
+/// This field may be omitted if and only if the schema contains no Fields with
+/// a variable number of buffers, such as BinaryView and Utf8View.
func (rcv *RecordBatch) VariadicBufferCounts(j int) int64 {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
	if o != 0 {
@@ -159,20 +159,20 @@ func (rcv *RecordBatch) VariadicBufferCountsLength() int {
	return 0
}

-// / Some types such as Utf8View are represented using a variable number of buffers.
-// / For each such Field in the pre-ordered flattened logical schema, there will be
-// / an entry in variadicBufferCounts to indicate the number of variadic
-// / buffers which belong to that Field in the current RecordBatch.
-// /
-// / For example, the schema
-// / col1: Struct<alpha: Int32, beta: BinaryView>
-// / col2: Utf8View
-// / contains two Fields with variadic buffers, so variadicBufferCounts will have
-// / two entries, the first counting the variadic buffers of `col1.beta` and the
-// / second counting `col2`'s.
-// /
-// / This field may be omitted if and only if the schema contains no Fields with
-// / a variable number of buffers, such as BinaryView and Utf8View.
+/// Some types such as Utf8View are represented using a variable number of buffers.
+/// For each such Field in the pre-ordered flattened logical schema, there will be
+/// an entry in variadicBufferCounts to indicate the number of variadic
+/// buffers which belong to that Field in the current RecordBatch.
+///
+/// For example, the schema
+/// col1: Struct<alpha: Int32, beta: BinaryView>
+/// col2: Utf8View
+/// contains two Fields with variadic buffers, so variadicBufferCounts will have
+/// two entries, the first counting the variadic buffers of `col1.beta` and the
+/// second counting `col2`'s.
+///
+/// This field may be omitted if and only if the schema contains no Fields with
+/// a variable number of buffers, such as BinaryView and Utf8View.
func (rcv *RecordBatch) MutateVariadicBufferCounts(j int, n int64) bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
	if o != 0 {
diff --git a/go/arrow/internal/flatbuf/RunEndEncoded.go b/go/arrow/internal/flatbuf/RunEndEncoded.go
index b88460b2e22..fa414c1bf0e 100644
--- a/go/arrow/internal/flatbuf/RunEndEncoded.go
+++ b/go/arrow/internal/flatbuf/RunEndEncoded.go
@@ -22,11 +22,11 @@ import (
	flatbuffers "github.com/google/flatbuffers/go"
)

-// / Contains two child arrays, run_ends and values.
-// / The run_ends child array must be a 16/32/64-bit integer array
-// / which encodes the indices at which the run with the value in
-// / each corresponding index in the values child array ends.
-// / Like list/struct types, the value array can be of any type.
+/// Contains two child arrays, run_ends and values.
+/// The run_ends child array must be a 16/32/64-bit integer array
+/// which encodes the indices at which the run with the value in
+/// each corresponding index in the values child array ends.
+/// Like list/struct types, the value array can be of any type. type RunEndEncoded struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Schema.go b/go/arrow/internal/flatbuf/Schema.go index ae5b248a766..4ee5ecc9e5e 100644 --- a/go/arrow/internal/flatbuf/Schema.go +++ b/go/arrow/internal/flatbuf/Schema.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / A Schema describes the columns in a row batch +/// ---------------------------------------------------------------------- +/// A Schema describes the columns in a row batch type Schema struct { _tab flatbuffers.Table } @@ -44,9 +44,9 @@ func (rcv *Schema) Table() flatbuffers.Table { return rcv._tab } -// / endianness of the buffer -// / it is Little Endian by default -// / if endianness doesn't match the underlying system then the vectors need to be converted +/// endianness of the buffer +/// it is Little Endian by default +/// if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) Endianness() Endianness { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,9 +55,9 @@ func (rcv *Schema) Endianness() Endianness { return 0 } -// / endianness of the buffer -// / it is Little Endian by default -// / if endianness doesn't match the underlying system then the vectors need to be converted +/// endianness of the buffer +/// it is Little Endian by default +/// if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) MutateEndianness(n Endianness) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } @@ -102,7 +102,7 @@ func (rcv *Schema) CustomMetadataLength() int { return 0 } -// / Features used in the stream/file. +/// Features used in the stream/file. func (rcv *Schema) Features(j int) Feature { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -120,7 +120,7 @@ func (rcv *Schema) FeaturesLength() int { return 0 } -// / Features used in the stream/file. +/// Features used in the stream/file. func (rcv *Schema) MutateFeatures(j int, n Feature) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go index 2477af10035..de8217650b2 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Compressed Sparse Row format, that is matrix-specific. +/// Compressed Sparse Row format, that is matrix-specific. type SparseMatrixIndexCSR struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSR) Table() flatbuffers.Table { return rcv._tab } -// / The type of values in indptrBuffer +/// The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -57,29 +57,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { return nil } -// / The type of values in indptrBuffer -// / indptrBuffer stores the location and size of indptr array that -// / represents the range of the rows. -// / The i-th row spans from indptr[i] to indptr[i+1] in the data. -// / The length of this array is 1 + (the number of rows), and the type -// / of index value is long. 
-// /
-// / For example, let X be the following 6x4 matrix:
-// /
-// / X := [[0, 1, 2, 0],
-// / [0, 0, 3, 0],
-// / [0, 4, 0, 5],
-// / [0, 0, 0, 0],
-// / [6, 0, 7, 8],
-// / [0, 9, 0, 0]].
-// /
-// / The array of non-zero values in X is:
-// /
-// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-// /
-// / And the indptr of X is:
-// /
-// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+/// The type of values in indptrBuffer
+/// indptrBuffer stores the location and size of indptr array that
+/// represents the range of the rows.
+/// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+/// The length of this array is 1 + (the number of rows), and the type
+/// of index value is long.
+///
+/// For example, let X be the following 6x4 matrix:
+///
+/// X := [[0, 1, 2, 0],
+/// [0, 0, 3, 0],
+/// [0, 4, 0, 5],
+/// [0, 0, 0, 0],
+/// [6, 0, 7, 8],
+/// [0, 9, 0, 0]].
+///
+/// The array of non-zero values in X is:
+///
+/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+///
+/// And the indptr of X is:
+///
+/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
	if o != 0 {
@@ -93,29 +93,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer {
	return nil
}

-// / indptrBuffer stores the location and size of indptr array that
-// / represents the range of the rows.
-// / The i-th row spans from indptr[i] to indptr[i+1] in the data.
-// / The length of this array is 1 + (the number of rows), and the type
-// / of index value is long.
-// /
-// / For example, let X be the following 6x4 matrix:
-// /
-// / X := [[0, 1, 2, 0],
-// / [0, 0, 3, 0],
-// / [0, 4, 0, 5],
-// / [0, 0, 0, 0],
-// / [6, 0, 7, 8],
-// / [0, 9, 0, 0]].
-// /
-// / The array of non-zero values in X is:
-// /
-// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-// /
-// / And the indptr of X is:
-// /
-// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
-// / The type of values in indicesBuffer
+/// indptrBuffer stores the location and size of indptr array that
+/// represents the range of the rows.
+/// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+/// The length of this array is 1 + (the number of rows), and the type
+/// of index value is long.
+///
+/// For example, let X be the following 6x4 matrix:
+///
+/// X := [[0, 1, 2, 0],
+/// [0, 0, 3, 0],
+/// [0, 4, 0, 5],
+/// [0, 0, 0, 0],
+/// [6, 0, 7, 8],
+/// [0, 9, 0, 0]].
+///
+/// The array of non-zero values in X is:
+///
+/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+///
+/// And the indptr of X is:
+///
+/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+/// The type of values in indicesBuffer
func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
	if o != 0 {
@@ -129,16 +129,16 @@ func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int {
	return nil
}

-// / The type of values in indicesBuffer
-// / indicesBuffer stores the location and size of the array that
-// / contains the column indices of the corresponding non-zero values.
-// / The type of index value is long.
-// /
-// / For example, the indices of the above X is:
-// /
-// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-// /
-// / Note that the indices are sorted in lexicographical order for each row.
+/// The type of values in indicesBuffer
+/// indicesBuffer stores the location and size of the array that
+/// contains the column indices of the corresponding non-zero values.
+/// The type of index value is long.
+///
+/// For example, the indices of the above X is:
+///
+/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+///
+/// Note that the indices are sorted in lexicographical order for each row.
func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
	if o != 0 {
@@ -152,15 +152,15 @@ func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer {
	return nil
}

-// / indicesBuffer stores the location and size of the array that
-// / contains the column indices of the corresponding non-zero values.
-// / The type of index value is long.
-// /
-// / For example, the indices of the above X is:
-// /
-// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-// /
-// / Note that the indices are sorted in lexicographical order for each row.
+/// indicesBuffer stores the location and size of the array that
+/// contains the column indices of the corresponding non-zero values.
+/// The type of index value is long.
+///
+/// For example, the indices of the above X is:
+///
+/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+///
+/// Note that the indices are sorted in lexicographical order for each row.
func SparseMatrixIndexCSRStart(builder *flatbuffers.Builder) {
	builder.StartObject(4)
}
diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go
index 7f262deedbf..c28cc5d082f 100644
--- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go
+++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go
@@ -22,7 +22,7 @@ import (
	flatbuffers "github.com/google/flatbuffers/go"
)

-// / Compressed Sparse format, that is matrix-specific.
+/// Compressed Sparse format, that is matrix-specific.
type SparseMatrixIndexCSX struct {
	_tab flatbuffers.Table
}
@@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSX) Table() flatbuffers.Table {
	return rcv._tab
}

-// / Which axis, row or column, is compressed
+/// Which axis, row or column, is compressed
func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
	if o != 0 {
@@ -52,12 +52,12 @@ func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis {
	return 0
}

-// / Which axis, row or column, is compressed
+/// Which axis, row or column, is compressed
func (rcv *SparseMatrixIndexCSX) MutateCompressedAxis(n SparseMatrixCompressedAxis) bool {
	return rcv._tab.MutateInt16Slot(4, int16(n))
}

-// / The type of values in indptrBuffer
+/// The type of values in indptrBuffer
func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
	if o != 0 {
@@ -71,30 +71,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int {
	return nil
}

-// / The type of values in indptrBuffer
-// / indptrBuffer stores the location and size of indptr array that
-// / represents the range of the rows.
-// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
-// / The length of this array is 1 + (the number of rows), and the type
-// / of index value is long.
-// /
-// / For example, let X be the following 6x4 matrix:
-// / ```text
-// / X := [[0, 1, 2, 0],
-// / [0, 0, 3, 0],
-// / [0, 4, 0, 5],
-// / [0, 0, 0, 0],
-// / [6, 0, 7, 8],
-// / [0, 9, 0, 0]].
-// / ```
-// / The array of non-zero values in X is:
-// / ```text
-// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-// / ```
-// / And the indptr of X is:
-// / ```text
-// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
-// / ```
+/// The type of values in indptrBuffer
+/// indptrBuffer stores the location and size of indptr array that
+/// represents the range of the rows.
+/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+/// The length of this array is 1 + (the number of rows), and the type
+/// of index value is long.
+///
+/// For example, let X be the following 6x4 matrix:
+/// ```text
+/// X := [[0, 1, 2, 0],
+/// [0, 0, 3, 0],
+/// [0, 4, 0, 5],
+/// [0, 0, 0, 0],
+/// [6, 0, 7, 8],
+/// [0, 9, 0, 0]].
+/// ```
+/// The array of non-zero values in X is:
+/// ```text
+/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+/// ```
+/// And the indptr of X is:
+/// ```text
+/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+/// ```
func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
	if o != 0 {
@@ -108,30 +108,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer {
	return nil
}

-// / indptrBuffer stores the location and size of indptr array that
-// / represents the range of the rows.
-// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
-// / The length of this array is 1 + (the number of rows), and the type
-// / of index value is long.
-// /
-// / For example, let X be the following 6x4 matrix:
-// / ```text
-// / X := [[0, 1, 2, 0],
-// / [0, 0, 3, 0],
-// / [0, 4, 0, 5],
-// / [0, 0, 0, 0],
-// / [6, 0, 7, 8],
-// / [0, 9, 0, 0]].
-// / ```
-// / The array of non-zero values in X is:
-// / ```text
-// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-// / ```
-// / And the indptr of X is:
-// / ```text
-// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
-// / ```
-// / The type of values in indicesBuffer
+/// indptrBuffer stores the location and size of indptr array that
+/// represents the range of the rows.
+/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+/// The length of this array is 1 + (the number of rows), and the type
+/// of index value is long.
+///
+/// For example, let X be the following 6x4 matrix:
+/// ```text
+/// X := [[0, 1, 2, 0],
+/// [0, 0, 3, 0],
+/// [0, 4, 0, 5],
+/// [0, 0, 0, 0],
+/// [6, 0, 7, 8],
+/// [0, 9, 0, 0]].
+/// ```
+/// The array of non-zero values in X is:
+/// ```text
+/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+/// ```
+/// And the indptr of X is:
+/// ```text
+/// indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+/// ```
+/// The type of values in indicesBuffer
func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
	if o != 0 {
@@ -145,16 +145,16 @@ func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int {
	return nil
}

-// / The type of values in indicesBuffer
-// / indicesBuffer stores the location and size of the array that
-// / contains the column indices of the corresponding non-zero values.
-// / The type of index value is long.
-// /
-// / For example, the indices of the above X is:
-// / ```text
-// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-// / ```
-// / Note that the indices are sorted in lexicographical order for each row.
+/// The type of values in indicesBuffer
+/// indicesBuffer stores the location and size of the array that
+/// contains the column indices of the corresponding non-zero values.
+/// The type of index value is long.
+///
+/// For example, the indices of the above X is:
+/// ```text
+/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+/// ```
+/// Note that the indices are sorted in lexicographical order for each row.
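The CSR/CSX example above can be verified mechanically. A self-contained sketch that derives values, indptr and indices from the documented 6x4 matrix X using plain Go (no Arrow APIs involved):

```go
package main

import "fmt"

func main() {
	// The 6x4 example matrix X from the comments above.
	X := [6][4]int64{
		{0, 1, 2, 0},
		{0, 0, 3, 0},
		{0, 4, 0, 5},
		{0, 0, 0, 0},
		{6, 0, 7, 8},
		{0, 9, 0, 0},
	}
	var values, indptr, indices []int64
	indptr = append(indptr, 0)
	for _, row := range X {
		for col, v := range row {
			if v != 0 {
				values = append(values, v)
				indices = append(indices, int64(col))
			}
		}
		// Each row's non-zeros end where the cumulative count stands.
		indptr = append(indptr, int64(len(values)))
	}
	fmt.Println(values)  // [1 2 3 4 5 6 7 8 9]
	fmt.Println(indptr)  // [0 2 3 5 5 8 9]
	fmt.Println(indices) // [1 2 2 1 3 0 2 3 1]
}
```

Note that the program's output matches the values(X), indptr(X) and indices(X) arrays given in the comments, including indptr's final entry equaling the non-zero count.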
func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -168,15 +168,15 @@ func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { return nil } -// / indicesBuffer stores the location and size of the array that -// / contains the column indices of the corresponding non-zero values. -// / The type of index value is long. -// / -// / For example, the indices of the above X is: -// / ```text -// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -// / ``` -// / Note that the indices are sorted in lexicographical order for each row. +/// indicesBuffer stores the location and size of the array that +/// contains the column indices of the corresponding non-zero values. +/// The type of index value is long. +/// +/// For example, the indices of the above X is: +/// ```text +/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +/// ``` +/// Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSXStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/SparseTensor.go b/go/arrow/internal/flatbuf/SparseTensor.go index 8f67e1fc08b..6f3f55797d7 100644 --- a/go/arrow/internal/flatbuf/SparseTensor.go +++ b/go/arrow/internal/flatbuf/SparseTensor.go @@ -54,9 +54,9 @@ func (rcv *SparseTensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -// / The type of data contained in a value cell. -// / Currently only fixed-width value types are supported, -// / no strings or nested types. +/// The type of data contained in a value cell. +/// Currently only fixed-width value types are supported, +/// no strings or nested types. func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -66,10 +66,10 @@ func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { return false } -// / The type of data contained in a value cell. -// / Currently only fixed-width value types are supported, -// / no strings or nested types. -// / The dimensions of the tensor, optionally named. +/// The type of data contained in a value cell. +/// Currently only fixed-width value types are supported, +/// no strings or nested types. +/// The dimensions of the tensor, optionally named. func (rcv *SparseTensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -90,8 +90,8 @@ func (rcv *SparseTensor) ShapeLength() int { return 0 } -// / The dimensions of the tensor, optionally named. -// / The number of non-zero values in a sparse tensor. +/// The dimensions of the tensor, optionally named. +/// The number of non-zero values in a sparse tensor. func (rcv *SparseTensor) NonZeroLength() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -100,7 +100,7 @@ func (rcv *SparseTensor) NonZeroLength() int64 { return 0 } -// / The number of non-zero values in a sparse tensor. +/// The number of non-zero values in a sparse tensor. 
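Pulling the SparseTensor metadata fields above together, here is an informal Go mirror of what a sparse tensor's header carries; the struct and field names are invented for illustration, not the generated API:

```go
package main

import "fmt"

// sparseTensorMeta loosely mirrors the SparseTensor metadata above:
// a fixed-width value type, one dimension size per TensorDim, and the
// count of non-zero values (strings/nested value types are unsupported).
type sparseTensorMeta struct {
	valueType     string
	shape         []int64
	nonZeroLength int64
}

func main() {
	// Metadata for the 6x4 CSR example matrix discussed earlier,
	// which holds the nine non-zero values 1..9.
	meta := sparseTensorMeta{valueType: "int64", shape: []int64{6, 4}, nonZeroLength: 9}
	fmt.Printf("%+v\n", meta)
}
```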
func (rcv *SparseTensor) MutateNonZeroLength(n int64) bool {
	return rcv._tab.MutateInt64Slot(10, n)
}
@@ -117,7 +117,7 @@ func (rcv *SparseTensor) MutateSparseIndexType(n SparseTensorIndex) bool {
	return rcv._tab.MutateByteSlot(12, byte(n))
}

-// / Sparse tensor index
+/// Sparse tensor index
func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
	if o != 0 {
@@ -127,8 +127,8 @@ func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool {
	return false
}

-// / Sparse tensor index
-// / The location and size of the tensor's data
+/// Sparse tensor index
+/// The location and size of the tensor's data
func (rcv *SparseTensor) Data(obj *Buffer) *Buffer {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(16))
	if o != 0 {
@@ -142,7 +142,7 @@ func (rcv *SparseTensor) Data(obj *Buffer) *Buffer {
	return nil
}

-// / The location and size of the tensor's data
+/// The location and size of the tensor's data
func SparseTensorStart(builder *flatbuffers.Builder) {
	builder.StartObject(7)
}
diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
index bf1c218e2e4..f8eee99fa69 100644
--- a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
+++ b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
@@ -22,38 +22,38 @@ import (
	flatbuffers "github.com/google/flatbuffers/go"
)

-// / ----------------------------------------------------------------------
-// / EXPERIMENTAL: Data structures for sparse tensors
-// / Coordinate (COO) format of sparse tensor index.
-// /
-// / COO's index list is represented as an NxM matrix,
-// / where N is the number of non-zero values,
-// / and M is the number of dimensions of a sparse tensor.
-// /
-// / indicesBuffer stores the location and size of the data of this indices
-// / matrix. The value type and the stride of the indices matrix are
-// / specified in indicesType and indicesStrides fields.
-// /
-// / For example, let X be a 2x3x4x5 tensor, and it has the following
-// / 6 non-zero values:
-// / ```text
-// / X[0, 1, 2, 0] := 1
-// / X[1, 1, 2, 3] := 2
-// / X[0, 2, 1, 0] := 3
-// / X[0, 1, 3, 0] := 4
-// / X[0, 1, 2, 1] := 5
-// / X[1, 2, 0, 4] := 6
-// / ```
-// / In COO format, the index matrix of X is the following 4x6 matrix:
-// / ```text
-// / [[0, 0, 0, 0, 1, 1],
-// / [1, 1, 1, 2, 1, 2],
-// / [2, 2, 3, 1, 2, 0],
-// / [0, 1, 0, 0, 3, 4]]
-// / ```
-// / When isCanonical is true, the indices are sorted in lexicographical order
-// / (row-major order), and do not have duplicated entries. Otherwise,
-// / the indices may not be sorted, or may have duplicated entries.
+/// ----------------------------------------------------------------------
+/// EXPERIMENTAL: Data structures for sparse tensors
+/// Coordinate (COO) format of sparse tensor index.
+///
+/// COO's index list is represented as an NxM matrix,
+/// where N is the number of non-zero values,
+/// and M is the number of dimensions of a sparse tensor.
+///
+/// indicesBuffer stores the location and size of the data of this indices
+/// matrix. The value type and the stride of the indices matrix are
+/// specified in indicesType and indicesStrides fields.
+///
+/// For example, let X be a 2x3x4x5 tensor, and it has the following
+/// 6 non-zero values:
+/// ```text
+/// X[0, 1, 2, 0] := 1
+/// X[1, 1, 2, 3] := 2
+/// X[0, 2, 1, 0] := 3
+/// X[0, 1, 3, 0] := 4
+/// X[0, 1, 2, 1] := 5
+/// X[1, 2, 0, 4] := 6
+/// ```
+/// In COO format, the index matrix of X is the following 4x6 matrix:
+/// ```text
+/// [[0, 0, 0, 0, 1, 1],
+/// [1, 1, 1, 2, 1, 2],
+/// [2, 2, 3, 1, 2, 0],
+/// [0, 1, 0, 0, 3, 4]]
+/// ```
+/// When isCanonical is true, the indices are sorted in lexicographical order
+/// (row-major order), and do not have duplicated entries. Otherwise,
+/// the indices may not be sorted, or may have duplicated entries.
type SparseTensorIndexCOO struct {
	_tab flatbuffers.Table
}
@@ -74,7 +74,7 @@ func (rcv *SparseTensorIndexCOO) Table() flatbuffers.Table {
	return rcv._tab
}

-// / The type of values in indicesBuffer
+/// The type of values in indicesBuffer
func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
	if o != 0 {
@@ -88,9 +88,9 @@ func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int {
	return nil
}

-// / The type of values in indicesBuffer
-// / Non-negative byte offsets to advance one value cell along each dimension
-// / If omitted, default to row-major order (C-like).
+/// The type of values in indicesBuffer
+/// Non-negative byte offsets to advance one value cell along each dimension
+/// If omitted, default to row-major order (C-like).
func (rcv *SparseTensorIndexCOO) IndicesStrides(j int) int64 {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
	if o != 0 {
@@ -108,8 +108,8 @@ func (rcv *SparseTensorIndexCOO) IndicesStridesLength() int {
	return 0
}

-// / Non-negative byte offsets to advance one value cell along each dimension
-// / If omitted, default to row-major order (C-like).
+/// Non-negative byte offsets to advance one value cell along each dimension
+/// If omitted, default to row-major order (C-like).
func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
	if o != 0 {
@@ -119,7 +119,7 @@ func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool {
	return false
}

-// / The location and size of the indices matrix's data
+/// The location and size of the indices matrix's data
func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
	if o != 0 {
@@ -133,12 +133,12 @@ func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer {
	return nil
}

-// / The location and size of the indices matrix's data
-// / This flag is true if and only if the indices matrix is sorted in
-// / row-major order, and does not have duplicated entries.
-// / This sort order is the same as that of TensorFlow's SparseTensor,
-// / but it is the inverse order of SciPy's canonical coo_matrix
-// / (SciPy employs column-major order for its coo_matrix).
+/// The location and size of the indices matrix's data
+/// This flag is true if and only if the indices matrix is sorted in
+/// row-major order, and does not have duplicated entries.
+/// This sort order is the same as that of TensorFlow's SparseTensor,
+/// but it is the inverse order of SciPy's canonical coo_matrix
+/// (SciPy employs column-major order for its coo_matrix).
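The canonical ordering described above is easy to check in plain Go. A sketch that takes the six non-zero coordinates of the example tensor X, tests whether they are in row-major (lexicographic) order, and sorts them (this checks sortedness only; canonical form additionally forbids duplicate coordinates):

```go
package main

import (
	"fmt"
	"sort"
)

func main() {
	// The six non-zero coordinates of the 2x3x4x5 tensor X listed above,
	// one row per non-zero value, in insertion order.
	coords := [][4]int64{
		{0, 1, 2, 0}, // X[0,1,2,0] = 1
		{1, 1, 2, 3}, // X[1,1,2,3] = 2
		{0, 2, 1, 0}, // X[0,2,1,0] = 3
		{0, 1, 3, 0}, // X[0,1,3,0] = 4
		{0, 1, 2, 1}, // X[0,1,2,1] = 5
		{1, 2, 0, 4}, // X[1,2,0,4] = 6
	}
	less := func(a, b [4]int64) bool {
		for d := range a {
			if a[d] != b[d] {
				return a[d] < b[d]
			}
		}
		return false
	}
	sorted := sort.SliceIsSorted(coords, func(i, j int) bool { return less(coords[i], coords[j]) })
	fmt.Println(sorted) // false: the list above is unsorted, so not canonical
	sort.Slice(coords, func(i, j int) bool { return less(coords[i], coords[j]) })
	fmt.Println(coords) // sorted coordinates: the columns of the 4x6 index matrix above
}
```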
func (rcv *SparseTensorIndexCOO) IsCanonical() bool {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
	if o != 0 {
@@ -147,11 +147,11 @@ func (rcv *SparseTensorIndexCOO) IsCanonical() bool {
	return false
}

-// / This flag is true if and only if the indices matrix is sorted in
-// / row-major order, and does not have duplicated entries.
-// / This sort order is the same as that of TensorFlow's SparseTensor,
-// / but it is the inverse order of SciPy's canonical coo_matrix
-// / (SciPy employs column-major order for its coo_matrix).
+/// This flag is true if and only if the indices matrix is sorted in
+/// row-major order, and does not have duplicated entries.
+/// This sort order is the same as that of TensorFlow's SparseTensor,
+/// but it is the inverse order of SciPy's canonical coo_matrix
+/// (SciPy employs column-major order for its coo_matrix).
func (rcv *SparseTensorIndexCOO) MutateIsCanonical(n bool) bool {
	return rcv._tab.MutateBoolSlot(10, n)
}
diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
index 66226e0412c..a824c84ebfe 100644
--- a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
+++ b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
@@ -22,7 +22,7 @@ import (
	flatbuffers "github.com/google/flatbuffers/go"
)

-// / Compressed Sparse Fiber (CSF) sparse tensor index.
+/// Compressed Sparse Fiber (CSF) sparse tensor index.
type SparseTensorIndexCSF struct {
	_tab flatbuffers.Table
}
@@ -43,37 +43,37 @@ func (rcv *SparseTensorIndexCSF) Table() flatbuffers.Table {
	return rcv._tab
}

-// / CSF is a generalization of compressed sparse row (CSR) index.
-// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-// /
-// / CSF index recursively compresses each dimension of a tensor into a set
-// / of prefix trees. Each path from a root to leaf forms one tensor
-// / non-zero index. CSF is implemented with two arrays of buffers and one
-// / array of integers.
-// /
-// / For example, let X be a 2x3x4x5 tensor and let it have the following
-// / 8 non-zero values:
-// / ```text
-// / X[0, 0, 0, 1] := 1
-// / X[0, 0, 0, 2] := 2
-// / X[0, 1, 0, 0] := 3
-// / X[0, 1, 0, 2] := 4
-// / X[0, 1, 1, 0] := 5
-// / X[1, 1, 1, 0] := 6
-// / X[1, 1, 1, 1] := 7
-// / X[1, 1, 1, 2] := 8
-// / ```
-// / As a prefix tree this would be represented as:
-// / ```text
-// / 0 1
-// / / \ |
-// / 0 1 1
-// / / / \ |
-// / 0 0 1 1
-// / /| /| | /| |
-// / 1 2 0 2 0 0 1 2
-// / ```
-// / The type of values in indptrBuffers
+/// CSF is a generalization of compressed sparse row (CSR) index.
+/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+///
+/// CSF index recursively compresses each dimension of a tensor into a set
+/// of prefix trees. Each path from a root to leaf forms one tensor
+/// non-zero index. CSF is implemented with two arrays of buffers and one
+/// array of integers.
+///
+/// For example, let X be a 2x3x4x5 tensor and let it have the following
+/// 8 non-zero values:
+/// ```text
+/// X[0, 0, 0, 1] := 1
+/// X[0, 0, 0, 2] := 2
+/// X[0, 1, 0, 0] := 3
+/// X[0, 1, 0, 2] := 4
+/// X[0, 1, 1, 0] := 5
+/// X[1, 1, 1, 0] := 6
+/// X[1, 1, 1, 1] := 7
+/// X[1, 1, 1, 2] := 8
+/// ```
+/// As a prefix tree this would be represented as:
+/// ```text
+/// 0 1
+/// / \ |
+/// 0 1 1
+/// / / \ |
+/// 0 0 1 1
+/// /| /| | /| |
+/// 1 2 0 2 0 0 1 2
+/// ```
+/// The type of values in indptrBuffers
func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int {
	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
	if o != 0 {
@@ -87,51 +87,51 @@ func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int {
	return nil
}

-// / CSF is a generalization of compressed sparse row (CSR) index.
-// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-// /
-// / CSF index recursively compresses each dimension of a tensor into a set
-// / of prefix trees. Each path from a root to leaf forms one tensor
-// / non-zero index. CSF is implemented with two arrays of buffers and one
-// / array of integers.
-// /
-// / For example, let X be a 2x3x4x5 tensor and let it have the following
-// / 8 non-zero values:
-// / ```text
-// / X[0, 0, 0, 1] := 1
-// / X[0, 0, 0, 2] := 2
-// / X[0, 1, 0, 0] := 3
-// / X[0, 1, 0, 2] := 4
-// / X[0, 1, 1, 0] := 5
-// / X[1, 1, 1, 0] := 6
-// / X[1, 1, 1, 1] := 7
-// / X[1, 1, 1, 2] := 8
-// / ```
-// / As a prefix tree this would be represented as:
-// / ```text
-// / 0 1
-// / / \ |
-// / 0 1 1
-// / / / \ |
-// / 0 0 1 1
-// / /| /| | /| |
-// / 1 2 0 2 0 0 1 2
-// / ```
-// / The type of values in indptrBuffers
-// / indptrBuffers stores the sparsity structure.
-// / Each two consecutive dimensions in a tensor correspond to a buffer in
-// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
-// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in
-// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
-// /
-// / For example, the indptrBuffers for the above X is:
-// / ```text
-// / indptrBuffer(X) = [
-// / [0, 2, 3],
-// / [0, 1, 3, 4],
-// / [0, 2, 4, 5, 8]
-// / ].
-// / ```
+/// CSF is a generalization of compressed sparse row (CSR) index.
+/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+///
+/// CSF index recursively compresses each dimension of a tensor into a set
+/// of prefix trees. Each path from a root to leaf forms one tensor
+/// non-zero index. CSF is implemented with two arrays of buffers and one
+/// array of integers.
+///
+/// For example, let X be a 2x3x4x5 tensor and let it have the following
+/// 8 non-zero values:
+/// ```text
+/// X[0, 0, 0, 1] := 1
+/// X[0, 0, 0, 2] := 2
+/// X[0, 1, 0, 0] := 3
+/// X[0, 1, 0, 2] := 4
+/// X[0, 1, 1, 0] := 5
+/// X[1, 1, 1, 0] := 6
+/// X[1, 1, 1, 1] := 7
+/// X[1, 1, 1, 2] := 8
+/// ```
+/// As a prefix tree this would be represented as:
+/// ```text
+/// 0 1
+/// / \ |
+/// 0 1 1
+/// / / \ |
+/// 0 0 1 1
+/// /| /| | /| |
+/// 1 2 0 2 0 0 1 2
+/// ```
+/// The type of values in indptrBuffers
+/// indptrBuffers stores the sparsity structure.
+/// Each two consecutive dimensions in a tensor correspond to a buffer in
+/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+/// +/// For example, the indptrBuffers for the above X is: +/// ```text +/// indptrBuffer(X) = [ +/// [0, 2, 3], +/// [0, 1, 3, 4], +/// [0, 2, 4, 5, 8] +/// ]. +/// ``` func (rcv *SparseTensorIndexCSF) IndptrBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -151,21 +151,21 @@ func (rcv *SparseTensorIndexCSF) IndptrBuffersLength() int { return 0 } -// / indptrBuffers stores the sparsity structure. -// / Each two consecutive dimensions in a tensor correspond to a buffer in -// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in -// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -// / -// / For example, the indptrBuffers for the above X is: -// / ```text -// / indptrBuffer(X) = [ -// / [0, 2, 3], -// / [0, 1, 3, 4], -// / [0, 2, 4, 5, 8] -// / ]. -// / ``` -// / The type of values in indicesBuffers +/// indptrBuffers stores the sparsity structure. +/// Each two consecutive dimensions in a tensor correspond to a buffer in +/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in +/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. +/// +/// For example, the indptrBuffers for the above X is: +/// ```text +/// indptrBuffer(X) = [ +/// [0, 2, 3], +/// [0, 1, 3, 4], +/// [0, 2, 4, 5, 8] +/// ]. +/// ``` +/// The type of values in indicesBuffers func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -179,18 +179,18 @@ func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { return nil } -// / The type of values in indicesBuffers -// / indicesBuffers stores values of nodes. -// / Each tensor dimension corresponds to a buffer in indicesBuffers. -// / For example, the indicesBuffers for the above X is: -// / ```text -// / indicesBuffer(X) = [ -// / [0, 1], -// / [0, 1, 1], -// / [0, 0, 1, 1], -// / [1, 2, 0, 2, 0, 0, 1, 2] -// / ]. -// / ``` +/// The type of values in indicesBuffers +/// indicesBuffers stores values of nodes. +/// Each tensor dimension corresponds to a buffer in indicesBuffers. +/// For example, the indicesBuffers for the above X is: +/// ```text +/// indicesBuffer(X) = [ +/// [0, 1], +/// [0, 1, 1], +/// [0, 0, 1, 1], +/// [1, 2, 0, 2, 0, 0, 1, 2] +/// ]. +/// ``` func (rcv *SparseTensorIndexCSF) IndicesBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -210,23 +210,23 @@ func (rcv *SparseTensorIndexCSF) IndicesBuffersLength() int { return 0 } -// / indicesBuffers stores values of nodes. -// / Each tensor dimension corresponds to a buffer in indicesBuffers. -// / For example, the indicesBuffers for the above X is: -// / ```text -// / indicesBuffer(X) = [ -// / [0, 1], -// / [0, 1, 1], -// / [0, 0, 1, 1], -// / [1, 2, 0, 2, 0, 0, 1, 2] -// / ]. -// / ``` -// / axisOrder stores the sequence in which dimensions were traversed to -// / produce the prefix tree. -// / For example, the axisOrder for the above X is: -// / ```text -// / axisOrder(X) = [0, 1, 2, 3]. -// / ``` +/// indicesBuffers stores values of nodes. +/// Each tensor dimension corresponds to a buffer in indicesBuffers. +/// For example, the indicesBuffers for the above X is: +/// ```text +/// indicesBuffer(X) = [ +/// [0, 1], +/// [0, 1, 1], +/// [0, 0, 1, 1], +/// [1, 2, 0, 2, 0, 0, 1, 2] +/// ]. 
+/// ``` +/// axisOrder stores the sequence in which dimensions were traversed to +/// produce the prefix tree. +/// For example, the axisOrder for the above X is: +/// ```text +/// axisOrder(X) = [0, 1, 2, 3]. +/// ``` func (rcv *SparseTensorIndexCSF) AxisOrder(j int) int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -244,12 +244,12 @@ func (rcv *SparseTensorIndexCSF) AxisOrderLength() int { return 0 } -// / axisOrder stores the sequence in which dimensions were traversed to -// / produce the prefix tree. -// / For example, the axisOrder for the above X is: -// / ```text -// / axisOrder(X) = [0, 1, 2, 3]. -// / ``` +/// axisOrder stores the sequence in which dimensions were traversed to +/// produce the prefix tree. +/// For example, the axisOrder for the above X is: +/// ```text +/// axisOrder(X) = [0, 1, 2, 3]. +/// ``` func (rcv *SparseTensorIndexCSF) MutateAxisOrder(j int, n int32) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/Struct_.go b/go/arrow/internal/flatbuf/Struct_.go index 73752a17e00..427e7060382 100644 --- a/go/arrow/internal/flatbuf/Struct_.go +++ b/go/arrow/internal/flatbuf/Struct_.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / A Struct_ in the flatbuffer metadata is the same as an Arrow Struct -// / (according to the physical memory layout). We used Struct_ here as -// / Struct is a reserved word in Flatbuffers +/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +/// (according to the physical memory layout). We used Struct_ here as +/// Struct is a reserved word in Flatbuffers type Struct_ struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Tensor.go b/go/arrow/internal/flatbuf/Tensor.go index 47bfe8067b5..39d70e351e3 100644 --- a/go/arrow/internal/flatbuf/Tensor.go +++ b/go/arrow/internal/flatbuf/Tensor.go @@ -54,8 +54,8 @@ func (rcv *Tensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -// / The type of data contained in a value cell. Currently only fixed-width -// / value types are supported, no strings or nested types +/// The type of data contained in a value cell. Currently only fixed-width +/// value types are supported, no strings or nested types func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -65,9 +65,9 @@ func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { return false } -// / The type of data contained in a value cell. Currently only fixed-width -// / value types are supported, no strings or nested types -// / The dimensions of the tensor, optionally named +/// The type of data contained in a value cell. Currently only fixed-width +/// value types are supported, no strings or nested types +/// The dimensions of the tensor, optionally named func (rcv *Tensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *Tensor) ShapeLength() int { return 0 } -// / The dimensions of the tensor, optionally named -// / Non-negative byte offsets to advance one value cell along each dimension -// / If omitted, default to row-major order (C-like). +/// The dimensions of the tensor, optionally named +/// Non-negative byte offsets to advance one value cell along each dimension +/// If omitted, default to row-major order (C-like). 
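The default strides rule above (row-major, C-like, when the strides field is omitted) can be computed directly from a shape and the fixed value width. A small self-contained sketch in Go:

```go
package main

import "fmt"

// rowMajorStrides computes the default (C-like) byte strides for a tensor
// shape when the strides field is omitted, given a fixed value byte width.
func rowMajorStrides(shape []int64, byteWidth int64) []int64 {
	strides := make([]int64, len(shape))
	acc := byteWidth
	// The last dimension advances one value at a time; each earlier
	// dimension advances by the full extent of everything after it.
	for i := len(shape) - 1; i >= 0; i-- {
		strides[i] = acc
		acc *= shape[i]
	}
	return strides
}

func main() {
	// A 2x3x4x5 tensor of 8-byte (e.g. int64) values.
	fmt.Println(rowMajorStrides([]int64{2, 3, 4, 5}, 8)) // [480 160 40 8]
}
```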
func (rcv *Tensor) Strides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *Tensor) StridesLength() int { return 0 } -// / Non-negative byte offsets to advance one value cell along each dimension -// / If omitted, default to row-major order (C-like). +/// Non-negative byte offsets to advance one value cell along each dimension +/// If omitted, default to row-major order (C-like). func (rcv *Tensor) MutateStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *Tensor) MutateStrides(j int, n int64) bool { return false } -// / The location and size of the tensor's data +/// The location and size of the tensor's data func (rcv *Tensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -133,7 +133,7 @@ func (rcv *Tensor) Data(obj *Buffer) *Buffer { return nil } -// / The location and size of the tensor's data +/// The location and size of the tensor's data func TensorStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/TensorDim.go b/go/arrow/internal/flatbuf/TensorDim.go index c6413b6a8c0..14b82120887 100644 --- a/go/arrow/internal/flatbuf/TensorDim.go +++ b/go/arrow/internal/flatbuf/TensorDim.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / ---------------------------------------------------------------------- -// / Data structures for dense tensors -// / Shape data for a single axis in a tensor +/// ---------------------------------------------------------------------- +/// Data structures for dense tensors +/// Shape data for a single axis in a tensor type TensorDim struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *TensorDim) Table() flatbuffers.Table { return rcv._tab } -// / Length of dimension +/// Length of dimension func (rcv *TensorDim) Size() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,12 +54,12 @@ func (rcv *TensorDim) Size() int64 { return 0 } -// / Length of dimension +/// Length of dimension func (rcv *TensorDim) MutateSize(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -// / Name of the dimension, optional +/// Name of the dimension, optional func (rcv *TensorDim) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -68,7 +68,7 @@ func (rcv *TensorDim) Name() []byte { return nil } -// / Name of the dimension, optional +/// Name of the dimension, optional func TensorDimStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Time.go b/go/arrow/internal/flatbuf/Time.go index 13038a6e332..2fb6e4c110e 100644 --- a/go/arrow/internal/flatbuf/Time.go +++ b/go/arrow/internal/flatbuf/Time.go @@ -22,20 +22,20 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Time is either a 32-bit or 64-bit signed integer type representing an -// / elapsed time since midnight, stored in either of four units: seconds, -// / milliseconds, microseconds or nanoseconds. -// / -// / The integer `bitWidth` depends on the `unit` and must be one of the following: -// / * SECOND and MILLISECOND: 32 bits -// / * MICROSECOND and NANOSECOND: 64 bits -// / -// / The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds -// / (exclusive), adjusted for the time unit (for example, up to 86400000 -// / exclusive for the MILLISECOND unit). -// / This definition doesn't allow for leap seconds. 
Time values from
-// / measurements with leap seconds will need to be corrected when ingesting
-// / into Arrow (for example by replacing the value 86400 with 86399).
+/// Time is either a 32-bit or 64-bit signed integer type representing an
+/// elapsed time since midnight, stored in one of four units: seconds,
+/// milliseconds, microseconds or nanoseconds.
+///
+/// The integer `bitWidth` depends on the `unit` and must be one of the following:
+/// * SECOND and MILLISECOND: 32 bits
+/// * MICROSECOND and NANOSECOND: 64 bits
+///
+/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+/// (exclusive), adjusted for the time unit (for example, up to 86400000
+/// exclusive for the MILLISECOND unit).
+/// This definition doesn't allow for leap seconds. Time values from
+/// measurements with leap seconds will need to be corrected when ingesting
+/// into Arrow (for example by replacing the value 86400 with 86399).
 type Time struct {
 	_tab flatbuffers.Table
 }
diff --git a/go/arrow/internal/flatbuf/Timestamp.go b/go/arrow/internal/flatbuf/Timestamp.go
index ce172bacdd3..d0058e13e65 100644
--- a/go/arrow/internal/flatbuf/Timestamp.go
+++ b/go/arrow/internal/flatbuf/Timestamp.go
@@ -22,111 +22,111 @@ import (
 	flatbuffers "github.com/google/flatbuffers/go"
 )
 
-// / Timestamp is a 64-bit signed integer representing an elapsed time since a
-// / fixed epoch, stored in either of four units: seconds, milliseconds,
-// / microseconds or nanoseconds, and is optionally annotated with a timezone.
-// /
-// / Timestamp values do not include any leap seconds (in other words, all
-// / days are considered 86400 seconds long).
-// /
-// / Timestamps with a non-empty timezone
-// / ------------------------------------
-// /
-// / If a Timestamp column has a non-empty timezone value, its epoch is
-// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
-// / (the Unix epoch), regardless of the Timestamp's own timezone.
-// /
-// / Therefore, timestamp values with a non-empty timezone correspond to
-// / physical points in time together with some additional information about
-// / how the data was obtained and/or how to display it (the timezone).
-// /
-// / For example, the timestamp value 0 with the timezone string "Europe/Paris"
-// / corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
-// / application may prefer to display it as "January 1st 1970, 01h00" in
-// / the Europe/Paris timezone (which is the same physical point in time).
-// /
-// / One consequence is that timestamp values with a non-empty timezone
-// / can be compared and ordered directly, since they all share the same
-// / well-known point of reference (the Unix epoch).
-// /
-// / Timestamps with an unset / empty timezone
-// / -----------------------------------------
-// /
-// / If a Timestamp column has no timezone value, its epoch is
-// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
-// /
-// / Therefore, timestamp values without a timezone cannot be meaningfully
-// / interpreted as physical points in time, but only as calendar / clock
-// / indications ("wall clock time") in an unspecified timezone.
-// /
-// / For example, the timestamp value 0 with an empty timezone string
-// / corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
-// / is not enough information to interpret it as a well-defined physical
-// / point in time.
-// / -// / One consequence is that timestamp values without a timezone cannot -// / be reliably compared or ordered, since they may have different points of -// / reference. In particular, it is *not* possible to interpret an unset -// / or empty timezone as the same as "UTC". -// / -// / Conversion between timezones -// / ---------------------------- -// / -// / If a Timestamp column has a non-empty timezone, changing the timezone -// / to a different non-empty value is a metadata-only operation: -// / the timestamp values need not change as their point of reference remains -// / the same (the Unix epoch). -// / -// / However, if a Timestamp column has no timezone value, changing it to a -// / non-empty value requires to think about the desired semantics. -// / One possibility is to assume that the original timestamp values are -// / relative to the epoch of the timezone being set; timestamp values should -// / then adjusted to the Unix epoch (for example, changing the timezone from -// / empty to "Europe/Paris" would require converting the timestamp values -// / from "Europe/Paris" to "UTC", which seems counter-intuitive but is -// / nevertheless correct). -// / -// / Guidelines for encoding data from external libraries -// / ---------------------------------------------------- -// / -// / Date & time libraries often have multiple different data types for temporal -// / data. In order to ease interoperability between different implementations the -// / Arrow project has some recommendations for encoding these types into a Timestamp -// / column. -// / -// / An "instant" represents a physical point in time that has no relevant timezone -// / (for example, astronomical data). To encode an instant, use a Timestamp with -// / the timezone string set to "UTC", and make sure the Timestamp values -// / are relative to the UTC epoch (January 1st 1970, midnight). -// / -// / A "zoned date-time" represents a physical point in time annotated with an -// / informative timezone (for example, the timezone in which the data was -// / recorded). To encode a zoned date-time, use a Timestamp with the timezone -// / string set to the name of the timezone, and make sure the Timestamp values -// / are relative to the UTC epoch (January 1st 1970, midnight). -// / -// / (There is some ambiguity between an instant and a zoned date-time with the -// / UTC timezone. Both of these are stored the same in Arrow. Typically, -// / this distinction does not matter. If it does, then an application should -// / use custom metadata or an extension type to distinguish between the two cases.) -// / -// / An "offset date-time" represents a physical point in time combined with an -// / explicit offset from UTC. To encode an offset date-time, use a Timestamp -// / with the timezone string set to the numeric timezone offset string -// / (e.g. "+03:00"), and make sure the Timestamp values are relative to -// / the UTC epoch (January 1st 1970, midnight). -// / -// / A "naive date-time" (also called "local date-time" in some libraries) -// / represents a wall clock time combined with a calendar date, but with -// / no indication of how to map this information to a physical point in time. -// / Naive date-times must be handled with care because of this missing -// / information, and also because daylight saving time (DST) may make -// / some values ambiguous or nonexistent. A naive date-time may be -// / stored as a struct with Date and Time fields. 
However, it may also be
-// / encoded into a Timestamp column with an empty timezone. The timestamp
-// / values should be computed "as if" the timezone of the date-time values
-// / was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
-// / be encoded as timestamp value 0.
+/// Timestamp is a 64-bit signed integer representing an elapsed time since a
+/// fixed epoch, stored in one of four units: seconds, milliseconds,
+/// microseconds or nanoseconds, and is optionally annotated with a timezone.
+///
+/// Timestamp values do not include any leap seconds (in other words, all
+/// days are considered 86400 seconds long).
+///
+/// Timestamps with a non-empty timezone
+/// ------------------------------------
+///
+/// If a Timestamp column has a non-empty timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
+/// (the Unix epoch), regardless of the Timestamp's own timezone.
+///
+/// Therefore, timestamp values with a non-empty timezone correspond to
+/// physical points in time together with some additional information about
+/// how the data was obtained and/or how to display it (the timezone).
+///
+/// For example, the timestamp value 0 with the timezone string "Europe/Paris"
+/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
+/// application may prefer to display it as "January 1st 1970, 01h00" in
+/// the Europe/Paris timezone (which is the same physical point in time).
+///
+/// One consequence is that timestamp values with a non-empty timezone
+/// can be compared and ordered directly, since they all share the same
+/// well-known point of reference (the Unix epoch).
+///
+/// Timestamps with an unset / empty timezone
+/// -----------------------------------------
+///
+/// If a Timestamp column has no timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
+///
+/// Therefore, timestamp values without a timezone cannot be meaningfully
+/// interpreted as physical points in time, but only as calendar / clock
+/// indications ("wall clock time") in an unspecified timezone.
+///
+/// For example, the timestamp value 0 with an empty timezone string
+/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
+/// is not enough information to interpret it as a well-defined physical
+/// point in time.
+///
+/// One consequence is that timestamp values without a timezone cannot
+/// be reliably compared or ordered, since they may have different points of
+/// reference. In particular, it is *not* possible to interpret an unset
+/// or empty timezone as the same as "UTC".
+///
+/// Conversion between timezones
+/// ----------------------------
+///
+/// If a Timestamp column has a non-empty timezone, changing the timezone
+/// to a different non-empty value is a metadata-only operation:
+/// the timestamp values need not change as their point of reference remains
+/// the same (the Unix epoch).
+///
+/// However, if a Timestamp column has no timezone value, changing it to a
+/// non-empty value requires thinking about the desired semantics.
+/// One possibility is to assume that the original timestamp values are
+/// relative to the epoch of the timezone being set; timestamp values should
+/// then be adjusted to the Unix epoch (for example, changing the timezone from
+/// empty to "Europe/Paris" would require converting the timestamp values
+/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
+/// nevertheless correct).
+///
+/// Guidelines for encoding data from external libraries
+/// ----------------------------------------------------
+///
+/// Date & time libraries often have multiple different data types for temporal
+/// data. In order to ease interoperability between different implementations, the
+/// Arrow project has some recommendations for encoding these types into a Timestamp
+/// column.
+///
+/// An "instant" represents a physical point in time that has no relevant timezone
+/// (for example, astronomical data). To encode an instant, use a Timestamp with
+/// the timezone string set to "UTC", and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// A "zoned date-time" represents a physical point in time annotated with an
+/// informative timezone (for example, the timezone in which the data was
+/// recorded). To encode a zoned date-time, use a Timestamp with the timezone
+/// string set to the name of the timezone, and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// (There is some ambiguity between an instant and a zoned date-time with the
+/// UTC timezone. Both of these are stored the same in Arrow. Typically,
+/// this distinction does not matter. If it does, then an application should
+/// use custom metadata or an extension type to distinguish between the two cases.)
+///
+/// An "offset date-time" represents a physical point in time combined with an
+/// explicit offset from UTC. To encode an offset date-time, use a Timestamp
+/// with the timezone string set to the numeric timezone offset string
+/// (e.g. "+03:00"), and make sure the Timestamp values are relative to
+/// the UTC epoch (January 1st 1970, midnight).
+///
+/// A "naive date-time" (also called "local date-time" in some libraries)
+/// represents a wall clock time combined with a calendar date, but with
+/// no indication of how to map this information to a physical point in time.
+/// Naive date-times must be handled with care because of this missing
+/// information, and also because daylight saving time (DST) may make
+/// some values ambiguous or nonexistent. A naive date-time may be
+/// stored as a struct with Date and Time fields. However, it may also be
+/// encoded into a Timestamp column with an empty timezone. The timestamp
+/// values should be computed "as if" the timezone of the date-time values
+/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
+/// be encoded as timestamp value 0.
 type Timestamp struct {
 	_tab flatbuffers.Table
 }
@@ -159,16 +159,16 @@ func (rcv *Timestamp) MutateUnit(n TimeUnit) bool {
 	return rcv._tab.MutateInt16Slot(4, int16(n))
 }
 
-// / The timezone is an optional string indicating the name of a timezone,
-// / one of:
-// /
-// / * As used in the Olson timezone database (the "tz database" or
-// /   "tzdata"), such as "America/New_York".
-// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
-// /   such as "+07:30".
-// /
-// / Whether a timezone string is present indicates different semantics about
-// / the data (see above).
+/// The timezone is an optional string indicating the name of a timezone, +/// one of: +/// +/// * As used in the Olson timezone database (the "tz database" or +/// "tzdata"), such as "America/New_York". +/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +/// such as "+07:30". +/// +/// Whether a timezone string is present indicates different semantics about +/// the data (see above). func (rcv *Timestamp) Timezone() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -177,16 +177,16 @@ func (rcv *Timestamp) Timezone() []byte { return nil } -// / The timezone is an optional string indicating the name of a timezone, -// / one of: -// / -// / * As used in the Olson timezone database (the "tz database" or -// / "tzdata"), such as "America/New_York". -// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -// / such as "+07:30". -// / -// / Whether a timezone string is present indicates different semantics about -// / the data (see above). +/// The timezone is an optional string indicating the name of a timezone, +/// one of: +/// +/// * As used in the Olson timezone database (the "tz database" or +/// "tzdata"), such as "America/New_York". +/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +/// such as "+07:30". +/// +/// Whether a timezone string is present indicates different semantics about +/// the data (see above). func TimestampStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Type.go b/go/arrow/internal/flatbuf/Type.go index df8ba8650e1..ab2bce9c636 100644 --- a/go/arrow/internal/flatbuf/Type.go +++ b/go/arrow/internal/flatbuf/Type.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -// / ---------------------------------------------------------------------- -// / Top-level Type value, enabling extensible type-specific metadata. We can -// / add new logical types to Type without breaking backwards compatibility +/// ---------------------------------------------------------------------- +/// Top-level Type value, enabling extensible type-specific metadata. 
We can +/// add new logical types to Type without breaking backwards compatibility type Type byte const ( diff --git a/go/arrow/internal/flatbuf/Union.go b/go/arrow/internal/flatbuf/Union.go index 0367fb3c1fb..e34121d4757 100644 --- a/go/arrow/internal/flatbuf/Union.go +++ b/go/arrow/internal/flatbuf/Union.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / A union is a complex type with children in Field -// / By default ids in the type vector refer to the offsets in the children -// / optionally typeIds provides an indirection between the child offset and the type id -// / for each child `typeIds[offset]` is the id used in the type vector +/// A union is a complex type with children in Field +/// By default ids in the type vector refer to the offsets in the children +/// optionally typeIds provides an indirection between the child offset and the type id +/// for each child `typeIds[offset]` is the id used in the type vector type Union struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8.go b/go/arrow/internal/flatbuf/Utf8.go index cab4ce7743c..4ff365a3750 100644 --- a/go/arrow/internal/flatbuf/Utf8.go +++ b/go/arrow/internal/flatbuf/Utf8.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Unicode with UTF-8 encoding +/// Unicode with UTF-8 encoding type Utf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8View.go b/go/arrow/internal/flatbuf/Utf8View.go index f294126a618..9cf82149019 100644 --- a/go/arrow/internal/flatbuf/Utf8View.go +++ b/go/arrow/internal/flatbuf/Utf8View.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -// / Logically the same as Utf8, but the internal representation uses a view -// / struct that contains the string length and either the string's entire data -// / inline (for small strings) or an inlined prefix, an index of another buffer, -// / and an offset pointing to a slice in that buffer (for non-small strings). -// / -// / Since it uses a variable number of data buffers, each Field with this type -// / must have a corresponding entry in `variadicBufferCounts`. +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. type Utf8View struct { _tab flatbuffers.Table } diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index db4208dc8d3..0251b08c09b 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -18,40 +18,40 @@ // // Examples: // -// $> arrow-cat ./testdata/primitives.data -// version: V4 -// record 1/3... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2/3... 
-// col[0] "bools": [true (null) (null) false true] -// [...] +// $> arrow-cat ./testdata/primitives.data +// version: V4 +// record 1/3... +// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2/3... +// col[0] "bools": [true (null) (null) false true] +// [...] // -// $> gen-arrow-stream | arrow-cat -// record 1... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2... -// col[0] "bools": [true (null) (null) false true] -// [...] +// $> gen-arrow-stream | arrow-cat +// record 1... +// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2... +// col[0] "bools": [true (null) (null) false true] +// [...] 
package main import ( diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 49865be96cd..4230ae24499 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -18,38 +18,38 @@ // // Examples: // -// $> arrow-ls ./testdata/primitives.data -// version: V4 -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> arrow-ls ./testdata/primitives.data +// version: V4 +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 // -// $> gen-arrow-stream | arrow-ls -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> gen-arrow-stream | arrow-ls +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 package main import ( diff --git a/go/arrow/math/math_amd64.go b/go/arrow/math/math_amd64.go index 2397eef718d..44301dc2415 100644 --- a/go/arrow/math/math_amd64.go +++ b/go/arrow/math/math_amd64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_arm64.go b/go/arrow/math/math_arm64.go index b150eb061f9..014664b0463 100644 --- a/go/arrow/math/math_arm64.go +++ b/go/arrow/math/math_arm64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package math @@ -26,8 +25,8 @@ import ( func init() { if cpu.ARM64.HasASIMD { initNEON() - } else { - initGo() + } else { + initGo() } } diff --git a/go/arrow/math/math_noasm.go b/go/arrow/math/math_noasm.go index 5527ebf8018..0fa924d90aa 100644 --- a/go/arrow/math/math_noasm.go +++ b/go/arrow/math/math_noasm.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build noasm // +build noasm package math diff --git a/go/arrow/math/math_ppc64le.go b/go/arrow/math/math_ppc64le.go index 85c8f2fe2e7..3daeac7efaf 100644 --- a/go/arrow/math/math_ppc64le.go +++ b/go/arrow/math/math_ppc64le.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_s390x.go b/go/arrow/math/math_s390x.go index 85c8f2fe2e7..3daeac7efaf 100644 --- a/go/arrow/math/math_s390x.go +++ b/go/arrow/math/math_s390x.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package math diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index a6a2f417989..af25d1899a6 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build cgo && ccalloc -// +build cgo,ccalloc +// +build cgo +// +build ccalloc package memory diff --git a/go/arrow/memory/cgo_allocator_defaults.go b/go/arrow/memory/cgo_allocator_defaults.go index 0a2e9a342d3..501431a0e1e 100644 --- a/go/arrow/memory/cgo_allocator_defaults.go +++ b/go/arrow/memory/cgo_allocator_defaults.go @@ -14,8 +14,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build cgo && ccalloc && !cclog -// +build cgo,ccalloc,!cclog +// +build cgo +// +build ccalloc +// +build !cclog package memory diff --git a/go/arrow/memory/cgo_allocator_logging.go b/go/arrow/memory/cgo_allocator_logging.go index fe2e3a940ce..01ad6b39480 100644 --- a/go/arrow/memory/cgo_allocator_logging.go +++ b/go/arrow/memory/cgo_allocator_logging.go @@ -14,8 +14,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build cgo && ccalloc && cclog -// +build cgo,ccalloc,cclog +// +build cgo +// +build ccalloc +// +build cclog package memory diff --git a/go/arrow/memory/memory_amd64.go b/go/arrow/memory/memory_amd64.go index 895ddc07cf8..58356d64825 100644 --- a/go/arrow/memory/memory_amd64.go +++ b/go/arrow/memory/memory_amd64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_arm64.go b/go/arrow/memory/memory_arm64.go index 52603349585..3db5d110131 100755 --- a/go/arrow/memory/memory_arm64.go +++ b/go/arrow/memory/memory_arm64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_avx2_amd64.go b/go/arrow/memory/memory_avx2_amd64.go index 39fb3a5f769..2bd851ea532 100644 --- a/go/arrow/memory/memory_avx2_amd64.go +++ b/go/arrow/memory/memory_avx2_amd64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_js_wasm.go b/go/arrow/memory/memory_js_wasm.go index 5cc0c84d39e..9b94d99ff33 100644 --- a/go/arrow/memory/memory_js_wasm.go +++ b/go/arrow/memory/memory_js_wasm.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build wasm // +build wasm package memory diff --git a/go/arrow/memory/memory_neon_arm64.go b/go/arrow/memory/memory_neon_arm64.go index 806ca575f22..6cb0400c9c5 100755 --- a/go/arrow/memory/memory_neon_arm64.go +++ b/go/arrow/memory/memory_neon_arm64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_noasm.go b/go/arrow/memory/memory_noasm.go index 44f19c091c7..bf8846fa2e0 100644 --- a/go/arrow/memory/memory_noasm.go +++ b/go/arrow/memory/memory_noasm.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build noasm // +build noasm package memory diff --git a/go/arrow/memory/memory_sse4_amd64.go b/go/arrow/memory/memory_sse4_amd64.go index 1711a1ee3ea..716c0d2704a 100644 --- a/go/arrow/memory/memory_sse4_amd64.go +++ b/go/arrow/memory/memory_sse4_amd64.go @@ -14,7 +14,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -//go:build !noasm // +build !noasm package memory From 03a12f9cf50dbb92184afd815a73454b138ce713 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 15 Dec 2023 08:55:29 -0500 Subject: [PATCH 23/28] randomize offsets --- dev/archery/archery/integration/datagen.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 488ad6f60c9..2bbc843836a 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -949,17 +949,15 @@ def _get_children(self): def generate_column(self, size, name=None): MAX_LIST_SIZE = 4 + VALUES_SIZE = size * MAX_LIST_SIZE is_valid = self._make_is_valid(size) - offsets = [] + + MAX_OFFSET = VALUES_SIZE - MAX_LIST_SIZE + offsets = np.random.randint(0, MAX_OFFSET + 1, size=size) sizes = np.random.randint(0, MAX_LIST_SIZE + 1, size=size) - offset = 0 - for s in sizes: - offsets.append(offset) - offset += int(s) - # The offset now is the total number of elements in the child array - values = self.value_field.generate_column(offset) + values = self.value_field.generate_column(VALUES_SIZE) if name is None: name = self.name From b9237669d321e38bdeeffb1e675be5399a9575e8 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 15 Dec 2023 09:12:39 -0500 Subject: [PATCH 24/28] update Integration.rst --- docs/source/format/Integration.rst | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index e1160b287e7..1a9b1b97f07 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -223,7 +223,7 @@ considered equivalent to ``[]`` (no metadata). 
Duplicated keys are not forbidden **Type**: :: { - "name" : "null|struct|list|largelist|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map" + "name" : "null|struct|list|largelist|listview|largelistview|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|utf8view|binaryview|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map|runendencoded" } A ``Type`` will have other fields as defined in @@ -446,12 +446,22 @@ or ``DATA``. ``BufferData`` is encoded based on the type of buffer: -* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable +* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable ``Field`` still has a ``VALIDITY`` array, even though all values are 1. * ``OFFSET``: a JSON array of integers for 32-bit offsets or - string-formatted integers for 64-bit offsets -* ``TYPE_ID``: a JSON array of integers -* ``DATA``: a JSON array of encoded values + string-formatted integers for 64-bit offsets. +* ``TYPE_ID``: a JSON array of integers. +* ``DATA``: a JSON array of encoded values. +* ``VARIADIC_DATA_BUFFERS``: a JSON array of data buffers represented as + hex encoded strings. +* ``VIEWS``: a JSON array of encoded views, which are JSON objects with: + * ``SIZE``: an integer indicating the size of the view, + * ``INLINED``: an encoded value (this field will be present if ``SIZE`` + is smaller than 12, otherwise the next three fields will be present), + * ``PREFIX_HEX``: the first four bytes of the view encoded as hex, + * ``BUFFER_INDEX``: the index in ``VARIADIC_DATA_BUFFERS`` of the buffer + viewed, + * ``OFFSET``: the offset in the buffer viewed. The value encoding for ``DATA`` is different depending on the logical type: @@ -527,6 +537,9 @@ in ``datagen.py``): - Signed indices - Unsigned indices - Nested dictionaries +* Run end encoded +* Binary view and string view +* List view and large list view * Extension Types From e10341a568ccee4b63259cbf335a358d4b4ae623 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 15 Dec 2023 09:26:00 -0500 Subject: [PATCH 25/28] review comments --- go/arrow/cdata/cdata.go | 9 +-------- go/arrow/internal/utils.go | 4 +--- go/arrow/ipc/file_reader.go | 30 +++++++++++------------------- go/arrow/memory/util.go | 8 ++++++++ go/arrow/type_traits.go | 32 ++++++++++++++++++++++---------- 5 files changed, 43 insertions(+), 40 deletions(-) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 66494ac55cd..64cc8456e81 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -686,14 +686,7 @@ func (imp *cimporter) importBinaryViewLike() (err error) { } buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) - // XXX couldn't figure out how to extract file_reader.go::releaseBuffers as a utility - defer func() { - for _, buf := range buffers { - if buf != nil { - buf.Release() - } - } - }() + defer memory.ReleaseBuffers(buffers) if buffers[0], err = imp.importNullBitmap(0); err != nil { return diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 934bf628cb4..7b5df167ea4 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -51,9 +51,7 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool { // StringView and BinaryView. 
func HasBufferSizesBuffer(id arrow.Type) bool { switch id { - case arrow.STRING_VIEW: - return true - case arrow.BINARY_VIEW: + case arrow.STRING_VIEW, arrow.BINARY_VIEW: return true default: return false diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index dd51a761510..7bc7f6ebfaa 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -521,7 +521,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { case *arrow.RunEndEncodedType: field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) runEnds := ctx.loadChild(dt.RunEnds()) defer runEnds.Release() @@ -583,7 +583,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -591,7 +591,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -603,7 +603,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData for i := 0; i < int(nVariadicBufs); i++ { buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -611,7 +611,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -619,7 +619,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -630,7 +630,7 @@ func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -641,7 +641,7 @@ func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -651,7 +651,7 @@ func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.A func (ctx *arrayLoaderContext) 
loadFixedSizeList(dt *arrow.FixedSizeListType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -661,7 +661,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) ar func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { @@ -704,7 +704,7 @@ func (ctx *arrayLoaderContext) loadUnion(dt arrow.UnionType) arrow.ArrayData { } } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { subs[i] = ctx.loadChild(f.Type) @@ -768,11 +768,3 @@ func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker } return dictutils.KindReplacement, nil } - -func releaseBuffers(buffers []*memory.Buffer) { - for _, b := range buffers { - if b != nil { - b.Release() - } - } -} diff --git a/go/arrow/memory/util.go b/go/arrow/memory/util.go index 3b0d3a5cb9e..6cc7ec91b96 100644 --- a/go/arrow/memory/util.go +++ b/go/arrow/memory/util.go @@ -35,3 +35,11 @@ func isMultipleOfPowerOf2(v int, d int) bool { func addressOf(b []byte) uintptr { return uintptr(unsafe.Pointer(&b[0])) } + +func ReleaseBuffers(buffers []*Buffer) { + for _, b := range buffers { + if b != nil { + b.Release() + } + } +} diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 73485f40ca1..6a9e080be66 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -78,35 +78,47 @@ type TemporalType interface { MonthInterval | MonthDayNanoInterval } -func sliceAs[Out, T interface{}](b []T) []Out { - len_bytes := len(b) * int(unsafe.Sizeof(b[0])) - cap_bytes := cap(b) * int(unsafe.Sizeof(b[0])) +func reinterpretSlice[Out, T any](b []T) []Out { + lenBytes := len(b) * int(unsafe.Sizeof(b[0])) + capBytes := cap(b) * int(unsafe.Sizeof(b[0])) var z Out - len_out := len_bytes / int(unsafe.Sizeof(z)) - cap_out := cap_bytes / int(unsafe.Sizeof(z)) + lenOut := lenBytes / int(unsafe.Sizeof(z)) + capOut := capBytes / int(unsafe.Sizeof(z)) h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), cap_out)[:len_out] + return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), capOut)[:lenOut] } +// GetValues reinterprets the data.Buffers()[i] to a slice of T with len=data.Len(). +// +// If the buffer is nil, nil will be returned. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). func GetValues[T FixedWidthType](data ArrayData, i int) []T { if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { return nil } - return sliceAs[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()] + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()] } +// GetOffsets reinterprets the data.Buffers()[i] to a slice of T with len=data.Len()+1. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). func GetOffsets[T int32 | int64](data ArrayData, i int) []T { - return sliceAs[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()+1] + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()+1] } +// GetBytes reinterprets a slice of T to a slice of bytes. 
func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { - return sliceAs[byte](in) + return reinterpretSlice[byte](in) } +// GetData reinterprets a slice of bytes to a slice of T. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). func GetData[T FixedWidthType | ViewHeader](in []byte) []T { - return sliceAs[T](in) + return reinterpretSlice[T](in) } var typMap = map[reflect.Type]DataType{ From cb6a4efdd74c4b496572f2223be39f286bdcb5d1 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 15 Dec 2023 10:10:52 -0500 Subject: [PATCH 26/28] avoid use of reflect.SliceHeader --- go/arrow/type_traits.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go index 6a9e080be66..67fa8a266b3 100644 --- a/go/arrow/type_traits.go +++ b/go/arrow/type_traits.go @@ -79,15 +79,18 @@ type TemporalType interface { } func reinterpretSlice[Out, T any](b []T) []Out { - lenBytes := len(b) * int(unsafe.Sizeof(b[0])) - capBytes := cap(b) * int(unsafe.Sizeof(b[0])) + if cap(b) == 0 { + return nil + } + out := (*Out)(unsafe.Pointer(&b[:1][0])) + + lenBytes := len(b) * int(unsafe.Sizeof(b[0])) + capBytes := cap(b) * int(unsafe.Sizeof(b[0])) - var z Out - lenOut := lenBytes / int(unsafe.Sizeof(z)) - capOut := capBytes / int(unsafe.Sizeof(z)) + lenOut := lenBytes / int(unsafe.Sizeof(*out)) + capOut := capBytes / int(unsafe.Sizeof(*out)) - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - return unsafe.Slice((*Out)(unsafe.Pointer(h.Data)), capOut)[:lenOut] + return unsafe.Slice(out, capOut)[:lenOut] } // GetValues reinterprets the data.Buffers()[i] to a slice of T with len=data.Len(). @@ -99,14 +102,14 @@ func GetValues[T FixedWidthType](data ArrayData, i int) []T { if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { return nil } - return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()] + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()] } // GetOffsets reinterprets the data.Buffers()[i] to a slice of T with len=data.Len()+1. // // NOTE: the buffer's length must be a multiple of Sizeof(T). func GetOffsets[T int32 | int64](data ArrayData, i int) []T { - return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset():data.Offset()+data.Len()+1] + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()+1] } // GetBytes reinterprets a slice of T to a slice of bytes. 
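As an aside (not part of the patch series), the unsafe.Slice-based rewrite above is easier to trust with a self-contained sketch; the names below are illustrative only:

```go
package main

import (
	"fmt"
	"unsafe"
)

// asBytes views a []int32 as its underlying bytes without copying, mirroring
// reinterpretSlice: take the address of the first element via in[:1][0]
// (valid whenever cap > 0), then rebuild a slice with byte-scaled len and cap.
func asBytes(in []int32) []byte {
	if cap(in) == 0 {
		return nil
	}
	const elem = int(unsafe.Sizeof(int32(0)))
	out := (*byte)(unsafe.Pointer(&in[:1][0]))
	return unsafe.Slice(out, cap(in)*elem)[:len(in)*elem]
}

func main() {
	fmt.Printf("% x\n", asBytes([]int32{1, -1})) // little-endian: 01 00 00 00 ff ff ff ff
}
```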
From 297f456c6a2c20afa176f6035c95a070438e0c58 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 15 Dec 2023 13:26:11 -0500 Subject: [PATCH 27/28] fix listview slicing optimization --- go/arrow/array/encoded.go | 6 +- go/arrow/ipc/writer.go | 106 ++++++++++++++++-- go/internal/bitutils/bit_set_run_reader.go | 6 +- go/internal/utils/math.go | 22 +--- go/parquet/file/column_reader.go | 2 +- go/parquet/file/column_reader_test.go | 2 +- go/parquet/file/level_conversion.go | 2 +- .../internal/encoding/boolean_decoder.go | 6 +- .../internal/encoding/byte_array_decoder.go | 2 +- go/parquet/internal/encoding/decoder.go | 2 +- .../internal/encoding/delta_bit_packing.go | 4 +- .../internal/encoding/delta_byte_array.go | 2 +- .../encoding/delta_length_byte_array.go | 2 +- .../encoding/fixed_len_byte_array_decoder.go | 2 +- .../encoding/plain_encoder_types.gen.go | 10 +- .../encoding/plain_encoder_types.gen.go.tmpl | 2 +- .../internal/encoding/typed_encoder.gen.go | 28 ++--- .../encoding/typed_encoder.gen.go.tmpl | 4 +- go/parquet/internal/encoding/types.go | 4 +- go/parquet/internal/testutils/pagebuilder.go | 8 +- go/parquet/internal/utils/bit_reader.go | 4 +- go/parquet/internal/utils/rle.go | 6 +- .../internal/utils/typed_rle_dict.gen.go | 56 ++++----- .../internal/utils/typed_rle_dict.gen.go.tmpl | 8 +- go/parquet/pqarrow/column_readers.go | 6 +- 25 files changed, 187 insertions(+), 115 deletions(-) diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index fa5fa7addf3..8ca1416b92a 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -150,19 +150,19 @@ func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { case *Int16: for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { v -= int16(r.data.offset) - v = int16(utils.MinInt(int(v), r.data.length)) + v = int16(utils.Min(int(v), r.data.length)) bldr.(*Int16Builder).Append(v) } case *Int32: for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { v -= int32(r.data.offset) - v = int32(utils.MinInt(int(v), r.data.length)) + v = int32(utils.Min(int(v), r.data.length)) bldr.(*Int32Builder).Append(v) } case *Int64: for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { v -= int64(r.data.offset) - v = int64(utils.MinInt(int(v), r.data.length)) + v = int64(utils.Min(int(v), r.data.length)) bldr.(*Int64Builder).Append(v) } } diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index 98d06ce7dbf..31ce53a0f1a 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -34,6 +34,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/internal/dictutils" "github.com/apache/arrow/go/v15/arrow/internal/flatbuf" "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/internal/utils" ) type swriter struct { @@ -746,16 +747,11 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { w.depth++ case *arrow.ListViewType, *arrow.LargeListViewType: - data := arr.Data() arr := arr.(array.VarLenListLike) - voffsets := arr.Data().Buffers()[1] - if voffsets != nil { - voffsets.Retain() - } - vsizes := data.Buffers()[2] - if vsizes != nil { - vsizes.Retain() - } + + voffsets, minOffset, maxEnd := w.getZeroBasedListViewOffsets(arr) + vsizes := w.getListViewSizes(arr) + p.body = append(p.body, voffsets) p.body = append(p.body, vsizes) @@ -764,6 +760,10 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { values = arr.ListValues() ) + if minOffset != 0 || maxEnd < int64(values.Len()) { + values = 
array.NewSlice(values, minOffset, maxEnd)
+			defer values.Release()
+		}
 
 		err := w.visit(p, values)
 
 		if err != nil {
@@ -863,6 +863,94 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer
 	return voffsets
 }
 
+func getZeroBasedListViewOffsets[OffsetT int32 | int64](mem memory.Allocator, arr array.VarLenListLike) (valueOffsets *memory.Buffer, minOffset, maxEnd OffsetT) {
+	requiredBytes := int(unsafe.Sizeof(minOffset)) * arr.Len()
+	if arr.Data().Offset() == 0 {
+		// slice offsets to the used extent, in case we have a truncated slice
+		minOffset, maxEnd = 0, OffsetT(arr.ListValues().Len())
+		valueOffsets = arr.Data().Buffers()[1]
+		if valueOffsets.Len() > requiredBytes {
+			valueOffsets = memory.SliceBuffer(valueOffsets, 0, requiredBytes)
+		} else {
+			valueOffsets.Retain()
+		}
+		return
+	}
+
+	// non-zero offset, it's likely that the smallest offset is not zero
+	// we must a) create a new offsets array with shifted offsets and
+	// b) slice the values array accordingly
+
+	valueOffsets = memory.NewResizableBuffer(mem)
+	valueOffsets.Resize(requiredBytes)
+	if arr.Len() > 0 {
+		// start from the maximum int32/int64 value so Min below finds the true minimum
+		minOffset = ^((^OffsetT(0)) << ((8 * unsafe.Sizeof(minOffset)) - 1))
+		for i := 0; i < arr.Len(); i++ {
+			start, end := arr.ValueOffsets(i)
+			minOffset = utils.Min(minOffset, OffsetT(start))
+			maxEnd = utils.Max(maxEnd, OffsetT(end))
+		}
+	}
+
+	offsets := arrow.GetData[OffsetT](arr.Data().Buffers()[1].Bytes())[arr.Data().Offset():]
+	destOffset := arrow.GetData[OffsetT](valueOffsets.Bytes())
+	for i := 0; i < arr.Len(); i++ {
+		destOffset[i] = offsets[i] - minOffset
+	}
+	return
+}
+
+func getListViewSizes[OffsetT int32 | int64](arr array.VarLenListLike) *memory.Buffer {
+	var z OffsetT
+	requiredBytes := int(unsafe.Sizeof(z)) * arr.Len()
+	sizes := arr.Data().Buffers()[2]
+
+	if arr.Data().Offset() != 0 || sizes.Len() > requiredBytes {
+		// slice sizes to the used extent, in case we have a truncated slice
+		offsetBytes := arr.Data().Offset() * int(unsafe.Sizeof(z))
+		sizes = memory.SliceBuffer(sizes, offsetBytes, requiredBytes)
+	} else {
+		sizes.Retain()
+	}
+	return sizes
+}
+
+func (w *recordEncoder) getZeroBasedListViewOffsets(arr array.VarLenListLike) (*memory.Buffer, int64, int64) {
+	if arr.Len() == 0 {
+		return nil, 0, 0
+	}
+
+	var (
+		outOffsets     *memory.Buffer
+		minOff, maxEnd int64
+	)
+
+	switch v := arr.(type) {
+	case *array.ListView:
+		voffsets, outOff, outEnd := getZeroBasedListViewOffsets[int32](w.mem, v)
+		outOffsets = voffsets
+		minOff, maxEnd = int64(outOff), int64(outEnd)
+	case *array.LargeListView:
+		outOffsets, minOff, maxEnd = getZeroBasedListViewOffsets[int64](w.mem, v)
+	}
+	return outOffsets, minOff, maxEnd
+}
+
+func (w *recordEncoder) getListViewSizes(arr array.VarLenListLike) *memory.Buffer {
+	if arr.Len() == 0 {
+		return nil
+	}
+
+	switch v := arr.(type) {
+	case *array.ListView:
+		return getListViewSizes[int32](v)
+	case *array.LargeListView:
+		return getListViewSizes[int64](v)
+	}
+	return nil
+}
+
 func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer {
 	// this case sucks.
Because the offsets are different for each // child array, when we have a sliced array, we need to re-base diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index 6764ca79126..374b8d4aab3 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -113,7 +113,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { bitOffset := int8(startOffset % 8) if length > 0 && bitOffset != 0 { - br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(8-bitOffset))) br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) } return @@ -124,7 +124,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { endBitOffset := int8((startOffset + length) % 8) if length > 0 && endBitOffset != 0 { br.pos++ - br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(endBitOffset))) br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) } } @@ -219,7 +219,7 @@ func (br *baseSetBitRunReader) skipNextZeros() { if br.remaining > 0 { br.curWord = br.loadPartial(0, br.remaining) br.curNumBits = int32(br.remaining) - nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) + nzeros := int32(utils.Min(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) br.curWord = br.consumeBits(br.curWord, nzeros) br.curNumBits -= nzeros br.remaining -= int64(nzeros) diff --git a/go/internal/utils/math.go b/go/internal/utils/math.go index 62cf96ce431..c8311750e3a 100644 --- a/go/internal/utils/math.go +++ b/go/internal/utils/math.go @@ -16,32 +16,16 @@ package utils -// Min is a convenience Min function for int64 -func Min(a, b int64) int64 { - if a < b { - return a - } - return b -} +import "golang.org/x/exp/constraints" -// MinInt is a convenience Min function for int -func MinInt(a, b int) int { +func Min[T constraints.Ordered](a, b T) T { if a < b { return a } return b } -// Max is a convenience Max function for int64 -func Max(a, b int64) int64 { - if a > b { - return a - } - return b -} - -// MaxInt is a convenience Max function for int -func MaxInt(a, b int) int { +func Max[T constraints.Ordered](a, b T) T { if a > b { return a } diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index 766638d88f2..342fb3b198a 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -517,7 +517,7 @@ func (c *columnChunkReader) readBatch(batchSize int64, defLvls, repLvls []int16, // if this is a required field, ndefs will be 0 since there is no definition // levels stored with it and `read` will be the number of values, otherwise // we use ndefs since it will be equal to or greater than read. 
- totalVals := int64(utils.MaxInt(ndefs, read)) + totalVals := int64(utils.Max(ndefs, read)) c.consumeBufferedValues(totalVals) totalLvls += totalVals diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index 21ea52e2b7b..a6725bc02fe 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -244,7 +244,7 @@ func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { totalRead += batch batchActual += int(read) - batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096))) + batchSize = int32(utils.Min(1<<24, utils.Max(int(batchSize*2), 4096))) if batch <= 0 { break } diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index f6707fce86d..251468658ae 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -144,7 +144,7 @@ func defLevelsBatchToBitmap(defLevels []int16, remainingUpperBound int64, info L var batch []int16 for len(defLevels) > 0 { - batchSize := shared_utils.MinInt(maxbatch, len(defLevels)) + batchSize := shared_utils.Min(maxbatch, len(defLevels)) batch, defLevels = defLevels[:batchSize], defLevels[batchSize:] definedBitmap := bmi.GreaterThanBitmap(batch, info.DefLevel-1) diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index 3782dc85ea8..353f4438559 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -55,7 +55,7 @@ func (dec *PlainBooleanDecoder) SetData(nvals int, data []byte) error { // // Returns the number of values decoded func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) // attempts to read all remaining bool values from the current data byte unalignedExtract := func(i int) int { @@ -148,7 +148,7 @@ func (dec *RleBooleanDecoder) SetData(nvals int, data []byte) error { } func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) var ( buf [1024]uint64 @@ -156,7 +156,7 @@ func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { ) for n > 0 { - batch := shared_utils.MinInt(len(buf), n) + batch := shared_utils.Min(len(buf), n) decoded := dec.rleDec.GetBatch(buf[:batch]) if decoded != batch { return max - n, io.ErrUnexpectedEOF diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 82ce9f84265..0c1c858fb48 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ b/go/parquet/internal/encoding/byte_array_decoder.go @@ -49,7 +49,7 @@ func (PlainByteArrayDecoder) Type() parquet.Type { // // Returns the number of values that were decoded. 
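All of the MinInt/MaxInt call-site changes in the hunks above and below are mechanical renames onto the consolidated generic from go/internal/utils/math.go. A self-contained sketch of how the new helper behaves at different call sites:

package main

import (
	"fmt"

	"golang.org/x/exp/constraints"
)

// Min mirrors the generic helper introduced above: one definition replaces
// the former int64-only Min and int-only MinInt.
func Min[T constraints.Ordered](a, b T) T {
	if a < b {
		return a
	}
	return b
}

func main() {
	fmt.Println(Min(3, 7))        // T inferred as int (old MinInt)
	fmt.Println(Min(int64(3), 9)) // T inferred as int64 (old Min)
	fmt.Println(Min("a", "b"))    // any ordered type works
}

Because the type argument is inferred, each call site changes only in the identifier, which is why the remainder of this patch is a pure rename.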
func (pbad *PlainByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), pbad.nvals) + max := utils.Min(len(out), pbad.nvals) for i := 0; i < max; i++ { // there should always be at least four bytes which is the length of the diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index cee624730e9..acb57fbce78 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -155,7 +155,7 @@ func (d *dictDecoder) decodeSpaced(out interface{}, nullCount int, validBits []b } func (d *dictDecoder) DecodeIndices(numValues int, bldr array.Builder) (int, error) { - n := shared_utils.MinInt(numValues, d.nvals) + n := shared_utils.Min(numValues, d.nvals) if cap(d.idxScratchSpace) < n { d.idxScratchSpace = make([]uint64, n, bitutil.NextPowerOf2(n)) } else { diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index a00f3457cac..560b77f4c66 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -158,7 +158,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.MinInt(len(out), int(d.totalValues)) + max := shared_utils.Min(len(out), int(d.totalValues)) if max == 0 { return 0, nil } @@ -249,7 +249,7 @@ func (d *DeltaBitPackInt64Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) { - max := shared_utils.MinInt(len(out), d.nvals) + max := shared_utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 57b0c8a70e5..5e5002e34a6 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -172,7 +172,7 @@ func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode decodes byte arrays into the slice provided and returns the number of values actually decoded func (d *DeltaByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index d5a99c187d1..183eb453ca0 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -126,7 +126,7 @@ func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode populates the passed in slice with data decoded until it hits the length of out // or runs out of values in the column to decode, then returns the number of values actually decoded. 
func (d *DeltaLengthByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) for i := 0; i < max; i++ { out[i] = d.data[:d.lengths[i]:d.lengths[i]] d.data = d.data[d.lengths[i]:] diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index 1e589fc2e7b..2054e1bb85f 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -38,7 +38,7 @@ func (PlainFixedLenByteArrayDecoder) Type() parquet.Type { // values to decode or the length of out has been filled. Then returns the total number of values // that were decoded. func (pflba *PlainFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - max := utils.MinInt(len(out), pflba.nvals) + max := utils.Min(len(out), pflba.nvals) numBytesNeeded := max * pflba.typeLen if numBytesNeeded > len(pflba.data) || numBytesNeeded > math.MaxInt32 { return 0, xerrors.New("parquet: eof exception") diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index 09403d74cb0..a41f754f62a 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -172,7 +172,7 @@ func (PlainInt32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt32Decoder) Decode(out []int32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -277,7 +277,7 @@ func (PlainInt64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt64Decoder) Decode(out []int64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -382,7 +382,7 @@ func (PlainInt96Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt96Decoder) Decode(out []parquet.Int96) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(parquet.Int96SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int96, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -487,7 +487,7 @@ func (PlainFloat32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. 
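The Plain*Decoder hunks around here all share the same clamp-then-bounds-check shape described in their doc comments. A minimal standalone sketch of that pattern (the helper name is hypothetical and int32 is chosen arbitrarily; the real methods are generated from the template below):

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
)

// decodePlainInt32 is a simplified stand-in for the generated
// Plain*Decoder.Decode methods: clamp the request to the values remaining,
// verify the raw buffer really holds that many fixed-width values, then copy.
func decodePlainInt32(data []byte, nvals int, out []int32) (int, error) {
	n := nvals
	if len(out) < n {
		n = len(out) // the same clamp the real code gets from utils.Min
	}
	if n*4 > len(data) {
		return 0, errors.New("parquet: eof exception")
	}
	for i := 0; i < n; i++ {
		out[i] = int32(binary.LittleEndian.Uint32(data[i*4:]))
	}
	return n, nil
}

func main() {
	buf := []byte{1, 0, 0, 0, 2, 0, 0, 0}
	out := make([]int32, 4)
	n, err := decodePlainInt32(buf, 2, out)
	fmt.Println(n, err, out[:n]) // 2 <nil> [1 2]
}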
func (dec *PlainFloat32Decoder) Decode(out []float32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -592,7 +592,7 @@ func (PlainFloat64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainFloat64Decoder) Decode(out []float64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index 2838c63a418..74f63e78bcc 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -133,7 +133,7 @@ func (Plain{{.Name}}Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *Plain{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64({{.prefix}}.{{.Name}}SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain {{.Name}}, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 4bc18e8c63c..04db72178f3 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -195,7 +195,7 @@ func (DictInt32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt32Decoder) Decode(out []int32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -209,7 +209,7 @@ func (d *DictInt32Decoder) Decode(out []int32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -432,7 +432,7 @@ func (DictInt64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. 
func (d *DictInt64Decoder) Decode(out []int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -446,7 +446,7 @@ func (d *DictInt64Decoder) Decode(out []int64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -647,7 +647,7 @@ func (DictInt96Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -661,7 +661,7 @@ func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt96Decoder) DecodeSpaced(out []parquet.Int96, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -872,7 +872,7 @@ func (DictFloat32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -886,7 +886,7 @@ func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1097,7 +1097,7 @@ func (DictFloat64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1111,7 +1111,7 @@ func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
func (d *DictFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1365,7 +1365,7 @@ func (DictByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1379,7 +1379,7 @@ func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1544,7 +1544,7 @@ func (DictFixedLenByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1558,7 +1558,7 @@ func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) ( // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index d72f3151204..ceb755caa0b 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -271,7 +271,7 @@ func (Dict{{.Name}}Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -285,7 +285,7 @@ func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
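The DecodeSpaced variants repeated across these generated decoders all share one idea: decode the non-null values densely, then spread them out so each valid value lands at the position of its set bit in the validity bitmap. A rough standalone illustration of that spacing step follows; it is a sketch of the concept, not the library's actual in-place implementation:

package main

import "fmt"

// space spreads densely decoded values into an output of length n, leaving
// zero-valued placeholders wherever the validity bitmap has a cleared bit
// (bit i set means position i is valid).
func space(decoded []int32, validBits byte, n int) []int32 {
	out := make([]int32, n)
	j := 0
	for i := 0; i < n; i++ {
		if validBits&(1<<uint(i)) != 0 {
			out[i] = decoded[j]
			j++
		}
	}
	return out
}

func main() {
	// bitmap 0b101: positions 0 and 2 valid, position 1 null
	fmt.Println(space([]int32{7, 9}, 0b101, 3)) // [7 0 9]
}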
func (d *Dict{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 4ab3ab1a1c9..f8d860c88a0 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { b.buf = bufferPool.Get().(*memory.Buffer) } - newCap := utils.MaxInt(b.buf.Cap()+b.offset, 256) + newCap := utils.Max(b.buf.Cap()+b.offset, 256) for newCap < b.pos+nbytes { newCap = bitutil.NextPowerOf2(newCap) } @@ -375,7 +375,7 @@ func (b *BufferWriter) Reserve(nbytes int) { if b.buffer == nil { b.buffer = memory.NewResizableBuffer(b.mem) } - newCap := utils.MaxInt(b.buffer.Cap()+b.offset, 256) + newCap := utils.Max(b.buffer.Cap()+b.offset, 256) for newCap < b.pos+nbytes+b.offset { newCap = bitutil.NextPowerOf2(newCap) } diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index 48ac3316400..525921d9631 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -75,7 +75,7 @@ func (d *DataPageBuilder) appendLevels(lvls []int16, maxLvl int16, e parquet.Enc func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { d.defLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.defLvlEncoding = parquet.Encodings.RLE d.hasDefLvls = true } @@ -83,7 +83,7 @@ func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { func (d *DataPageBuilder) AppendRepLevels(lvls []int16, maxLvl int16) { d.repLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.repLvlEncoding = parquet.Encodings.RLE d.hasRepLvls = true } @@ -122,7 +122,7 @@ func (d *DataPageBuilder) AppendValues(desc *schema.Column, values interface{}, panic(err) } - d.nvals = utils.MaxInt(sz, d.nvals) + d.nvals = utils.Max(sz, d.nvals) d.encoding = e d.hasValues = true } @@ -191,7 +191,7 @@ func MakeDataPage(dataPageVersion parquet.DataPageVersion, d *schema.Column, val num = builder.nvals } else { stream.Write(indexBuffer.Bytes()) - num = utils.MaxInt(builder.nvals, nvals) + num = utils.Max(builder.nvals, nvals) } buf := stream.Finish() diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index 0bf501e0488..d327be5f525 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -266,7 +266,7 @@ func (b *BitReader) GetBatchBools(out []bool) (int, error) { for i < length { // grab byte-aligned bits in a loop since it's more efficient than going // bit by bit when you can grab 8 bools at a time. 
- unpackSize := utils.MinInt(blen, length-i) / 8 * 8 + unpackSize := utils.Min(blen, length-i) / 8 * 8 n, err := b.reader.Read(buf[:bitutil.BytesForBits(int64(unpackSize))]) if err != nil { return i, err @@ -314,7 +314,7 @@ func (b *BitReader) GetBatch(bits uint, out []uint64) (int, error) { b.reader.Seek(b.byteoffset, io.SeekStart) for i < length { // unpack groups of 32 bytes at a time into a buffer since it's more efficient - unpackSize := utils.MinInt(buflen, length-i) + unpackSize := utils.Min(buflen, length-i) numUnpacked := unpack32(b.reader, b.unpackBuf[:unpackSize], int(bits)) if numUnpacked == 0 { break diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index f367e7dc13c..dffe55402b9 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -51,7 +51,7 @@ func MaxRLEBufferSize(width, numValues int) int { minRepeatedRunSize := 1 + int(bitutil.BytesForBits(int64(width))) repeatedMaxSize := int(bitutil.BytesForBits(int64(numValues))) * minRepeatedRunSize - return utils.MaxInt(literalMaxSize, repeatedMaxSize) + return utils.Max(literalMaxSize, repeatedMaxSize) } // Utility classes to do run length encoding (RLE) for fixed bit width values. If runs @@ -370,7 +370,7 @@ func (r *RleDecoder) consumeRepeatCounts(read, batchSize, remain int, run bituti } func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -388,7 +388,7 @@ func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64 ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index 886d24564db..37dc49a6958 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -130,7 +130,7 @@ func (r *RleDecoder) getspacedInt32(dc DictionaryConverter, vals []int32, batchS } func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -148,7 +148,7 @@ func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -185,7 +185,7 @@ func (r *RleDecoder) GetBatchWithDictInt32(dc DictionaryConverter, vals []int32) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -193,7 +193,7 @@ func (r *RleDecoder) GetBatchWithDictInt32(dc 
DictionaryConverter, vals []int32) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -323,7 +323,7 @@ func (r *RleDecoder) getspacedInt64(dc DictionaryConverter, vals []int64, batchS } func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -341,7 +341,7 @@ func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -378,7 +378,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -386,7 +386,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -516,7 +516,7 @@ func (r *RleDecoder) getspacedInt96(dc DictionaryConverter, vals []parquet.Int96 } func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet.Int96, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -534,7 +534,7 @@ func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -571,7 +571,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -579,7 +579,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -709,7 +709,7 @@ func (r *RleDecoder) getspacedFloat32(dc DictionaryConverter, vals []float32, ba } func (r *RleDecoder) 
consumeLiteralsFloat32(dc DictionaryConverter, vals []float32, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -727,7 +727,7 @@ func (r *RleDecoder) consumeLiteralsFloat32(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -764,7 +764,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -772,7 +772,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -902,7 +902,7 @@ func (r *RleDecoder) getspacedFloat64(dc DictionaryConverter, vals []float64, ba } func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -920,7 +920,7 @@ func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -957,7 +957,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -965,7 +965,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1095,7 +1095,7 @@ func (r *RleDecoder) getspacedByteArray(dc DictionaryConverter, vals []parquet.B } func (r *RleDecoder) consumeLiteralsByteArray(dc DictionaryConverter, vals []parquet.ByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1113,7 +1113,7 @@ func (r *RleDecoder) consumeLiteralsByteArray(dc 
DictionaryConverter, vals []par ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1150,7 +1150,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1158,7 +1158,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1288,7 +1288,7 @@ func (r *RleDecoder) getspacedFixedLenByteArray(dc DictionaryConverter, vals []p } func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, vals []parquet.FixedLenByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1306,7 +1306,7 @@ func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, va ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1343,7 +1343,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1351,7 +1351,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index abcb419055a..88c7dd979eb 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -129,7 +129,7 @@ func (r *RleDecoder) getspaced{{.Name}}(dc DictionaryConverter, vals []{{.name}} } func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{.name}}, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -147,7 +147,7 @@ func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{. 
) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -184,7 +184,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -192,7 +192,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 3c38aba5c32..a403b2196a8 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -790,7 +790,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { isNeg := int8(buf[0]) < 0 // 1. extract high bits - highBitsOffset := utils.MaxInt(0, len(buf)-8) + highBitsOffset := utils.Max(0, len(buf)-8) var ( highBits uint64 lowBits uint64 @@ -811,7 +811,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { } // 2. extract lower bits - lowBitsOffset := utils.MinInt(len(buf), 8) + lowBitsOffset := utils.Min(len(buf), 8) lowBits = uint64FromBigEndianShifted(buf[highBitsOffset:]) if lowBitsOffset == 8 { @@ -850,7 +850,7 @@ func bigEndianToDecimal256(buf []byte) (decimal256.Num, error) { } for wordIdx := 0; wordIdx < 4; wordIdx++ { - wordLen := utils.MinInt(len(buf), arrow.Uint64SizeBytes) + wordLen := utils.Min(len(buf), arrow.Uint64SizeBytes) word := buf[len(buf)-wordLen:] if wordLen == 8 { From e3467e9609b4414ed09a31d1dd1331944d94e20b Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 15 Dec 2023 16:09:36 -0300 Subject: [PATCH 28/28] Add pre-condition comment back --- go/arrow/array/list.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 22fb4a2cee1..9d959b5e43b 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -1415,7 +1415,7 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { // Pre-conditions: // // input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 -// input.Len() > 0 +// input.Len() > 0 && input.NullN() != input.Len() func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { var bitmap []byte if input.Buffers()[0] != nil {
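The restored pre-condition on minListViewOffset is load-bearing: when an array is empty or every entry is null, its offsets buffer may be unset or arbitrary, so a minimum over it is meaningless. A hypothetical in-package guard showing how a caller would honor both conditions (the helper name is an assumption, as is the module path, and it is not part of the patch):

package array // hypothetical caller-side sketch, not from the patch

import "github.com/apache/arrow/go/v15/arrow"

// minOffsetOrZero only invokes minListViewOffset when the documented
// pre-conditions hold (input is assumed to be ListView-typed); it falls
// back to zero when no list entry actually references the values buffer.
func minOffsetOrZero(input arrow.ArrayData) int32 {
	if input.Len() == 0 || input.NullN() == input.Len() {
		return 0
	}
	return minListViewOffset[int32](input)
}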