From a2c40e840aa0d17bd7422bf545f9285fcdc24596 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Fri, 27 Aug 2021 22:13:08 -0400 Subject: [PATCH 01/20] link c data interface headers --- go/arrow/cdata/abi.h | 1 + go/arrow/cdata/helpers.h | 1 + 2 files changed, 2 insertions(+) create mode 120000 go/arrow/cdata/abi.h create mode 120000 go/arrow/cdata/helpers.h diff --git a/go/arrow/cdata/abi.h b/go/arrow/cdata/abi.h new file mode 120000 index 00000000000..447162aed29 --- /dev/null +++ b/go/arrow/cdata/abi.h @@ -0,0 +1 @@ +C:/Users/mtopol/projects/zeroarrow/cpp/src/arrow/c/abi.h \ No newline at end of file diff --git a/go/arrow/cdata/helpers.h b/go/arrow/cdata/helpers.h new file mode 120000 index 00000000000..4a66108fc45 --- /dev/null +++ b/go/arrow/cdata/helpers.h @@ -0,0 +1 @@ +C:/Users/mtopol/projects/zeroarrow/cpp/src/arrow/c/helpers.h \ No newline at end of file From c7141f184a835b6122a0da44fe430d2974671cc7 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Fri, 27 Aug 2021 22:17:54 -0400 Subject: [PATCH 02/20] fix symlinks to be relative --- go/arrow/cdata/abi.h | 2 +- go/arrow/cdata/helpers.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go/arrow/cdata/abi.h b/go/arrow/cdata/abi.h index 447162aed29..3d262a42569 120000 --- a/go/arrow/cdata/abi.h +++ b/go/arrow/cdata/abi.h @@ -1 +1 @@ -C:/Users/mtopol/projects/zeroarrow/cpp/src/arrow/c/abi.h \ No newline at end of file +../../../cpp/src/arrow/c/abi.h \ No newline at end of file diff --git a/go/arrow/cdata/helpers.h b/go/arrow/cdata/helpers.h index 4a66108fc45..7562f1e6597 120000 --- a/go/arrow/cdata/helpers.h +++ b/go/arrow/cdata/helpers.h @@ -1 +1 @@ -C:/Users/mtopol/projects/zeroarrow/cpp/src/arrow/c/helpers.h \ No newline at end of file +../../../cpp/src/arrow/c/helpers.h \ No newline at end of file From e35aa7e43bcc3c0bdf02a9cdfbd1e83de1337774 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Sat, 28 Aug 2021 01:25:26 -0400 Subject: [PATCH 03/20] C Data consumer interface --- go/arrow/cdata/abi.h | 1 - go/arrow/cdata/arrow/c/abi.h | 103 ++++++ go/arrow/cdata/arrow/c/helpers.h | 117 +++++++ go/arrow/cdata/cdata.go | 539 +++++++++++++++++++++++++++++++ go/arrow/cdata/helpers.h | 1 - go/arrow/cdata/interface.go | 148 +++++++++ 6 files changed, 907 insertions(+), 2 deletions(-) delete mode 120000 go/arrow/cdata/abi.h create mode 100644 go/arrow/cdata/arrow/c/abi.h create mode 100644 go/arrow/cdata/arrow/c/helpers.h create mode 100644 go/arrow/cdata/cdata.go delete mode 120000 go/arrow/cdata/helpers.h create mode 100644 go/arrow/cdata/interface.go diff --git a/go/arrow/cdata/abi.h b/go/arrow/cdata/abi.h deleted file mode 120000 index 3d262a42569..00000000000 --- a/go/arrow/cdata/abi.h +++ /dev/null @@ -1 +0,0 @@ -../../../cpp/src/arrow/c/abi.h \ No newline at end of file diff --git a/go/arrow/cdata/arrow/c/abi.h b/go/arrow/cdata/arrow/c/abi.h new file mode 100644 index 00000000000..a78170dbdbc --- /dev/null +++ b/go/arrow/cdata/arrow/c/abi.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +// EXPERIMENTAL: C stream interface + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#ifdef __cplusplus +} +#endif diff --git a/go/arrow/cdata/arrow/c/helpers.h b/go/arrow/cdata/arrow/c/helpers.h new file mode 100644 index 00000000000..a5c1f6fe4ba --- /dev/null +++ b/go/arrow/cdata/arrow/c/helpers.h @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/c/abi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Query whether the C schema is released +inline int ArrowSchemaIsReleased(const struct ArrowSchema* schema) { + return schema->release == NULL; +} + +/// Mark the C schema released (for use in release callbacks) +inline void ArrowSchemaMarkReleased(struct ArrowSchema* schema) { + schema->release = NULL; +} + +/// Move the C schema from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid schema already, otherwise there +/// will be a memory leak. +inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dest) { + assert(dest != src); + assert(!ArrowSchemaIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowSchema)); + ArrowSchemaMarkReleased(src); +} + +/// Release the C schema, if necessary, by calling its release callback +inline void ArrowSchemaRelease(struct ArrowSchema* schema) { + if (!ArrowSchemaIsReleased(schema)) { + schema->release(schema); + assert(ArrowSchemaIsReleased(schema)); + } +} + +/// Query whether the C array is released +inline int ArrowArrayIsReleased(const struct ArrowArray* array) { + return array->release == NULL; +} + +/// Mark the C array released (for use in release callbacks) +inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } + +/// Move the C array from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid array already, otherwise there +/// will be a memory leak. +inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { + assert(dest != src); + assert(!ArrowArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArray)); + ArrowArrayMarkReleased(src); +} + +/// Release the C array, if necessary, by calling its release callback +inline void ArrowArrayRelease(struct ArrowArray* array) { + if (!ArrowArrayIsReleased(array)) { + array->release(array); + assert(ArrowArrayIsReleased(array)); + } +} + +/// Query whether the C array stream is released +inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { + return stream->release == NULL; +} + +/// Mark the C array stream released (for use in release callbacks) +inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { + stream->release = NULL; +} + +/// Move the C array stream from `src` to `dest` +/// +/// Note `dest` must *not* point to a valid stream already, otherwise there +/// will be a memory leak. +inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dest) { + assert(dest != src); + assert(!ArrowArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowArrayStream)); + ArrowArrayStreamMarkReleased(src); +} + +/// Release the C array stream, if necessary, by calling its release callback +inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { + if (!ArrowArrayStreamIsReleased(stream)) { + stream->release(stream); + assert(ArrowArrayStreamIsReleased(stream)); + } +} + +#ifdef __cplusplus +} +#endif diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go new file mode 100644 index 00000000000..469bba848c6 --- /dev/null +++ b/go/arrow/cdata/cdata.go @@ -0,0 +1,539 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package cdata + +// implement handling of the Arrow C Data Interface. At least from a consuming side. + +// #include "arrow/c/abi.h" +// #include "arrow/c/helpers.h" +// typedef struct ArrowSchema ArrowSchema; +// typedef struct ArrowArray ArrowArray; +// typedef struct ArrowArrayStream ArrowArrayStream; +// +// int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } +// int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } +// const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } +// +import "C" + +import ( + "io" + "reflect" + "runtime" + "strconv" + "strings" + "syscall" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/bitutil" + "github.com/apache/arrow/go/arrow/memory" + "golang.org/x/xerrors" +) + +type ( + // CArrowSchema is the C Data Interface for ArrowSchemas defined in abi.h + CArrowSchema = C.ArrowSchema + // CArrowArray is the C Data Interface object for Arrow Arrays as defined in abi.h + CArrowArray = C.ArrowArray + // CArrowArrayStream is the Experimental API for handling streams of record batches + // through the C Data interface. + CArrowArrayStream = C.ArrowArrayStream +) + +// Map from the defined strings to their corresponding arrow.DataType interface +// object instances, for types that don't require params. +var formatToSimpleType = map[string]arrow.DataType{ + "n": arrow.Null, + "b": arrow.FixedWidthTypes.Boolean, + "c": arrow.PrimitiveTypes.Int8, + "C": arrow.PrimitiveTypes.Uint8, + "s": arrow.PrimitiveTypes.Int16, + "S": arrow.PrimitiveTypes.Uint16, + "i": arrow.PrimitiveTypes.Int32, + "I": arrow.PrimitiveTypes.Uint32, + "l": arrow.PrimitiveTypes.Int64, + "L": arrow.PrimitiveTypes.Uint64, + "e": arrow.FixedWidthTypes.Float16, + "f": arrow.PrimitiveTypes.Float32, + "g": arrow.PrimitiveTypes.Float64, + "z": arrow.BinaryTypes.Binary, + "u": arrow.BinaryTypes.String, + "tdD": arrow.FixedWidthTypes.Date32, + "tdm": arrow.FixedWidthTypes.Date64, + "tts": arrow.FixedWidthTypes.Time32s, + "ttm": arrow.FixedWidthTypes.Time32ms, + "ttu": arrow.FixedWidthTypes.Time64us, + "ttn": arrow.FixedWidthTypes.Time64ns, + "tDs": arrow.FixedWidthTypes.Duration_s, + "tDm": arrow.FixedWidthTypes.Duration_ms, + "tDu": arrow.FixedWidthTypes.Duration_us, + "tDn": arrow.FixedWidthTypes.Duration_ns, + "tiM": arrow.FixedWidthTypes.MonthInterval, + "tiD": arrow.FixedWidthTypes.DayTimeInterval, +} + +// decode metadata from C which is encoded as +// +// [int32] -> number of metadata pairs +// for 0..n +// [int32] -> number of bytes in key +// [n bytes] -> key value +// [int32] -> number of bytes in value +// [n bytes] -> value +func decodeCMetadata(md *C.char) arrow.Metadata { + if md == nil { + return arrow.Metadata{} + } + + // don't copy the bytes, just reference them directly + const maxlen = 0x7fffffff + data := (*[maxlen]byte)(unsafe.Pointer(md))[:] + + readint32 := func() int32 { + v := *(*int32)(unsafe.Pointer(&data[0])) + data = data[arrow.Int32SizeBytes:] + return v + } + + readstr := func() string { + l := readint32() + s := string(data[:l]) + data = data[l:] + return s + } + + npairs := readint32() + if npairs == 0 { + return arrow.Metadata{} + } + + keys := make([]string, npairs) + vals := make([]string, npairs) + + for i := int32(0); i < npairs; i++ { + keys[i] = readstr() + vals[i] = readstr() + } + + return arrow.NewMetadata(keys, vals) +} + +// convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema +func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { + var childFields []arrow.Field + if schema.n_children > 0 { + // call ourselves recursively if there are children. + var schemaChildren []*C.ArrowSchema + // set up a slice to reference safely + s := (*reflect.SliceHeader)(unsafe.Pointer(&schemaChildren)) + s.Data = uintptr(unsafe.Pointer(schema.children)) + s.Len = int(schema.n_children) + s.Cap = int(schema.n_children) + + childFields = make([]arrow.Field, schema.n_children) + for i, c := range schemaChildren { + childFields[i], err = importSchema((*CArrowSchema)(c)) + if err != nil { + return + } + } + } + + // copy the schema name from the c-string + ret.Name = C.GoString(schema.name) + ret.Nullable = (schema.flags & C.ARROW_FLAG_NULLABLE) != 0 + ret.Metadata = decodeCMetadata(schema.metadata) + + // copies the c-string here, but it's very small + f := C.GoString(schema.format) + // handle our non-parameterized simple types. + dt, ok := formatToSimpleType[f] + if ok { + ret.Type = dt + return + } + + // handle the complex types + switch f[0] { + case 'w': // fixed size binary is "w:##" where ## is the byteWidth + byteWidth, err := strconv.Atoi(strings.Split(f, ":")[1]) + if err != nil { + return ret, err + } + dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} + case 'd': // decimal types are d:,[,] size is assumed 128 if left out + props := strings.Split(f, ":")[1] + propList := strings.Split(props, ",") + if len(propList) == 3 { + err = xerrors.New("only decimal128 is supported") + return + } + + precision, _ := strconv.Atoi(propList[0]) + scale, _ := strconv.Atoi(propList[1]) + dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} + case '+': // types with children + switch f[1] { + case 'l': // list + dt = arrow.ListOf(childFields[0].Type) + case 'w': // fixed size list is w:# where # is the list size. + listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) + if err != nil { + return ret, err + } + + dt = arrow.FixedSizeListOf(int32(listSize), childFields[0].Type) + case 's': // struct + dt = arrow.StructOf(childFields...) + case 'm': // map type is basically a list of structs. + st := childFields[0].Type.(*arrow.StructType) + dt = arrow.MapOf(st.Field(0).Type, st.Field(1).Type) + dt.(*arrow.MapType).KeysSorted = (schema.flags & C.ARROW_FLAG_MAP_KEYS_SORTED) != 0 + } + } + + if dt == nil { + // if we didn't find a type, then it's something we haven't implemented. + err = xerrors.New("unimplemented type") + } else { + ret.Type = dt + } + return +} + +// importer to keep track when importing C ArrowArray objects. +type cimporter struct { + dt arrow.DataType + arr C.ArrowArray + data *array.Data + parent *cimporter + children []cimporter + cbuffers []*C.void +} + +func (imp *cimporter) importChild(parent *cimporter, src *C.ArrowArray) error { + imp.parent = parent + return imp.doImport(src) +} + +// import any child arrays for lists, structs, and so on. +func (imp *cimporter) doImportChildren() error { + var children []*C.ArrowArray + // create a proper slice for our children + s := (*reflect.SliceHeader)(unsafe.Pointer(&children)) + s.Data = uintptr(unsafe.Pointer(imp.arr.children)) + s.Len = int(imp.arr.n_children) + s.Cap = int(imp.arr.n_children) + + if len(children) > 0 { + imp.children = make([]cimporter, len(children)) + } + + // handle the cases + switch imp.dt.ID() { + case arrow.LIST: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.ListType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.FIXED_SIZE_LIST: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.STRUCT: // import all the children + st := imp.dt.(*arrow.StructType) + for i, c := range children { + imp.children[i].dt = st.Field(i).Type + imp.children[i].importChild(imp, c) + } + case arrow.MAP: // only one child to import, it's a struct array + imp.children[0].dt = imp.dt.(*arrow.MapType).ValueType() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + } + + return nil +} + +// import is called recursively as needed for importing an array and its children +// in order to generate array.Data objects +func (imp *cimporter) doImport(src *C.ArrowArray) error { + // move the array from the src object passed in to the one referenced by + // this importer. That way we can set up a finalizer on the created + // *array.Data object so we clean up our Array's memory when garbage collected. + C.ArrowArrayMove(src, &imp.arr) + defer func(arr *C.ArrowArray) { + if imp.data != nil { + runtime.SetFinalizer(imp.data, func(*array.Data) { + C.ArrowArrayRelease(arr) + }) + } + }(&imp.arr) + + // import any children + if err := imp.doImportChildren(); err != nil { + return err + } + // get a view of the buffers, zero-copy. we're just looking at the pointers + const maxlen = 0x7fffffff + imp.cbuffers = (*[maxlen]*C.void)(unsafe.Pointer(imp.arr.buffers))[:imp.arr.n_buffers:imp.arr.n_buffers] + + // handle each of our type cases + switch dt := imp.dt.(type) { + case *arrow.NullType: + if err := imp.checkNoChildren(); err != nil { + return err + } + imp.data = array.NewData(dt, int(imp.arr.length), nil, nil, int(imp.arr.null_count), int(imp.arr.offset)) + case arrow.FixedWidthDataType: + return imp.importFixedSizePrimitive() + case *arrow.StringType: + return imp.importStringLike() + case *arrow.BinaryType: + return imp.importStringLike() + case *arrow.ListType: + return imp.importListLike() + case *arrow.MapType: + return imp.importListLike() + case *arrow.FixedSizeListType: + if err := imp.checkNumChildren(1); err != nil { + return err + } + + if err := imp.checkNumBuffers(1); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, []*array.Data{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + case *arrow.StructType: + if err := imp.checkNumBuffers(1); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + children := make([]*array.Data, len(imp.children)) + for i := range imp.children { + children[i] = imp.children[i].data + } + + imp.data = array.NewData(dt, int(imp.arr.length), []*memory.Buffer{nulls}, children, int(imp.arr.null_count), int(imp.arr.offset)) + default: + return xerrors.Errorf("unimplemented type %s", dt) + } + + return nil +} + +func (imp *cimporter) importStringLike() error { + if err := imp.checkNoChildren(); err != nil { + return err + } + + if err := imp.checkNumBuffers(3); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + offsets := imp.importOffsetsBuffer(1) + values := imp.importVariableValuesBuffer(2, 1, arrow.Int32Traits.CastFromBytes(offsets.Bytes())) + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) importListLike() error { + if err := imp.checkNumChildren(1); err != nil { + return err + } + + if err := imp.checkNumBuffers(2); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + offsets := imp.importOffsetsBuffer(1) + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets}, []*array.Data{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) importFixedSizePrimitive() error { + if err := imp.checkNoChildren(); err != nil { + return err + } + + if err := imp.checkNumBuffers(2); err != nil { + return err + } + + nulls, err := imp.importNullBitmap(0) + if err != nil { + return err + } + + var values *memory.Buffer + + fw := imp.dt.(arrow.FixedWidthDataType) + if bitutil.IsMultipleOf8(int64(fw.BitWidth())) { + values = imp.importFixedSizeBuffer(1, bitutil.BytesForBits(int64(fw.BitWidth()))) + } else { + if fw.BitWidth() != 1 { + return xerrors.New("invalid bitwidth") + } + values = imp.importBitsBuffer(1) + } + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, values}, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return nil +} + +func (imp *cimporter) checkNoChildren() error { return imp.checkNumChildren(0) } + +func (imp *cimporter) checkNumChildren(n int64) error { + if int64(imp.arr.n_children) != n { + return xerrors.Errorf("expected %d children, for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_children) + } + return nil +} + +func (imp *cimporter) checkNumBuffers(n int64) error { + if int64(imp.arr.n_buffers) != n { + return xerrors.Errorf("expected %d buffers for imported type %s, ArrowArray has %d", n, imp.dt, imp.arr.n_buffers) + } + return nil +} + +func (imp *cimporter) importBuffer(bufferID int, sz int64) *memory.Buffer { + // this is not a copy, we're just having a slice which points at the data + // it's still owned by the C.ArrowArray object and its backing C++ object. + const maxLen = 0x7fffffff + data := (*[maxLen]byte)(unsafe.Pointer(imp.cbuffers[bufferID]))[:sz:sz] + return memory.NewBufferBytes(data) +} + +func (imp *cimporter) importBitsBuffer(bufferID int) *memory.Buffer { + bufsize := bitutil.BytesForBits(int64(imp.arr.length) + int64(imp.arr.offset)) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importNullBitmap(bufferID int) (*memory.Buffer, error) { + if imp.arr.null_count > 0 && imp.cbuffers[bufferID] == nil { + return nil, xerrors.Errorf("arrowarray struct has null bitmap buffer, but non-zero null_count %d", imp.arr.null_count) + } + + if imp.arr.null_count == 0 && imp.cbuffers[bufferID] == nil { + return nil, nil + } + + return imp.importBitsBuffer(bufferID), nil +} + +func (imp *cimporter) importFixedSizeBuffer(bufferID int, byteWidth int64) *memory.Buffer { + bufsize := byteWidth * int64(imp.arr.length+imp.arr.offset) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importOffsetsBuffer(bufferID int) *memory.Buffer { + const offsetsize = int64(arrow.Int32SizeBytes) // go doesn't implement int64 offsets yet + bufsize := offsetsize * int64((imp.arr.length + imp.arr.offset + 1)) + return imp.importBuffer(bufferID, bufsize) +} + +func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth int, offsets []int32) *memory.Buffer { + bufsize := byteWidth * int(offsets[imp.arr.length]) + return imp.importBuffer(bufferID, int64(bufsize)) +} + +func importCArray(arr *C.ArrowArray, sc *C.ArrowSchema) (field arrow.Field, imp *cimporter, err error) { + field, err = importSchema(sc) + if err != nil { + return + } + + imp, err = importCArrayAsType(arr, field.Type) + return +} + +func importCArrayAsType(arr *C.ArrowArray, dt arrow.DataType) (imp *cimporter, err error) { + imp = &cimporter{dt: dt} + err = imp.doImport(arr) + return +} + +func initReader(rdr *nativeCRecordBatchReader, stream *C.ArrowArrayStream) { + C.ArrowArrayStreamMove(stream, &rdr.stream) + runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { C.ArrowArrayStreamRelease(&r.stream) }) +} + +// Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface +type nativeCRecordBatchReader struct { + stream C.ArrowArrayStream + schema *arrow.Schema +} + +func (n *nativeCRecordBatchReader) getError(errno int) error { + return xerrors.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(&n.stream))) +} + +func (n *nativeCRecordBatchReader) Read() (array.Record, error) { + if n.schema == nil { + var sc C.ArrowSchema + errno := C.stream_get_schema(&n.stream, &sc) + if errno != 0 { + return nil, n.getError(int(errno)) + } + defer C.ArrowSchemaRelease(&sc) + s, err := ImportCArrowSchema((*CArrowSchema)(&sc)) + if err != nil { + return nil, err + } + + n.schema = s + } + + var arr C.ArrowArray + errno := C.stream_get_next(&n.stream, &arr) + if errno != 0 { + return nil, n.getError(int(errno)) + } + + if C.ArrowArrayIsReleased(&arr) == 1 { + return nil, io.EOF + } + + return ImportCRecordBatchWithSchema(&arr, n.schema) +} diff --git a/go/arrow/cdata/helpers.h b/go/arrow/cdata/helpers.h deleted file mode 120000 index 7562f1e6597..00000000000 --- a/go/arrow/cdata/helpers.h +++ /dev/null @@ -1 +0,0 @@ -../../../cpp/src/arrow/c/helpers.h \ No newline at end of file diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go new file mode 100644 index 00000000000..46537ff037a --- /dev/null +++ b/go/arrow/cdata/interface.go @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package cdata + +import ( + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/arrio" + "golang.org/x/xerrors" +) + +// ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it +// will copy the metadata and schema definitions over from the C object rather +// than keep direct references to them. It is safe to call C.ArrowSchemaRelease +// after receiving the schema from this function. +func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { + ret, err := importSchema(out) + if err != nil { + return nil, err + } + + return arrow.NewSchema(ret.Type.(*arrow.StructType).Fields(), &ret.Metadata), nil +} + +// ImportCArrayWithType takes a pointer to a C Data ArrowArray and interprets the values +// as an array with the given datatype. If err is not nil, then ArrowArrayRelease must still +// be called on arr to release the memory. +// +// The underlying buffers will not be copied, but will instead be referenced directly +// by the resulting array interface object. The passed in ArrowArray will have it's ownership +// transferred to the resulting array.Interface via ArrowArrayMove. The underlying array.Data +// object that is owned by the Array will now be the owner of the memory pointer and +// will call ArrowArrayRelease when it is released and garbage collected via runtime.SetFinalizer. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (array.Interface, error) { + imp, err := importCArrayAsType(arr, dt) + if err != nil { + return nil, err + } + + return array.MakeFromData(imp.data), nil +} + +// ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order +// to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still +// be called on arr to release the memory. +// +// The Schema will be copied with the information used to populate the returned Field, complete +// with metadata. The array will reference the same memory that is referred to by the ArrowArray +// object and take ownership of it as per ImportCArrayWithType. The returned array.Interface will +// own the C memory and call ArrowArrayRelease when the array.Data object is cleaned up. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCArray(arr *CArrowArray, schema *CArrowSchema) (arrow.Field, array.Interface, error) { + field, err := importSchema(schema) + if err != nil { + return field, nil, err + } + + ret, err := ImportCArrayWithType(arr, field.Type) + return field, ret, err +} + +// ImportCRecordBatchWithSchema is used for importing a Record Batch array when the schema +// is already known such as when receiving record batches through a stream. +// +// All of the semantics regarding memory ownership are the same as when calling +// ImportCRecordBatch directly with a schema. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCRecordBatchWithSchema(arr *CArrowArray, sc *arrow.Schema) (array.Record, error) { + imp, err := importCArrayAsType(arr, arrow.StructOf(sc.Fields()...)) + if err != nil { + return nil, err + } + + st := array.NewStructData(imp.data) + defer st.Release() + + // now that we have our fields, we can split them out into the slice of arrays + // and construct a record batch from them to return. + cols := make([]array.Interface, st.NumField()) + for i := 0; i < st.NumField(); i++ { + cols[i] = st.Field(i) + } + + return array.NewRecord(sc, cols, int64(st.Len())), nil +} + +// ImportCRecordBatch imports an ArrowArray from C as a record batch. If err is not nil, +// then ArrowArrayRelease must still be called to release the memory. +// +// A record batch is represented in the C Data Interface as a Struct Array whose fields +// are the columns of the record batch. Thus after importing the schema passed in here, +// if it is not a Struct type, this will return an error. As with ImportCArray, the +// columns in the record batch will take ownership of the CArrowArray memory if successful. +// Since ArrowArrayMove is used, it's still safe to call ArrowArrayRelease on the source +// regardless. But if there is an error, it *MUST* be called to ensure there is no memory leak. +// +// NOTE: The array takes ownership of the underlying memory buffers via ArrowArrayMove, +// it does not take ownership of the actual arr object itself. +func ImportCRecordBatch(arr *CArrowArray, sc *CArrowSchema) (array.Record, error) { + field, err := importSchema(sc) + if err != nil { + return nil, err + } + + if field.Type.ID() != arrow.STRUCT { + return nil, xerrors.New("recordbatch array import must be of struct type") + } + + return ImportCRecordBatchWithSchema(arr, arrow.NewSchema(field.Type.(*arrow.StructType).Fields(), &field.Metadata)) +} + +// ImportCArrayStream creates an arrio.Reader from an ArrowArrayStream taking ownership +// of the underlying stream object via ArrowArrayStreamMove. +// +// The records returned by this reader must be released manually after they are returned. +// The reader itself will release the stream via SetFinalizer when it is garbage collected. +// It will return (nil, io.EOF) from the Read function when there are no more records to return. +// +// NOTE: The reader takes ownership of the underlying memory buffers via ArrowArrayStreamMove, +// it does not take ownership of the actual stream object itself. +func ImportCArrayStream(stream *CArrowArrayStream, schema *arrow.Schema) arrio.Reader { + out := &nativeCRecordBatchReader{schema: schema} + initReader(out, stream) + return out +} From c45cc0035c6196222e474832aa06d2ea0eb43f22 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Mon, 30 Aug 2021 19:26:38 -0400 Subject: [PATCH 04/20] and it works, huzzah --- go/arrow/cdata/cdata.go | 86 ++-- go/arrow/cdata/cdata_fulltest.c | 298 ++++++++++++ go/arrow/cdata/cdata_test.go | 613 +++++++++++++++++++++++++ go/arrow/cdata/cdata_test_framework.go | 246 ++++++++++ go/arrow/cdata/cdata_testframework.c | 87 ++++ go/arrow/cdata/interface.go | 14 +- go/arrow/cdata/utils.h | 56 +++ go/arrow/datatype_fixedwidth.go | 2 +- 8 files changed, 1373 insertions(+), 29 deletions(-) create mode 100644 go/arrow/cdata/cdata_fulltest.c create mode 100644 go/arrow/cdata/cdata_test.go create mode 100644 go/arrow/cdata/cdata_test_framework.go create mode 100644 go/arrow/cdata/cdata_testframework.c create mode 100644 go/arrow/cdata/utils.h diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 469bba848c6..5ae79590618 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -29,6 +29,8 @@ package cdata // int stream_get_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { return st->get_schema(st, out); } // int stream_get_next(struct ArrowArrayStream* st, struct ArrowArray* out) { return st->get_next(st, out); } // const char* stream_get_last_error(struct ArrowArrayStream* st) { return st->get_last_error(st); } +// struct ArrowArray get_arr() { struct ArrowArray arr; return arr; } +// struct ArrowArrayStream get_stream() { struct ArrowArrayStream stream; return stream; } // import "C" @@ -171,16 +173,42 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { return } - // handle the complex types - switch f[0] { - case 'w': // fixed size binary is "w:##" where ## is the byteWidth - byteWidth, err := strconv.Atoi(strings.Split(f, ":")[1]) + // handle types with params via colon + typs := strings.Split(f, ":") + defaulttz := "UTC" + switch typs[0] { + case "tss": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Second, TimeZone: tz} + case "tsm": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: tz} + case "tsu": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: tz} + case "tsn": + tz := typs[1] + if len(typs[1]) == 0 { + tz = defaulttz + } + dt = &arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: tz} + case "w": // fixed size binary is "w:##" where ## is the byteWidth + byteWidth, err := strconv.Atoi(typs[1]) if err != nil { return ret, err } dt = &arrow.FixedSizeBinaryType{ByteWidth: byteWidth} - case 'd': // decimal types are d:,[,] size is assumed 128 if left out - props := strings.Split(f, ":")[1] + case "d": // decimal types are d:,[,] size is assumed 128 if left out + props := typs[1] propList := strings.Split(props, ",") if len(propList) == 3 { err = xerrors.New("only decimal128 is supported") @@ -190,7 +218,9 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { precision, _ := strconv.Atoi(propList[0]) scale, _ := strconv.Atoi(propList[1]) dt = &arrow.Decimal128Type{Precision: int32(precision), Scale: int32(scale)} - case '+': // types with children + } + + if f[0] == '+' { // types with children switch f[1] { case 'l': // list dt = arrow.ListOf(childFields[0].Type) @@ -222,7 +252,7 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { // importer to keep track when importing C ArrowArray objects. type cimporter struct { dt arrow.DataType - arr C.ArrowArray + arr *C.ArrowArray data *array.Data parent *cimporter children []cimporter @@ -275,25 +305,35 @@ func (imp *cimporter) doImportChildren() error { return nil } +func (imp *cimporter) initarr() { + arr := C.get_arr() + imp.arr = &arr +} + // import is called recursively as needed for importing an array and its children // in order to generate array.Data objects func (imp *cimporter) doImport(src *C.ArrowArray) error { + imp.initarr() // move the array from the src object passed in to the one referenced by // this importer. That way we can set up a finalizer on the created // *array.Data object so we clean up our Array's memory when garbage collected. - C.ArrowArrayMove(src, &imp.arr) + C.ArrowArrayMove(src, imp.arr) defer func(arr *C.ArrowArray) { if imp.data != nil { runtime.SetFinalizer(imp.data, func(*array.Data) { C.ArrowArrayRelease(arr) + if C.ArrowArrayIsReleased(arr) != 1 { + panic("did not release C mem") + } }) } - }(&imp.arr) + }(imp.arr) // import any children if err := imp.doImportChildren(); err != nil { return err } + // get a view of the buffers, zero-copy. we're just looking at the pointers const maxlen = 0x7fffffff imp.cbuffers = (*[maxlen]*C.void)(unsafe.Pointer(imp.arr.buffers))[:imp.arr.n_buffers:imp.arr.n_buffers] @@ -478,16 +518,6 @@ func (imp *cimporter) importVariableValuesBuffer(bufferID int, byteWidth int, of return imp.importBuffer(bufferID, int64(bufsize)) } -func importCArray(arr *C.ArrowArray, sc *C.ArrowSchema) (field arrow.Field, imp *cimporter, err error) { - field, err = importSchema(sc) - if err != nil { - return - } - - imp, err = importCArrayAsType(arr, field.Type) - return -} - func importCArrayAsType(arr *C.ArrowArray, dt arrow.DataType) (imp *cimporter, err error) { imp = &cimporter{dt: dt} err = imp.doImport(arr) @@ -495,24 +525,26 @@ func importCArrayAsType(arr *C.ArrowArray, dt arrow.DataType) (imp *cimporter, e } func initReader(rdr *nativeCRecordBatchReader, stream *C.ArrowArrayStream) { - C.ArrowArrayStreamMove(stream, &rdr.stream) - runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { C.ArrowArrayStreamRelease(&r.stream) }) + st := C.get_stream() + rdr.stream = &st + C.ArrowArrayStreamMove(stream, rdr.stream) + runtime.SetFinalizer(rdr, func(r *nativeCRecordBatchReader) { C.ArrowArrayStreamRelease(r.stream) }) } // Record Batch reader that conforms to arrio.Reader for the ArrowArrayStream interface type nativeCRecordBatchReader struct { - stream C.ArrowArrayStream + stream *C.ArrowArrayStream schema *arrow.Schema } func (n *nativeCRecordBatchReader) getError(errno int) error { - return xerrors.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(&n.stream))) + return xerrors.Errorf("%w: %s", syscall.Errno(errno), C.GoString(C.stream_get_last_error(n.stream))) } func (n *nativeCRecordBatchReader) Read() (array.Record, error) { if n.schema == nil { var sc C.ArrowSchema - errno := C.stream_get_schema(&n.stream, &sc) + errno := C.stream_get_schema(n.stream, &sc) if errno != 0 { return nil, n.getError(int(errno)) } @@ -525,8 +557,8 @@ func (n *nativeCRecordBatchReader) Read() (array.Record, error) { n.schema = s } - var arr C.ArrowArray - errno := C.stream_get_next(&n.stream, &arr) + arr := C.get_arr() + errno := C.stream_get_next(n.stream, &arr) if errno != 0 { return nil, n.getError(int(errno)) } diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c new file mode 100644 index 00000000000..f2a0da70c54 --- /dev/null +++ b/go/arrow/cdata/cdata_fulltest.c @@ -0,0 +1,298 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +#include +#include +#include +#include "arrow/c/abi.h" +#include "arrow/c/helpers.h" +#include "utils.h" + +static void release_primitive(struct ArrowSchema* schema) { + free((void *)schema->format); + schema->release = NULL; +} + +static void release_nested(struct ArrowSchema* schema) { + for (int i = 0; i < schema->n_children; ++i) { + assert(!ArrowSchemaIsReleased(schema->children[i])); + ArrowSchemaRelease(schema->children[i]); + ArrowSchemaMarkReleased(schema->children[i]); + } + assert(!ArrowSchemaIsReleased(schema)); + free((void *)schema->format); + if (strlen(schema->name) > 0) { + free((void *)schema->name); + } + ArrowSchemaMarkReleased(schema); +} + +void test_primitive(struct ArrowSchema* schema, const char* fmt) { + *schema = (struct ArrowSchema) { + // Type description + .format = fmt, + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_primitive, + }; +} + +void free_nested_schemas(struct ArrowSchema* top) { + for (int i = 0; i < top->n_children; ++i) { + free_nested_schemas(top->children[i]); + } + ArrowSchemaMarkReleased(top); + // free(top); +} + +void free_top_schema(struct ArrowSchema** top) { + free_nested_schemas(top[0]); + free(top); +} + +struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = 0, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested, + }; + if (i != 0) { + schemas[i-1]->n_children = 1; + schemas[i-1]->children = &schemas[i]; + } + } + return schemas; +} + +struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = flags[i], + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested, + }; + } + + schemas[0]->children = &schemas[1]; + schemas[0]->n_children = n-1; + return schemas; +} + +struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); + + if (check_for_endianness() == 1) { + schemas[n-1]->metadata = kEncodedMeta2LE; + } else { + schemas[n-1]->metadata = kEncodedMeta2BE; + } + + return schemas; +} + +struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); + + if (check_for_endianness() == 1) { + schemas[0]->metadata = kEncodedMeta2LE; + schemas[n-1]->metadata = kEncodedMeta1LE; + } else { + schemas[0]->metadata = kEncodedMeta2BE; + schemas[n-1]->metadata = kEncodedMeta1BE; + } + return schemas; +} + +struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n) { + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + for (int i = 0; i < n; ++i) { + schemas[i] = malloc(sizeof(struct ArrowSchema)); + *schemas[i] = (struct ArrowSchema) { + .format = fmts[i], + .name = names[i], + .metadata = NULL, + .flags = flags[i], + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested, + }; + } + + schemas[0]->n_children = 1; + schemas[0]->children = &schemas[1]; + schemas[1]->n_children = n-2; + schemas[1]->children = &schemas[2]; + + return schemas; +} + +struct streamcounter { + int n; + int max; +}; + +static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { + out->children = malloc(sizeof(struct ArrowSchema*)*2); + out->n_children = 2; + + out->children[0] = malloc(sizeof(struct ArrowSchema)); + *out->children[0] = (struct ArrowSchema) { + .format = "i", + .name = "a", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested, + }; + + out->children[1] = malloc(sizeof(struct ArrowSchema)); + *out->children[1] = (struct ArrowSchema) { + .format = "u", + .name = "b", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .children = NULL, + .n_children = 0, + .dictionary = NULL, + .release = &release_nested, + }; + + out->format = "+s"; + out->release = &free_nested_schemas; + + return 0; +} + +static void release_stream(struct ArrowArrayStream* st) { + free(st->private_data); + ArrowArrayStreamMarkReleased(st); +} + +static void release_the_array(struct ArrowArray* out) { + for (int i = 0; i < out->n_children; ++i) { + ArrowArrayRelease(out->children[i]); + } + free((void*)out->children); + free(out->buffers); + out->release = NULL; +} + +void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); + +static void release_str_array(struct ArrowArray* array) { + assert(array->n_buffers == 3); + free((void*) array->buffers[1]); + free((void*) array->buffers[2]); + free(array->buffers); + array->release = NULL; +} + +void export_str_array(const char* data, const int32_t* offsets, int64_t nitems, struct ArrowArray* out) { + *out = (struct ArrowArray) { + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 3, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_str_array + }; + + out->buffers = (const void**)malloc(sizeof(void*) * out->n_buffers); + assert(out->buffers != NULL); + out->buffers[0] = NULL; + out->buffers[1] = offsets; + out->buffers[2] = data; +} + +static int next_record(struct ArrowArrayStream* st, struct ArrowArray* out) { + struct streamcounter* cnter = (struct streamcounter*)(st->private_data); + if (cnter->n == cnter->max) { + ArrowArrayMarkReleased(out); + return 0; + } + + cnter->n++; + + *out = (struct ArrowArray) { + .offset = 0, + .dictionary = NULL, + .length = 3, + .null_count = 0, + .buffers = (const void**)malloc(sizeof(void*)), + .n_children = 2, + .n_buffers = 1, + .release = &release_the_array + }; + + out->buffers[0] = NULL; + out->children = (struct ArrowArray**)malloc(sizeof(struct ArrowArray*)*2); + int32_t* intdata = malloc(sizeof(int32_t)*3); + for (int i = 0; i < 3; ++i) { + intdata[i] = cnter->n * (i+1); + } + + out->children[0] = malloc(sizeof(struct ArrowArray)); + export_int32_array(intdata, 3, out->children[0]); + out->children[1] = malloc(sizeof(struct ArrowArray)); + char* strdata = strdup("foobarbaz"); + int32_t* offsets = malloc(sizeof(int32_t)*4); + offsets[0] = 0; + offsets[1] = 3; + offsets[2] = 6; + offsets[3] = 9; + export_str_array(strdata, offsets, 3, out->children[1]); + + return 0; +} + +void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out) { + struct streamcounter* cnt = malloc(sizeof(struct streamcounter)); + cnt->max = n_batches; + cnt->n = 0; + + out->get_next = &next_record; + out->get_schema = &stream_schema; + out->release = &release_stream; + out->private_data = cnt; +} diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go new file mode 100644 index 00000000000..0cf874ad285 --- /dev/null +++ b/go/arrow/cdata/cdata_test.go @@ -0,0 +1,613 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +package cdata + +import ( + "fmt" + "io" + "runtime" + "testing" + "time" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/decimal128" + "github.com/apache/arrow/go/arrow/memory" + "github.com/stretchr/testify/assert" +) + +func TestSchemaExport(t *testing.T) { + sc := exportInt32TypeSchema() + f, err := importSchema(&sc) + assert.NoError(t, err) + + keys, _ := getMetadataKeys() + vals, _ := getMetadataValues() + + assert.Equal(t, arrow.PrimitiveTypes.Int32, f.Type) + assert.Equal(t, keys, f.Metadata.Keys()) + assert.Equal(t, vals, f.Metadata.Values()) + + releaseSchema(&sc) + assert.Nil(t, sc.release) +} + +func TestSimpleArrayExport(t *testing.T) { + assert.False(t, test1IsReleased()) + + testarr := exportInt32Array() + arr, err := ImportCArrayWithType(&testarr, arrow.PrimitiveTypes.Int32) + assert.NoError(t, err) + + assert.False(t, test1IsReleased()) + assert.True(t, isReleased(&testarr)) + + arr.Release() + runtime.GC() + assert.Eventually(t, test1IsReleased, 1*time.Second, 10*time.Millisecond) +} + +func TestSimpleArrayAndSchema(t *testing.T) { + sc := exportInt32TypeSchema() + testarr := exportInt32Array() + + // grab address of the buffer we stuck into the ArrowArray object + buflist := (*[2]unsafe.Pointer)(unsafe.Pointer(testarr.buffers)) + origvals := (*[10]int32)(unsafe.Pointer(buflist[1])) + + fld, arr, err := ImportCArray(&testarr, &sc) + assert.NoError(t, err) + assert.Equal(t, arrow.PrimitiveTypes.Int32, fld.Type) + assert.EqualValues(t, 10, arr.Len()) + + // verify that the address is the same of the first integer for the + // slice that is being used by the array.Interface and the original buffer + vals := arr.(*array.Int32).Int32Values() + assert.Same(t, &vals[0], &origvals[0]) + + // and that the values are correct + for i, v := range vals { + assert.Equal(t, int32(i+1), v) + } +} + +func TestPrimitiveSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmt string + }{ + {arrow.PrimitiveTypes.Int8, "c"}, + {arrow.PrimitiveTypes.Int16, "s"}, + {arrow.PrimitiveTypes.Int32, "i"}, + {arrow.PrimitiveTypes.Int64, "l"}, + {arrow.PrimitiveTypes.Uint8, "C"}, + {arrow.PrimitiveTypes.Uint16, "S"}, + {arrow.PrimitiveTypes.Uint32, "I"}, + {arrow.PrimitiveTypes.Uint64, "L"}, + {arrow.FixedWidthTypes.Boolean, "b"}, + {arrow.Null, "n"}, + {arrow.FixedWidthTypes.Float16, "e"}, + {arrow.PrimitiveTypes.Float32, "f"}, + {arrow.PrimitiveTypes.Float64, "g"}, + {&arrow.FixedSizeBinaryType{ByteWidth: 3}, "w:3"}, + {arrow.BinaryTypes.Binary, "z"}, + {arrow.BinaryTypes.String, "u"}, + {&arrow.Decimal128Type{Precision: 16, Scale: 4}, "d:16,4"}, + {&arrow.Decimal128Type{Precision: 15, Scale: 0}, "d:15,0"}, + {&arrow.Decimal128Type{Precision: 15, Scale: -4}, "d:15,-4"}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testPrimitive(tt.fmt) + + f, err := ImportCArrowField(&sc) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + releaseSchema(&sc) + assert.True(t, schemaIsReleased(&sc)) + }) + } +} + +func TestImportTemporalSchema(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmt string + }{ + {arrow.FixedWidthTypes.Date32, "tdD"}, + {arrow.FixedWidthTypes.Date64, "tdm"}, + {arrow.FixedWidthTypes.Time32s, "tts"}, + {arrow.FixedWidthTypes.Time32ms, "ttm"}, + {arrow.FixedWidthTypes.Time64us, "ttu"}, + {arrow.FixedWidthTypes.Time64ns, "ttn"}, + {arrow.FixedWidthTypes.Duration_s, "tDs"}, + {arrow.FixedWidthTypes.Duration_ms, "tDm"}, + {arrow.FixedWidthTypes.Duration_us, "tDu"}, + {arrow.FixedWidthTypes.Duration_ns, "tDn"}, + {arrow.FixedWidthTypes.MonthInterval, "tiM"}, + {arrow.FixedWidthTypes.DayTimeInterval, "tiD"}, + {arrow.FixedWidthTypes.Timestamp_s, "tss:"}, + {&arrow.TimestampType{Unit: arrow.Second, TimeZone: "Europe/Paris"}, "tss:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_ms, "tsm:"}, + {&arrow.TimestampType{Unit: arrow.Millisecond, TimeZone: "Europe/Paris"}, "tsm:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_us, "tsu:"}, + {&arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "Europe/Paris"}, "tsu:Europe/Paris"}, + {arrow.FixedWidthTypes.Timestamp_ns, "tsn:"}, + {&arrow.TimestampType{Unit: arrow.Nanosecond, TimeZone: "Europe/Paris"}, "tsn:Europe/Paris"}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testPrimitive(tt.fmt) + + f, err := ImportCArrowField(&sc) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + releaseSchema(&sc) + assert.True(t, schemaIsReleased(&sc)) + }) + } +} + +func TestListSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmts []string + names []string + }{ + {arrow.ListOf(arrow.PrimitiveTypes.Int8), []string{"+l", "c"}, []string{"", "item"}}, + {arrow.FixedSizeListOf(2, arrow.PrimitiveTypes.Int64), []string{"+w:2", "l"}, []string{"", "item"}}, + {arrow.ListOf(arrow.ListOf(arrow.PrimitiveTypes.Int32)), []string{"+l", "+l", "i"}, []string{"", "item", "item"}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testNested(tt.fmts, tt.names) + defer releaseNestedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + releaseSchema(top) + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestStructSchemas(t *testing.T) { + tests := []struct { + typ arrow.DataType + fmts []string + names []string + flags []int64 + }{ + {arrow.StructOf( + arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true, Metadata: metadata2}, + ), []string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{flagIsNullable, flagIsNullable, flagIsNullable}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testStruct(tt.fmts, tt.names, tt.flags) + defer releaseNestedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + releaseSchema(top) + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestMapSchemas(t *testing.T) { + tests := []struct { + typ *arrow.MapType + keysSorted bool + fmts []string + names []string + flags []int64 + }{ + {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), false, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable, 0, 0, flagIsNullable}}, + {arrow.MapOf(arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String), true, []string{"+m", "+s", "c", "u"}, []string{"", "entries", "key", "value"}, []int64{flagIsNullable | flagMapKeysSorted, 0, 0, flagIsNullable}}, + } + + for _, tt := range tests { + t.Run(tt.typ.Name(), func(t *testing.T) { + sc := testMap(tt.fmts, tt.names, tt.flags) + defer releaseNestedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + f, err := ImportCArrowField(top) + assert.NoError(t, err) + + tt.typ.KeysSorted = tt.keysSorted + assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) + + releaseSchema(top) + assert.True(t, schemaIsReleased(top)) + }) + } +} + +func TestSchema(t *testing.T) { + // schema is exported as an equivalent struct type (+ top-level metadata) + sc := arrow.NewSchema([]arrow.Field{ + {Name: "nulls", Type: arrow.Null, Nullable: false}, + {Name: "values", Type: arrow.PrimitiveTypes.Int64, Nullable: true, Metadata: metadata1}, + }, &metadata2) + + cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) + defer releaseNestedSchemas(cst) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] + out, err := ImportCArrowSchema(top) + assert.NoError(t, err) + + assert.True(t, sc.Equal(out)) + assert.True(t, sc.Metadata().Equal(out.Metadata())) + + releaseSchema(top) + assert.True(t, schemaIsReleased(top)) +} + +func createTestInt8Arr() array.Interface { + bld := array.NewInt8Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int8{1, 2, 0, -3}, []bool{true, true, false, true}) + return bld.NewInt8Array() +} + +func createTestInt16Arr() array.Interface { + bld := array.NewInt16Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int16{1, 2, -3}, []bool{true, true, true}) + return bld.NewInt16Array() +} + +func createTestInt32Arr() array.Interface { + bld := array.NewInt32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int32{1, 2, 0, -3}, []bool{true, true, false, true}) + return bld.NewInt32Array() +} + +func createTestInt64Arr() array.Interface { + bld := array.NewInt64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]int64{1, 2, -3}, []bool{true, true, true}) + return bld.NewInt64Array() +} + +func createTestUint8Arr() array.Interface { + bld := array.NewUint8Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint8{1, 2, 0, 3}, []bool{true, true, false, true}) + return bld.NewUint8Array() +} + +func createTestUint16Arr() array.Interface { + bld := array.NewUint16Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint16{1, 2, 3}, []bool{true, true, true}) + return bld.NewUint16Array() +} + +func createTestUint32Arr() array.Interface { + bld := array.NewUint32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint32{1, 2, 0, 3}, []bool{true, true, false, true}) + return bld.NewUint32Array() +} + +func createTestUint64Arr() array.Interface { + bld := array.NewUint64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]uint64{1, 2, 3}, []bool{true, true, true}) + return bld.NewUint64Array() +} + +func createTestBoolArr() array.Interface { + bld := array.NewBooleanBuilder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]bool{true, false, false}, []bool{true, true, false}) + return bld.NewBooleanArray() +} + +func createTestNullArr() array.Interface { + return array.NewNull(2) +} + +func createTestFloat32Arr() array.Interface { + bld := array.NewFloat32Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]float32{1.5, 0}, []bool{true, false}) + return bld.NewFloat32Array() +} + +func createTestFloat64Arr() array.Interface { + bld := array.NewFloat64Builder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]float64{1.5, 0}, []bool{true, false}) + return bld.NewFloat64Array() +} + +func createTestFSBArr() array.Interface { + bld := array.NewFixedSizeBinaryBuilder(memory.DefaultAllocator, &arrow.FixedSizeBinaryType{ByteWidth: 3}) + defer bld.Release() + + bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) + return bld.NewFixedSizeBinaryArray() +} + +func createTestBinaryArr() array.Interface { + bld := array.NewBinaryBuilder(memory.DefaultAllocator, arrow.BinaryTypes.Binary) + defer bld.Release() + + bld.AppendValues([][]byte{[]byte("foo"), []byte("bar"), nil}, []bool{true, true, false}) + return bld.NewBinaryArray() +} + +func createTestStrArr() array.Interface { + bld := array.NewStringBuilder(memory.DefaultAllocator) + defer bld.Release() + + bld.AppendValues([]string{"foo", "bar", ""}, []bool{true, true, false}) + return bld.NewStringArray() +} + +func createTestDecimalArr() array.Interface { + bld := array.NewDecimal128Builder(memory.DefaultAllocator, &arrow.Decimal128Type{Precision: 16, Scale: 4}) + defer bld.Release() + + bld.AppendValues([]decimal128.Num{decimal128.FromU64(12345670), decimal128.FromU64(0)}, []bool{true, false}) + return bld.NewDecimal128Array() +} + +func TestPrimitiveArrs(t *testing.T) { + tests := []struct { + name string + fn func() array.Interface + }{ + {"int8", createTestInt8Arr}, + {"uint8", createTestUint8Arr}, + {"int16", createTestInt16Arr}, + {"uint16", createTestUint16Arr}, + {"int32", createTestInt32Arr}, + {"uint32", createTestUint32Arr}, + {"int64", createTestInt64Arr}, + {"uint64", createTestUint64Arr}, + {"bool", createTestBoolArr}, + {"null", createTestNullArr}, + {"float32", createTestFloat32Arr}, + {"float64", createTestFloat64Arr}, + {"fixed size binary", createTestFSBArr}, + {"binary", createTestBinaryArr}, + {"utf8", createTestStrArr}, + {"decimal128", createTestDecimalArr}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + arr := tt.fn() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(arr, imported)) + assert.True(t, isReleased(carr)) + + imported.Release() + }) + } +} + +func TestPrimitiveSliced(t *testing.T) { + arr := createTestInt16Arr() + defer arr.Release() + + sl := array.NewSlice(arr, 1, 2) + defer sl.Release() + + carr := createCArr(sl) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(sl, imported)) + assert.True(t, array.ArraySliceEqual(arr, 1, 2, imported, 0, int64(imported.Len()))) + assert.True(t, isReleased(carr)) + + imported.Release() +} + +func createTestListArr() array.Interface { + bld := array.NewListBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8) + defer bld.Release() + + vb := bld.ValueBuilder().(*array.Int8Builder) + + bld.Append(true) + vb.AppendValues([]int8{1, 2}, []bool{true, true}) + + bld.Append(true) + vb.AppendValues([]int8{3, 0}, []bool{true, false}) + + bld.AppendNull() + + return bld.NewArray() +} + +func createTestFixedSizeList() array.Interface { + bld := array.NewFixedSizeListBuilder(memory.DefaultAllocator, 2, arrow.PrimitiveTypes.Int64) + defer bld.Release() + + vb := bld.ValueBuilder().(*array.Int64Builder) + + bld.Append(true) + vb.AppendValues([]int64{1, 2}, []bool{true, true}) + + bld.Append(true) + vb.AppendValues([]int64{3, 0}, []bool{true, false}) + + bld.AppendNull() + return bld.NewArray() +} + +func createTestStructArr() array.Interface { + bld := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( + arrow.Field{Name: "a", Type: arrow.PrimitiveTypes.Int8, Nullable: true}, + arrow.Field{Name: "b", Type: arrow.BinaryTypes.String, Nullable: true}, + )) + defer bld.Release() + + f1bld := bld.FieldBuilder(0).(*array.Int8Builder) + f2bld := bld.FieldBuilder(1).(*array.StringBuilder) + + bld.Append(true) + f1bld.Append(1) + f2bld.Append("foo") + + bld.Append(true) + f1bld.Append(2) + f2bld.AppendNull() + + return bld.NewArray() +} + +func createTestMapArr() array.Interface { + bld := array.NewMapBuilder(memory.DefaultAllocator, arrow.PrimitiveTypes.Int8, arrow.BinaryTypes.String, false) + defer bld.Release() + + kb := bld.KeyBuilder().(*array.Int8Builder) + vb := bld.ItemBuilder().(*array.StringBuilder) + + bld.Append(true) + kb.Append(1) + vb.Append("foo") + kb.Append(2) + vb.AppendNull() + + bld.Append(true) + kb.Append(3) + vb.Append("bar") + + return bld.NewArray() +} + +func TestNestedArrays(t *testing.T) { + tests := []struct { + name string + fn func() array.Interface + }{ + {"list", createTestListArr}, + {"fixed size list", createTestFixedSizeList}, + {"struct", createTestStructArr}, + {"map", createTestMapArr}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + arr := tt.fn() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + imported, err := ImportCArrayWithType(carr, arr.DataType()) + assert.NoError(t, err) + assert.True(t, array.ArrayEqual(arr, imported)) + assert.True(t, isReleased(carr)) + + imported.Release() + }) + } +} + +func TestRecordBatch(t *testing.T) { + arr := createTestStructArr() + defer arr.Release() + + carr := createCArr(arr) + defer freeTestArr(carr) + + sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) + defer releaseNestedSchemas(sc) + + top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] + rb, err := ImportCRecordBatch(carr, top) + assert.NoError(t, err) + defer rb.Release() + + assert.EqualValues(t, 2, rb.NumCols()) + rbschema := rb.Schema() + assert.Equal(t, "a", rbschema.Field(0).Name) + assert.Equal(t, "b", rbschema.Field(1).Name) + + rec := array.NewRecord(rbschema, []array.Interface{arr.(*array.Struct).Field(0), arr.(*array.Struct).Field(1)}, -1) + defer rec.Release() + + assert.True(t, array.RecordEqual(rb, rec)) +} + +func TestRecordReaderStream(t *testing.T) { + stream := arrayStreamTest() + defer releaseStream(stream) + + rdr := ImportCArrayStream(stream, nil) + for { + rec, err := rdr.Read() + if err != nil { + if err == io.EOF { + break + } + assert.NoError(t, err) + } + defer rec.Release() + + fmt.Println(rec) + } +} diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go new file mode 100644 index 00000000000..226b2ef49c8 --- /dev/null +++ b/go/arrow/cdata/cdata_test_framework.go @@ -0,0 +1,246 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package cdata + +// #include +// #include +// #include "arrow/c/abi.h" +// #include "arrow/c/helpers.h" +// +// void setup_array_stream_test(const int n_batches, struct ArrowArrayStream* out); +// struct ArrowArray* get_test_arr() { return (struct ArrowArray*)(malloc(sizeof(struct ArrowArray))); } +// struct ArrowArrayStream* get_test_stream() { return (struct ArrowArrayStream*)malloc(sizeof(struct ArrowArrayStream)); } +// +// void release_test_arr(struct ArrowArray* arr) { +// for (int i = 0; i < arr->n_buffers; ++i) { +// free((void*)arr->buffers[i]); +// } +// ArrowArrayMarkReleased(arr); +// } +// +// int32_t* get_data() { +// int32_t* data = malloc(sizeof(int32_t)*10); +// for (int i = 0; i < 10; ++i) { data[i] = i+1; } +// return data; +// } +// void export_int32_type(struct ArrowSchema* schema); +// void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); +// int test1_is_released(); +// void test_primitive(struct ArrowSchema* schema, const char* fmt); +// void free_top_schema(struct ArrowSchema** top); +// struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n); +// struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); +// struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); +// struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n); +import "C" +import ( + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" +) + +const ( + flagIsNullable = C.ARROW_FLAG_NULLABLE + flagMapKeysSorted = C.ARROW_FLAG_MAP_KEYS_SORTED +) + +var ( + metadata1 = arrow.NewMetadata([]string{"key1", "key2"}, []string{"", "bar"}) + metadata2 = arrow.NewMetadata([]string{"key"}, []string{"abcde"}) +) + +func exportInt32TypeSchema() CArrowSchema { + var s CArrowSchema + C.export_int32_type(&s) + return s +} + +func releaseStream(s *CArrowArrayStream) { + C.ArrowArrayStreamRelease(s) +} + +func releaseSchema(s *CArrowSchema) { + C.ArrowSchemaRelease(s) +} + +func schemaIsReleased(s *CArrowSchema) bool { + return C.ArrowSchemaIsReleased(s) == 1 +} + +func getMetadataKeys() ([]string, []string) { + return []string{"key1", "key2"}, []string{"key"} +} + +func getMetadataValues() ([]string, []string) { + return []string{"", "bar"}, []string{"abcde"} +} + +func exportInt32Array() CArrowArray { + var arr CArrowArray + C.export_int32_array(C.get_data(), C.int64_t(10), &arr) + return arr +} + +func isReleased(arr *CArrowArray) bool { + return C.ArrowArrayIsReleased(arr) == 1 +} + +func test1IsReleased() bool { + return C.test1_is_released() == 1 +} + +func testPrimitive(fmtstr string) CArrowSchema { + var s CArrowSchema + fmt := C.CString(fmtstr) + C.test_primitive(&s, fmt) + return s +} + +func releaseNestedSchemas(top **CArrowSchema) { + C.free_top_schema(top) +} + +func testNested(fmts, names []string) **CArrowSchema { + if len(fmts) != len(names) { + panic("testing nested lists must have same size fmts and names") + } + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + } + + return C.test_lists((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), C.int(len(fmts))) +} + +func testStruct(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing structs must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_struct((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func testMap(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing maps must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_map((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func testSchema(fmts, names []string, flags []int64) **CArrowSchema { + if len(fmts) != len(names) || len(names) != len(flags) { + panic("testing structs must all have the same size slices in args") + } + + cfmts := make([]*C.char, len(fmts)) + cnames := make([]*C.char, len(names)) + cflags := make([]C.int64_t, len(flags)) + + for i := range fmts { + cfmts[i] = C.CString(fmts[i]) + cnames[i] = C.CString(names[i]) + cflags[i] = C.int64_t(flags[i]) + } + + return C.test_schema((**C.char)(unsafe.Pointer(&cfmts[0])), (**C.char)(unsafe.Pointer(&cnames[0])), (*C.int64_t)(unsafe.Pointer(&cflags[0])), C.int(len(fmts))) +} + +func freeTestArr(carr *CArrowArray) { + C.free(unsafe.Pointer(carr)) +} + +func createCArr(arr array.Interface) *CArrowArray { + var ( + carr = C.get_test_arr() + children = (**CArrowArray)(nil) + nchildren = C.int64_t(0) + ) + + switch arr := arr.(type) { + case *array.List: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + case *array.FixedSizeList: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + case *array.Struct: + clist := []*CArrowArray{} + for i := 0; i < arr.NumField(); i++ { + clist = append(clist, createCArr(arr.Field(i))) + nchildren += 1 + } + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + case *array.Map: + clist := []*CArrowArray{createCArr(arr.ListValues())} + children = (**CArrowArray)(unsafe.Pointer(&clist[0])) + nchildren += 1 + } + + carr.children = children + carr.n_children = nchildren + carr.dictionary = nil + carr.length = C.int64_t(arr.Len()) + carr.null_count = C.int64_t(arr.NullN()) + carr.offset = C.int64_t(arr.Data().Offset()) + buffers := arr.Data().Buffers() + cbuf := []unsafe.Pointer{} + for _, b := range buffers { + if b != nil { + cbuf = append(cbuf, C.CBytes(b.Bytes())) + } + } + carr.n_buffers = C.int64_t(len(cbuf)) + if len(cbuf) > 0 { + carr.buffers = &cbuf[0] + } + carr.release = (*[0]byte)(C.release_test_arr) + + return carr +} + +func arrayStreamTest() *CArrowArrayStream { + st := C.get_test_stream() + C.setup_array_stream_test(2, st) + return st +} diff --git a/go/arrow/cdata/cdata_testframework.c b/go/arrow/cdata/cdata_testframework.c new file mode 100644 index 00000000000..670e5719a07 --- /dev/null +++ b/go/arrow/cdata/cdata_testframework.c @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + +#include +#include +#include +#include +#include "arrow/c/abi.h" +#include "utils.h" + +static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; + +static void release_int32_type(struct ArrowSchema* schema) { + // mark released + schema->release = NULL; +} + +void export_int32_type(struct ArrowSchema* schema) { + const char* encoded_metadata; + if (check_for_endianness() == 1) { + encoded_metadata = kEncodedMeta1LE; + } else { + encoded_metadata = kEncodedMeta1BE; + } + *schema = (struct ArrowSchema) { + // Type description + .format = "i", + .name = "", + .metadata = encoded_metadata, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_type, + }; +} + +static bool test1_released = false; + +int test1_is_released() { return test1_released; } + +static void release_int32_array(struct ArrowArray* array) { + assert(array->n_buffers == 2); + // free the buffers and buffers array + free((void *) array->buffers[1]); + free(array->buffers); + // mark released + array->release = NULL; + test1_released = true; +} + +void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { + // initialize primitive fields + *array = (struct ArrowArray) { + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_array + }; + + // allocate list of buffers + array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); + assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; +} diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index 46537ff037a..96e66e9ebb5 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -25,10 +25,22 @@ import ( "golang.org/x/xerrors" ) +// ImportCArrowField takes in an ArrowSchema from the C Data interface, it +// will copy the metadata and type definitions rather than keep direct references +// to them. It is safe to call C.ArrowSchemaRelease after receiving the field +// from this function. +func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { + return importSchema(out) +} + // ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it // will copy the metadata and schema definitions over from the C object rather // than keep direct references to them. It is safe to call C.ArrowSchemaRelease // after receiving the schema from this function. +// +// This version is intended to take in a schema for a record batch, which means +// that the top level of the schema should be a struct of the schema fields. If +// importing a single array's schema, then use ImportCArrowField instead. func ImportCArrowSchema(out *CArrowSchema) (*arrow.Schema, error) { ret, err := importSchema(out) if err != nil { @@ -55,7 +67,7 @@ func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (array.Interface, if err != nil { return nil, err } - + defer imp.data.Release() return array.MakeFromData(imp.data), nil } diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h new file mode 100644 index 00000000000..d3c10f88b7b --- /dev/null +++ b/go/arrow/cdata/utils.h @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cgo + + +/* + Function check_for_endianness() returns 1, if architecture + is little endian, 0 in case of big endian. +*/ +inline int check_for_endianness() +{ + unsigned int x = 1; + char *c = (char*) &x; + return (int)*c; +} + +// metadata keys 1: {"key1", "key2"} +// metadata values 1: {"", "bar"} +static const char kEncodedMeta1LE[] = { + 2, 0, 0, 0, + 4, 0, 0, 0, 'k', 'e', 'y', '1', 0, 0, 0, 0, + 4, 0, 0, 0, 'k', 'e', 'y', '2', 3, 0, 0, 0, 'b', 'a', 'r'}; + +static const char kEncodedMeta1BE[] = { + 0, 0, 0, 2, + 0, 0, 0, 4, 'k', 'e', 'y', '1', 0, 0, 0, 0, + 0, 0, 0, 4, 'k', 'e', 'y', '2', 0, 0, 0, 3, 'b', 'a', 'r'}; + +static const char* kMetadataKeys2[] = {"key"}; +static const char* kMetadataValues2[] = {"abcde"}; + +// metadata keys 2: {"key"} +// metadata values 2: {"abcde"} +static const char kEncodedMeta2LE[] = { + 1, 0, 0, 0, + 3, 0, 0, 0, 'k', 'e', 'y', 5, 0, 0, 0, 'a', 'b', 'c', 'd', 'e'}; + +static const char kEncodedMeta2BE[] = { + 0, 0, 0, 1, + 0, 0, 0, 3, 'k', 'e', 'y', 0, 0, 0, 5, 'a', 'b', 'c', 'd', 'e'}; + + diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go index 79bb878f36a..c90ee686b2e 100644 --- a/go/arrow/datatype_fixedwidth.go +++ b/go/arrow/datatype_fixedwidth.go @@ -132,7 +132,7 @@ type Decimal128Type struct { func (*Decimal128Type) ID() Type { return DECIMAL } func (*Decimal128Type) Name() string { return "decimal" } -func (*Decimal128Type) BitWidth() int { return 16 } +func (*Decimal128Type) BitWidth() int { return 128 } func (t *Decimal128Type) String() string { return fmt.Sprintf("%s(%d, %d)", t.Name(), t.Precision, t.Scale) } From 66419871401bb1dcb4ab6f6b62c75ba3442a1b2d Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Tue, 31 Aug 2021 09:46:38 -0400 Subject: [PATCH 05/20] use "test" tag to exclude the .c files from non-test builds --- go/arrow/cdata/cdata_fulltest.c | 1 + go/arrow/cdata/cdata_test.go | 18 ++++++++++++++++-- go/arrow/cdata/cdata_test_framework.go | 2 ++ go/arrow/cdata/cdata_testframework.c | 1 + go/arrow/cdata/utils.h | 2 +- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c index f2a0da70c54..929416218cf 100644 --- a/go/arrow/cdata/cdata_fulltest.c +++ b/go/arrow/cdata/cdata_fulltest.c @@ -15,6 +15,7 @@ // limitations under the License. // +build cgo +// +build test #include #include diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index 0cf874ad285..dbb6a6f5eb3 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -15,11 +15,15 @@ // limitations under the License. // +build cgo +// +build test + +// use test tag so that we only run these tests when the "test" tag is present +// so that the .c and other framework infrastructure is only compiled in during +// testing, and the .c files and symbols are not present in release builds. package cdata import ( - "fmt" "io" "runtime" "testing" @@ -598,6 +602,7 @@ func TestRecordReaderStream(t *testing.T) { defer releaseStream(stream) rdr := ImportCArrayStream(stream, nil) + i := 0 for { rec, err := rdr.Read() if err != nil { @@ -608,6 +613,15 @@ func TestRecordReaderStream(t *testing.T) { } defer rec.Release() - fmt.Println(rec) + assert.EqualValues(t, 2, rec.NumCols()) + assert.Equal(t, "a", rec.ColumnName(0)) + assert.Equal(t, "b", rec.ColumnName(1)) + i++ + for j := 0; j < int(rec.NumRows()); j++ { + assert.Equal(t, int32((j+1)*i), rec.Column(0).(*array.Int32).Value(j)) + } + assert.Equal(t, "foo", rec.Column(1).(*array.String).Value(0)) + assert.Equal(t, "bar", rec.Column(1).(*array.String).Value(1)) + assert.Equal(t, "baz", rec.Column(1).(*array.String).Value(2)) } } diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index 226b2ef49c8..c1940d66db2 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -14,6 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +// +build test + package cdata // #include diff --git a/go/arrow/cdata/cdata_testframework.c b/go/arrow/cdata/cdata_testframework.c index 670e5719a07..05aa5cfc5f4 100644 --- a/go/arrow/cdata/cdata_testframework.c +++ b/go/arrow/cdata/cdata_testframework.c @@ -15,6 +15,7 @@ // limitations under the License. // +build cgo +// +build test #include #include diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h index d3c10f88b7b..d63d80953b6 100644 --- a/go/arrow/cdata/utils.h +++ b/go/arrow/cdata/utils.h @@ -15,7 +15,7 @@ // limitations under the License. // +build cgo - +// +build test /* Function check_for_endianness() returns 1, if architecture From 30f4e121c254cc1482a0b6c135b4eb838b3c29d3 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Tue, 31 Aug 2021 09:50:45 -0400 Subject: [PATCH 06/20] update status --- docs/source/status.rst | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/source/status.rst b/docs/source/status.rst index 48084187ef9..037e74fb217 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -177,18 +177,18 @@ Notes: C Data Interface ================ -+-----------------------------+-------+--------+-------+-------+ -| Feature | C++ | Python | R | Rust | -| | | | | | -+=============================+=======+========+=======+=======+ -| Schema export | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Array export | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Schema import | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ -| Array import | ✓ | ✓ | ✓ | ✓ | -+-----------------------------+-------+--------+-------+-------+ ++-----------------------------+-------+--------+-------+-------+----+ +| Feature | C++ | Python | R | Rust | Go | +| | | | | | | ++=============================+=======+========+=======+=======+====+ +| Schema export | ✓ | ✓ | ✓ | ✓ | | ++-----------------------------+-------+--------+-------+-------+----+ +| Array export | ✓ | ✓ | ✓ | ✓ | | ++-----------------------------+-------+--------+-------+-------+----+ +| Schema import | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+-------+-------+-----+ +| Array import | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+-------+-------+-----+ .. seealso:: The :ref:`C Data Interface ` specification. @@ -197,14 +197,14 @@ C Data Interface C Stream Interface (experimental) ================================= -+-----------------------------+-------+--------+ -| Feature | C++ | Python | -| | | | -+=============================+=======+========+ -| Stream export | ✓ | ✓ | -+-----------------------------+-------+--------+ -| Stream import | ✓ | ✓ | -+-----------------------------+-------+--------+ ++-----------------------------+-------+--------+----+ +| Feature | C++ | Python | Go | +| | | | | ++=============================+=======+========+====+ +| Stream export | ✓ | ✓ | | ++-----------------------------+-------+--------+----+ +| Stream import | ✓ | ✓ | ✓ | ++-----------------------------+-------+--------+----+ .. seealso:: The :ref:`C Stream Interface ` specification. From 1f5bb6b243a3358685c17dde91aea37c4d78da99 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Tue, 31 Aug 2021 09:52:59 -0400 Subject: [PATCH 07/20] decimal128 bitwidth should be 128 --- go/arrow/datatype_fixedwidth_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/go/arrow/datatype_fixedwidth_test.go b/go/arrow/datatype_fixedwidth_test.go index 3f5153c5024..3349703b40b 100644 --- a/go/arrow/datatype_fixedwidth_test.go +++ b/go/arrow/datatype_fixedwidth_test.go @@ -53,7 +53,7 @@ func TestDecimal128Type(t *testing.T) { } { t.Run(tc.want, func(t *testing.T) { dt := arrow.Decimal128Type{Precision: tc.precision, Scale: tc.scale} - if got, want := dt.BitWidth(), 16; got != want { + if got, want := dt.BitWidth(), 128; got != want { t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) } @@ -80,7 +80,7 @@ func TestFixedSizeBinaryType(t *testing.T) { } { t.Run(tc.want, func(t *testing.T) { dt := arrow.FixedSizeBinaryType{tc.byteWidth} - if got, want := dt.BitWidth(), 8 * tc.byteWidth; got != want { + if got, want := dt.BitWidth(), 8*tc.byteWidth; got != want { t.Fatalf("invalid bitwidth: got=%d, want=%d", got, want) } @@ -101,9 +101,9 @@ func TestFixedSizeBinaryType(t *testing.T) { func TestTimestampType(t *testing.T) { for _, tc := range []struct { - unit arrow.TimeUnit - timeZone string - want string + unit arrow.TimeUnit + timeZone string + want string }{ {arrow.Nanosecond, "CST", "timestamp[ns, tz=CST]"}, {arrow.Microsecond, "EST", "timestamp[us, tz=EST]"}, From 4d7e8ad35df975ec5b94a6840fab637727e34ec7 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Tue, 31 Aug 2021 10:00:06 -0400 Subject: [PATCH 08/20] add "test" tag to running the go tests --- ci/scripts/go_test.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh index 7dd873df3e1..1a47a71da1f 100755 --- a/ci/scripts/go_test.sh +++ b/ci/scripts/go_test.sh @@ -23,8 +23,12 @@ source_dir=${1}/go pushd ${source_dir}/arrow +# the cgo implementation of the c data interface requires the "test" +# tag in order to run its tests so that the testing functions implemented +# in .c files don't get included in non-test builds. + for d in $(go list ./... | grep -v vendor); do - go test $d + go test -tags "test" $d done popd From fd45aabfb0c8dd3a01d886850e75725dd12d2e79 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Thu, 2 Sep 2021 16:35:35 -0400 Subject: [PATCH 09/20] add go-cgo-python test --- .github/workflows/go.yml | 33 ++++ ci/docker/debian-10-go-cgo-python.dockerfile | 42 ++++++ ci/docker/debian-10-go.dockerfile | 3 +- ci/scripts/go_cgo_python_test.sh | 45 ++++++ dev/release/rat_exclude_files.txt | 1 + docker-compose.yml | 21 ++- go/arrow/cdata/test/go.mod | 23 +++ go/arrow/cdata/test/go.sum | 150 +++++++++++++++++++ go/arrow/cdata/test/test_cimport.go | 88 +++++++++++ go/arrow/cdata/test/test_export_to_cgo.py | 59 ++++++++ 10 files changed, 463 insertions(+), 2 deletions(-) create mode 100644 ci/docker/debian-10-go-cgo-python.dockerfile create mode 100644 ci/scripts/go_cgo_python_test.sh create mode 100644 go/arrow/cdata/test/go.mod create mode 100644 go/arrow/cdata/test/go.sum create mode 100644 go/arrow/cdata/test/test_cimport.go create mode 100644 go/arrow/cdata/test/test_export_to_cgo.py diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 3c9100c20b7..e696de98871 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -75,6 +75,39 @@ jobs: continue-on-error: true run: archery docker push debian-go + docker_cgo_python: + name: AMD64 Debian 10 GO ${{ matrix.go }} - CGO Python + runs-on: ubuntu-latest + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 15 + strategy: + fail-fast: false + matrix: + go: [1.15] + env: + GO: ${{ matrix.go }} + steps: + - name: Checkout Arrow + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Fetch Submodules and Tags + run: ci/scripts/util_checkout.sh + - name: Free Up Disk Space + run: ci/scripts/util_cleanup.sh + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Setup Archery + run: pip install -e dev/archery[docker] + - name: Execute Docker Build + run: archery docker run debian-go-cgo-python + - name: Docker Push + if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' + continue-on-error: true + run: archery docker push debian-go-cgo-python + windows: name: AMD64 Windows 2019 Go ${{ matrix.go }} runs-on: windows-latest diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile new file mode 100644 index 00000000000..35d0f856121 --- /dev/null +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND noninteractive + +# install libarrow-dev and libarrow-python-dev so we can use pyarrow to +# test C Data Interface consumption +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \ + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ + apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + python3 \ + python3-pip \ + python3-dev \ + cmake \ + libarrow-dev \ + libarrow-python-dev && \ + apt-get clean + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +RUN pip install pyarrow diff --git a/ci/docker/debian-10-go.dockerfile b/ci/docker/debian-10-go.dockerfile index 199f09e24fc..3a24b8afee6 100644 --- a/ci/docker/debian-10-go.dockerfile +++ b/ci/docker/debian-10-go.dockerfile @@ -17,7 +17,8 @@ ARG arch=amd64 ARG go=1.15 -FROM ${arch}/golang:${go} +FROM ${arch}/golang:${go}-buster + # TODO(kszucs): # 1. add the files required to install the dependencies to .dockerignore diff --git a/ci/scripts/go_cgo_python_test.sh b/ci/scripts/go_cgo_python_test.sh new file mode 100644 index 00000000000..564b5e3f416 --- /dev/null +++ b/ci/scripts/go_cgo_python_test.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/go + +pushd ${source_dir}/arrow/cdata/test + +case "$(uname)" in + Linux) + testlib="cgotest.so" + ;; + Darwin) + testlib="cgotest.so" + ;; + MINGW*) + testlib="cgotest.dll" + ;; +esac + +go build -tags cdata_test -buildmode=c-shared -o $testlib . + +python test_export_to_cgo.py + +rm $testlib +rm "${testlib%.*}.h" + +popd diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index e2aa6285ea5..b07f9623e52 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -129,6 +129,7 @@ go/arrow/flight/Flight.pb.go go/arrow/flight/Flight_grpc.pb.go go/arrow/internal/cpu/* go/arrow/type_string.go +go/arrow/cdata/test/go.sum go/*.tmpldata go/*.s go/parquet/go.sum diff --git a/docker-compose.yml b/docker-compose.yml index cdbba66aebc..8541edbb545 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -112,7 +112,8 @@ x-hierarchy: - debian-c-glib: - debian-ruby - debian-python - - debian-go + - debian-go: + - debian-go-cgo-python - debian-java: - debian-java-jni - debian-js @@ -1196,6 +1197,24 @@ services: /arrow/ci/scripts/go_build.sh /arrow && /arrow/ci/scripts/go_test.sh /arrow" + debian-go-cgo-python: + # Usage: + # docker-compose build debian-go-cgo-python + # docker-compose run debian-go-cgo-python + image: ${REPO}:${ARCH}-debian-10-go-${GO}-cgo-python + build: + context: . + dockerfile: ci/docker/debian-10-go-cgo-python.dockerfile + cache_from: + - ${REPO}:${ARCH}-debian-10-go-${GO}-cgo-python + args: + base: ${REPO}:${ARCH}-debian-10-go-${GO} + shm_size: *shm-size + volumes: *debian-volumes + command: &go-cgo-python-command > + /bin/bash -c " + /arrow/ci/scripts/go_cgo_python_test.sh /arrow" + ############################# JavaScript #################################### debian-js: diff --git a/go/arrow/cdata/test/go.mod b/go/arrow/cdata/test/go.mod new file mode 100644 index 00000000000..153b1272db5 --- /dev/null +++ b/go/arrow/cdata/test/go.mod @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +module cdatatest + +go 1.15 + +replace github.com/apache/arrow/go/arrow => ../../ + +require github.com/apache/arrow/go/arrow v0.0.0-00010101000000-000000000000 diff --git a/go/arrow/cdata/test/go.sum b/go/arrow/cdata/test/go.sum new file mode 100644 index 00000000000..9cbf776cb2c --- /dev/null +++ b/go/arrow/cdata/test/go.sum @@ -0,0 +1,150 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v2.0.0+incompatible h1:dicJ2oXwypfwUGnB2/TYWYEKiuk9eYQlQO/AnOHl5mI= +github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= +github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= +github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4= +github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= +google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go new file mode 100644 index 00000000000..d133e8964b1 --- /dev/null +++ b/go/arrow/cdata/test/test_cimport.go @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build cdata_test + +package main + +import ( + "fmt" + "unsafe" + + "github.com/apache/arrow/go/arrow" + "github.com/apache/arrow/go/arrow/array" + "github.com/apache/arrow/go/arrow/cdata" + "github.com/apache/arrow/go/arrow/memory" +) + +// #include +import "C" + +//export importSchema +func importSchema(ptr uintptr) { + sc := (*cdata.CArrowSchema)(unsafe.Pointer(ptr)) + + schema, err := cdata.ImportCArrowSchema(sc) + if err != nil { + panic(err) + } + + expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) + expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) + if !schema.Equal(expectedSchema) { + panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) + } + + fmt.Println("schema matches! Huzzah!") +} + +//export importRecordBatch +func importRecordBatch(scptr, rbptr uintptr) { + sc := (*cdata.CArrowSchema)(unsafe.Pointer(scptr)) + rb := (*cdata.CArrowArray)(unsafe.Pointer(rbptr)) + + rec, err := cdata.ImportCRecordBatch(rb, sc) + if err != nil { + panic(err) + } + defer rec.Release() + + expectedMetadata := arrow.NewMetadata([]string{"key1"}, []string{"value1"}) + expectedSchema := arrow.NewSchema([]arrow.Field{{Name: "ints", Type: arrow.ListOf(arrow.PrimitiveTypes.Int32), Nullable: true}}, &expectedMetadata) + + bldr := array.NewRecordBuilder(memory.DefaultAllocator, expectedSchema) + defer bldr.Release() + + lb := bldr.Field(0).(*array.ListBuilder) + vb := lb.ValueBuilder().(*array.Int32Builder) + + lb.Append(true) + vb.Append(int32(1)) + + lb.Append(true) + vb.AppendValues([]int32{2, 42}, nil) + + expectedRec := bldr.NewRecord() + defer expectedRec.Release() + + if !array.RecordEqual(expectedRec, rec) { + panic(fmt.Sprintf("records didn't match: expected %s\n got %s", expectedRec, rec)) + } + + fmt.Println("record batch matches huzzah!") +} + +func main() {} diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py new file mode 100644 index 00000000000..57810b83655 --- /dev/null +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import pyarrow as pa +from pyarrow.cffi import ffi +# from cffi import FFI + +def make_schema(): + return pa.schema([('ints', pa.list_(pa.int32()))], + metadata={b'key1': b'value1'}) + +def make_batch(): + return pa.record_batch([[[1], [2, 42]]], make_schema()) + + +def load_lib(): + libext = 'so' + if os.name == 'nt': + libext = 'dll' + + ffi.cdef( + """ + void importSchema(uintptr_t ptr); + void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); + """) + return ffi.dlopen('cgotest.{}'.format(libext)) + + +c_schema = ffi.new("struct ArrowSchema*") +ptr_schema = int(ffi.cast("uintptr_t", c_schema)) +make_schema()._export_to_c(ptr_schema) + +cgotest = load_lib() +cgotest.importSchema(ptr_schema) + +c_array = ffi.new("struct ArrowArray*") +ptr_array = int(ffi.cast("uintptr_t", c_array)) + +batch = make_batch() +batch._export_to_c(ptr_array) + +cgotest.importRecordBatch(ptr_schema, ptr_array) From b43572d2f1d1ba1fa3ae1c5a242353243d8de882 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Thu, 2 Sep 2021 16:44:28 -0400 Subject: [PATCH 10/20] make cgo python test executable --- ci/scripts/go_cgo_python_test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/scripts/go_cgo_python_test.sh diff --git a/ci/scripts/go_cgo_python_test.sh b/ci/scripts/go_cgo_python_test.sh old mode 100644 new mode 100755 From 55c3edb20fc65622434549ff5da108225b882943 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Thu, 2 Sep 2021 16:53:28 -0400 Subject: [PATCH 11/20] need cffi for the script --- ci/docker/debian-10-go-cgo-python.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile index 35d0f856121..045acf1c350 100644 --- a/ci/docker/debian-10-go-cgo-python.dockerfile +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -39,4 +39,4 @@ RUN apt-get update -y -q && \ RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ ln -s /usr/bin/pip3 /usr/local/bin/pip -RUN pip install pyarrow +RUN pip install pyarrow cffi From be601884044384ac9e922f02408ae4a2d0d54d41 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Thu, 2 Sep 2021 17:09:27 -0400 Subject: [PATCH 12/20] use relative path for cgotest.so --- go/arrow/cdata/test/test_export_to_cgo.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py index 57810b83655..e3509458cb7 100644 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -20,7 +20,6 @@ import os import pyarrow as pa from pyarrow.cffi import ffi -# from cffi import FFI def make_schema(): return pa.schema([('ints', pa.list_(pa.int32()))], @@ -40,7 +39,7 @@ def load_lib(): void importSchema(uintptr_t ptr); void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); """) - return ffi.dlopen('cgotest.{}'.format(libext)) + return ffi.dlopen('./cgotest.{}'.format(libext)) c_schema = ffi.new("struct ArrowSchema*") From 60a924d48f274544a4092c1483be11b148932745 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Sep 2021 16:53:25 +0200 Subject: [PATCH 13/20] Install PyArrow binary wheel instead of rebuilding it --- ci/docker/debian-10-go-cgo-python.dockerfile | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile index 045acf1c350..23c669a56ab 100644 --- a/ci/docker/debian-10-go-cgo-python.dockerfile +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -29,14 +29,13 @@ RUN apt-get update -y -q && \ apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ python3 \ - python3-pip \ - python3-dev \ - cmake \ - libarrow-dev \ - libarrow-python-dev && \ + python3-pip && \ apt-get clean RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ ln -s /usr/bin/pip3 /usr/local/bin/pip -RUN pip install pyarrow cffi +# Need a newer pip than Debian's to install manylinux201x wheels +RUN pip install -U pip + +RUN pip install pyarrow cffi --only-binary pyarrow From ad1cfd19d898ea3ca47369b0ce88c3651af56891 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Sep 2021 16:53:51 +0200 Subject: [PATCH 14/20] Migrate Python test to unittest; make it stricter --- go/arrow/cdata/test/test_cimport.go | 7 +++ go/arrow/cdata/test/test_export_to_cgo.py | 70 +++++++++++++++++------ 2 files changed, 58 insertions(+), 19 deletions(-) diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go index d133e8964b1..77be5d6cbc5 100644 --- a/go/arrow/cdata/test/test_cimport.go +++ b/go/arrow/cdata/test/test_cimport.go @@ -45,6 +45,9 @@ func importSchema(ptr uintptr) { if !schema.Equal(expectedSchema) { panic(fmt.Sprintf("schema didn't match: expected %s, got %s", expectedSchema, schema)) } + if !schema.Metadata().Equal(expectedMetadata) { + panic(fmt.Sprintf("metadata didn't match: expected %s, got %s", expectedMetadata, schema.Metadata())) + } fmt.Println("schema matches! Huzzah!") } @@ -69,9 +72,13 @@ func importRecordBatch(scptr, rbptr uintptr) { lb := bldr.Field(0).(*array.ListBuilder) vb := lb.ValueBuilder().(*array.Int32Builder) + // [[[1], [], None [2, 42]]] lb.Append(true) vb.Append(int32(1)) + lb.Append(true) + lb.Append(false) + lb.Append(true) vb.AppendValues([]int32{2, 42}, nil) diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py index e3509458cb7..5c29e5efb83 100644 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -17,19 +17,17 @@ # specific language governing permissions and limitations # under the License. +import contextlib +import gc import os +import unittest + import pyarrow as pa from pyarrow.cffi import ffi -def make_schema(): - return pa.schema([('ints', pa.list_(pa.int32()))], - metadata={b'key1': b'value1'}) - -def make_batch(): - return pa.record_batch([[[1], [2, 42]]], make_schema()) - -def load_lib(): +def load_cgotest(): + # XXX what about Darwin? libext = 'so' if os.name == 'nt': libext = 'dll' @@ -39,20 +37,54 @@ def load_lib(): void importSchema(uintptr_t ptr); void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); """) - return ffi.dlopen('./cgotest.{}'.format(libext)) + return ffi.dlopen(f'./cgotest.{libext}') + + +cgotest = load_cgotest() + + +class TestPythonToGo(unittest.TestCase): + + def make_schema(self): + return pa.schema([('ints', pa.list_(pa.int32()))], + metadata={b'key1': b'value1'}) + + def make_batch(self): + return pa.record_batch([[[1], [], None, [2, 42]]], + self.make_schema()) + + @contextlib.contextmanager + def assert_pyarrow_memory_released(self): + gc.collect() + old_allocated = pa.total_allocated_bytes() + yield + gc.collect() + diff = pa.total_allocated_bytes() - old_allocated + self.assertEqual( + pa.total_allocated_bytes(), old_allocated, + f"PyArrow memory was not adequately released: {diff} bytes lost") + def test_schema(self): + c_schema = ffi.new("struct ArrowSchema*") + ptr_schema = int(ffi.cast("uintptr_t", c_schema)) -c_schema = ffi.new("struct ArrowSchema*") -ptr_schema = int(ffi.cast("uintptr_t", c_schema)) -make_schema()._export_to_c(ptr_schema) + with self.assert_pyarrow_memory_released(): + self.make_schema()._export_to_c(ptr_schema) + # Will panic if expectations are not met + cgotest.importSchema(ptr_schema) -cgotest = load_lib() -cgotest.importSchema(ptr_schema) + def test_record_batch(self): + c_schema = ffi.new("struct ArrowSchema*") + ptr_schema = int(ffi.cast("uintptr_t", c_schema)) + c_array = ffi.new("struct ArrowArray*") + ptr_array = int(ffi.cast("uintptr_t", c_array)) -c_array = ffi.new("struct ArrowArray*") -ptr_array = int(ffi.cast("uintptr_t", c_array)) + with self.assert_pyarrow_memory_released(): + self.make_schema()._export_to_c(ptr_schema) + self.make_batch()._export_to_c(ptr_array) + # Will panic if expectations are not met + cgotest.importRecordBatch(ptr_schema, ptr_array) -batch = make_batch() -batch._export_to_c(ptr_array) -cgotest.importRecordBatch(ptr_schema, ptr_array) +if __name__ == '__main__': + unittest.main(verbosity=2) From 4ec5dd6584730208a67fd8ca0e785c7f6456cdb6 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Sep 2021 17:22:46 +0200 Subject: [PATCH 15/20] Fix schema leak, try to ensure Go finalizers are run --- go/arrow/cdata/cdata.go | 3 ++ go/arrow/cdata/test/test_cimport.go | 6 ++++ go/arrow/cdata/test/test_export_to_cgo.py | 35 +++++++++++++---------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index 5ae79590618..7cda454fc18 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -140,6 +140,9 @@ func decodeCMetadata(md *C.char) arrow.Metadata { // convert a C.ArrowSchema to an arrow.Field to maintain metadata with the schema func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { + // always release, even on error + defer C.ArrowSchemaRelease(schema) + var childFields []arrow.Field if schema.n_children > 0 { // call ourselves recursively if there are children. diff --git a/go/arrow/cdata/test/test_cimport.go b/go/arrow/cdata/test/test_cimport.go index 77be5d6cbc5..0fc5f86ea6f 100644 --- a/go/arrow/cdata/test/test_cimport.go +++ b/go/arrow/cdata/test/test_cimport.go @@ -20,6 +20,7 @@ package main import ( "fmt" + "runtime" "unsafe" "github.com/apache/arrow/go/arrow" @@ -31,6 +32,11 @@ import ( // #include import "C" +//export runGC +func runGC() { + runtime.GC() +} + //export importSchema func importSchema(ptr uintptr) { sc := (*cdata.CArrowSchema)(unsafe.Pointer(ptr)) diff --git a/go/arrow/cdata/test/test_export_to_cgo.py b/go/arrow/cdata/test/test_export_to_cgo.py index 5c29e5efb83..118aebf035a 100644 --- a/go/arrow/cdata/test/test_export_to_cgo.py +++ b/go/arrow/cdata/test/test_export_to_cgo.py @@ -36,6 +36,7 @@ def load_cgotest(): """ void importSchema(uintptr_t ptr); void importRecordBatch(uintptr_t scptr, uintptr_t rbptr); + void runGC(); """) return ffi.dlopen(f'./cgotest.{libext}') @@ -45,6 +46,12 @@ def load_cgotest(): class TestPythonToGo(unittest.TestCase): + def setUp(self): + self.c_schema = ffi.new("struct ArrowSchema*") + self.ptr_schema = int(ffi.cast("uintptr_t", self.c_schema)) + self.c_array = ffi.new("struct ArrowArray*") + self.ptr_array = int(ffi.cast("uintptr_t", self.c_array)) + def make_schema(self): return pa.schema([('ints', pa.list_(pa.int32()))], metadata={b'key1': b'value1'}) @@ -53,37 +60,35 @@ def make_batch(self): return pa.record_batch([[[1], [], None, [2, 42]]], self.make_schema()) + def run_gc(self): + # Several Go GC runs can be required to run all finalizers + for i in range(5): + cgotest.runGC() + gc.collect() + @contextlib.contextmanager def assert_pyarrow_memory_released(self): - gc.collect() + self.run_gc() old_allocated = pa.total_allocated_bytes() yield - gc.collect() + self.run_gc() diff = pa.total_allocated_bytes() - old_allocated self.assertEqual( pa.total_allocated_bytes(), old_allocated, f"PyArrow memory was not adequately released: {diff} bytes lost") def test_schema(self): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(ptr_schema) + self.make_schema()._export_to_c(self.ptr_schema) # Will panic if expectations are not met - cgotest.importSchema(ptr_schema) + cgotest.importSchema(self.ptr_schema) def test_record_batch(self): - c_schema = ffi.new("struct ArrowSchema*") - ptr_schema = int(ffi.cast("uintptr_t", c_schema)) - c_array = ffi.new("struct ArrowArray*") - ptr_array = int(ffi.cast("uintptr_t", c_array)) - with self.assert_pyarrow_memory_released(): - self.make_schema()._export_to_c(ptr_schema) - self.make_batch()._export_to_c(ptr_array) + self.make_schema()._export_to_c(self.ptr_schema) + self.make_batch()._export_to_c(self.ptr_array) # Will panic if expectations are not met - cgotest.importRecordBatch(ptr_schema, ptr_array) + cgotest.importRecordBatch(self.ptr_schema, self.ptr_array) if __name__ == '__main__': From ce48e8932518fe32d7f210d3142f2d3061750ee9 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Sep 2021 18:01:03 +0200 Subject: [PATCH 16/20] Fix allocation issues in cdata tests --- go/arrow/cdata/cdata_fulltest.c | 60 ++++++++++++++++---------- go/arrow/cdata/cdata_test.go | 20 +++------ go/arrow/cdata/cdata_test_framework.go | 6 +-- 3 files changed, 47 insertions(+), 39 deletions(-) diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c index 929416218cf..c8d8fe6847c 100644 --- a/go/arrow/cdata/cdata_fulltest.c +++ b/go/arrow/cdata/cdata_fulltest.c @@ -29,16 +29,37 @@ static void release_primitive(struct ArrowSchema* schema) { schema->release = NULL; } -static void release_nested(struct ArrowSchema* schema) { +static void release_nested_internal(struct ArrowSchema* schema, + int is_dynamic) { + assert(!ArrowSchemaIsReleased(schema)); for (int i = 0; i < schema->n_children; ++i) { - assert(!ArrowSchemaIsReleased(schema->children[i])); ArrowSchemaRelease(schema->children[i]); - ArrowSchemaMarkReleased(schema->children[i]); + free(schema->children[i]); + } + if (is_dynamic) { + free((void*)schema->format); + free((void*)schema->name); } + ArrowSchemaMarkReleased(schema); +} + +static void release_nested_static(struct ArrowSchema* schema) { + release_nested_internal(schema, /*is_dynamic=*/0); +} + +static void release_nested_dynamic(struct ArrowSchema* schema) { + release_nested_internal(schema, /*is_dynamic=*/1); +} + +static void release_nested_dynamic_toplevel(struct ArrowSchema* schema) { assert(!ArrowSchemaIsReleased(schema)); - free((void *)schema->format); + for (int i = 0; i < schema->n_children; ++i) { + ArrowSchemaRelease(schema->children[i]); + free(schema->children[i]); + } + free((void*)schema->format); if (strlen(schema->name) > 0) { - free((void *)schema->name); + free((void*)schema->name); } ArrowSchemaMarkReleased(schema); } @@ -58,21 +79,14 @@ void test_primitive(struct ArrowSchema* schema, const char* fmt) { }; } -void free_nested_schemas(struct ArrowSchema* top) { - for (int i = 0; i < top->n_children; ++i) { - free_nested_schemas(top->children[i]); - } - ArrowSchemaMarkReleased(top); - // free(top); -} - -void free_top_schema(struct ArrowSchema** top) { - free_nested_schemas(top[0]); - free(top); +// Since test_lists et al. allocate an entirely array of ArrowSchema pointers, +// need to expose a function to free it. +void free_malloced_schemas(struct ArrowSchema** schemas) { + free(schemas); } struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n) { - struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); + struct ArrowSchema** schemas = malloc(sizeof(struct ArrowSchema*)*n); for (int i = 0; i < n; ++i) { schemas[i] = malloc(sizeof(struct ArrowSchema)); *schemas[i] = (struct ArrowSchema) { @@ -83,7 +97,7 @@ struct ArrowSchema** test_lists(const char** fmts, const char** names, const int .children = NULL, .n_children = 0, .dictionary = NULL, - .release = &release_nested, + .release = &release_nested_dynamic, }; if (i != 0) { schemas[i-1]->n_children = 1; @@ -105,7 +119,7 @@ struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t .children = NULL, .n_children = 0, .dictionary = NULL, - .release = &release_nested, + .release = &release_nested_dynamic, }; } @@ -151,7 +165,7 @@ struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* fl .children = NULL, .n_children = 0, .dictionary = NULL, - .release = &release_nested, + .release = &release_nested_dynamic, }; } @@ -181,7 +195,7 @@ static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { .children = NULL, .n_children = 0, .dictionary = NULL, - .release = &release_nested, + .release = &release_nested_static, }; out->children[1] = malloc(sizeof(struct ArrowSchema)); @@ -193,11 +207,11 @@ static int stream_schema(struct ArrowArrayStream* st, struct ArrowSchema* out) { .children = NULL, .n_children = 0, .dictionary = NULL, - .release = &release_nested, + .release = &release_nested_static, }; out->format = "+s"; - out->release = &free_nested_schemas; + out->release = &release_nested_static; return 0; } diff --git a/go/arrow/cdata/cdata_test.go b/go/arrow/cdata/cdata_test.go index dbb6a6f5eb3..e5e43deddb7 100644 --- a/go/arrow/cdata/cdata_test.go +++ b/go/arrow/cdata/cdata_test.go @@ -49,8 +49,8 @@ func TestSchemaExport(t *testing.T) { assert.Equal(t, keys, f.Metadata.Keys()) assert.Equal(t, vals, f.Metadata.Values()) - releaseSchema(&sc) - assert.Nil(t, sc.release) + // schema was released when importing + assert.True(t, schemaIsReleased(&sc)) } func TestSimpleArrayExport(t *testing.T) { @@ -127,7 +127,6 @@ func TestPrimitiveSchemas(t *testing.T) { assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - releaseSchema(&sc) assert.True(t, schemaIsReleased(&sc)) }) } @@ -169,7 +168,6 @@ func TestImportTemporalSchema(t *testing.T) { assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - releaseSchema(&sc) assert.True(t, schemaIsReleased(&sc)) }) } @@ -189,7 +187,7 @@ func TestListSchemas(t *testing.T) { for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testNested(tt.fmts, tt.names) - defer releaseNestedSchemas(sc) + defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) @@ -197,7 +195,6 @@ func TestListSchemas(t *testing.T) { assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - releaseSchema(top) assert.True(t, schemaIsReleased(top)) }) } @@ -219,7 +216,7 @@ func TestStructSchemas(t *testing.T) { for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testStruct(tt.fmts, tt.names, tt.flags) - defer releaseNestedSchemas(sc) + defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) @@ -227,7 +224,6 @@ func TestStructSchemas(t *testing.T) { assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - releaseSchema(top) assert.True(t, schemaIsReleased(top)) }) } @@ -248,7 +244,7 @@ func TestMapSchemas(t *testing.T) { for _, tt := range tests { t.Run(tt.typ.Name(), func(t *testing.T) { sc := testMap(tt.fmts, tt.names, tt.flags) - defer releaseNestedSchemas(sc) + defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] f, err := ImportCArrowField(top) @@ -257,7 +253,6 @@ func TestMapSchemas(t *testing.T) { tt.typ.KeysSorted = tt.keysSorted assert.True(t, arrow.TypeEqual(tt.typ, f.Type)) - releaseSchema(top) assert.True(t, schemaIsReleased(top)) }) } @@ -271,7 +266,7 @@ func TestSchema(t *testing.T) { }, &metadata2) cst := testSchema([]string{"+s", "n", "l"}, []string{"", "nulls", "values"}, []int64{0, 0, flagIsNullable}) - defer releaseNestedSchemas(cst) + defer freeMallocedSchemas(cst) top := (*[1]*CArrowSchema)(unsafe.Pointer(cst))[0] out, err := ImportCArrowSchema(top) @@ -280,7 +275,6 @@ func TestSchema(t *testing.T) { assert.True(t, sc.Equal(out)) assert.True(t, sc.Metadata().Equal(out.Metadata())) - releaseSchema(top) assert.True(t, schemaIsReleased(top)) } @@ -579,7 +573,7 @@ func TestRecordBatch(t *testing.T) { defer freeTestArr(carr) sc := testStruct([]string{"+s", "c", "u"}, []string{"", "a", "b"}, []int64{0, flagIsNullable, flagIsNullable}) - defer releaseNestedSchemas(sc) + defer freeMallocedSchemas(sc) top := (*[1]*CArrowSchema)(unsafe.Pointer(sc))[0] rb, err := ImportCRecordBatch(carr, top) diff --git a/go/arrow/cdata/cdata_test_framework.go b/go/arrow/cdata/cdata_test_framework.go index c1940d66db2..c9ccd74e0a2 100644 --- a/go/arrow/cdata/cdata_test_framework.go +++ b/go/arrow/cdata/cdata_test_framework.go @@ -43,7 +43,7 @@ package cdata // void export_int32_array(const int32_t*, int64_t, struct ArrowArray*); // int test1_is_released(); // void test_primitive(struct ArrowSchema* schema, const char* fmt); -// void free_top_schema(struct ArrowSchema** top); +// void free_malloced_schemas(struct ArrowSchema**); // struct ArrowSchema** test_lists(const char** fmts, const char** names, const int n); // struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n); // struct ArrowSchema** test_map(const char** fmts, const char** names, int64_t* flags, const int n); @@ -113,8 +113,8 @@ func testPrimitive(fmtstr string) CArrowSchema { return s } -func releaseNestedSchemas(top **CArrowSchema) { - C.free_top_schema(top) +func freeMallocedSchemas(schemas **CArrowSchema) { + C.free_malloced_schemas(schemas) } func testNested(fmts, names []string) **CArrowSchema { From d5a7c30f74505d37e0be023d29b5a4c40597ccb4 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 8 Sep 2021 18:10:06 +0200 Subject: [PATCH 17/20] Simplify dockerfile --- ci/docker/debian-10-go-cgo-python.dockerfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile index 23c669a56ab..0e77c3093b0 100644 --- a/ci/docker/debian-10-go-cgo-python.dockerfile +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -23,10 +23,6 @@ ENV DEBIAN_FRONTEND noninteractive # install libarrow-dev and libarrow-python-dev so we can use pyarrow to # test C Data Interface consumption RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \ - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ python3 \ python3-pip && \ From e49ed2eb5c11fee5bb4ffb87bd3890ebc3836c48 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Wed, 8 Sep 2021 12:57:09 -0400 Subject: [PATCH 18/20] combine cdata_testframework.c into cdata_fulltest.c, rename to is_little_endian --- go/arrow/cdata/cdata_fulltest.c | 70 +++++++++++++++++++++- go/arrow/cdata/cdata_testframework.c | 88 ---------------------------- go/arrow/cdata/utils.h | 2 +- 3 files changed, 69 insertions(+), 91 deletions(-) delete mode 100644 go/arrow/cdata/cdata_testframework.c diff --git a/go/arrow/cdata/cdata_fulltest.c b/go/arrow/cdata/cdata_fulltest.c index c8d8fe6847c..5c4ca49edf6 100644 --- a/go/arrow/cdata/cdata_fulltest.c +++ b/go/arrow/cdata/cdata_fulltest.c @@ -20,10 +20,76 @@ #include #include #include +#include +#include #include "arrow/c/abi.h" #include "arrow/c/helpers.h" #include "utils.h" +static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; + +static void release_int32_type(struct ArrowSchema* schema) { + // mark released + schema->release = NULL; +} + +void export_int32_type(struct ArrowSchema* schema) { + const char* encoded_metadata; + if (is_little_endian() == 1) { + encoded_metadata = kEncodedMeta1LE; + } else { + encoded_metadata = kEncodedMeta1BE; + } + *schema = (struct ArrowSchema) { + // Type description + .format = "i", + .name = "", + .metadata = encoded_metadata, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_type, + }; +} + +static bool test1_released = false; + +int test1_is_released() { return test1_released; } + +static void release_int32_array(struct ArrowArray* array) { + assert(array->n_buffers == 2); + // free the buffers and buffers array + free((void *) array->buffers[1]); + free(array->buffers); + // mark released + array->release = NULL; + test1_released = true; +} + +void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { + // initialize primitive fields + *array = (struct ArrowArray) { + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // bookkeeping + .release = &release_int32_array + }; + + // allocate list of buffers + array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); + assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; +} + + static void release_primitive(struct ArrowSchema* schema) { free((void *)schema->format); schema->release = NULL; @@ -131,7 +197,7 @@ struct ArrowSchema** fill_structs(const char** fmts, const char** names, int64_t struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - if (check_for_endianness() == 1) { + if (is_little_endian() == 1) { schemas[n-1]->metadata = kEncodedMeta2LE; } else { schemas[n-1]->metadata = kEncodedMeta2BE; @@ -143,7 +209,7 @@ struct ArrowSchema** test_struct(const char** fmts, const char** names, int64_t* struct ArrowSchema** test_schema(const char** fmts, const char** names, int64_t* flags, const int n) { struct ArrowSchema** schemas = fill_structs(fmts, names, flags, n); - if (check_for_endianness() == 1) { + if (is_little_endian() == 1) { schemas[0]->metadata = kEncodedMeta2LE; schemas[n-1]->metadata = kEncodedMeta1LE; } else { diff --git a/go/arrow/cdata/cdata_testframework.c b/go/arrow/cdata/cdata_testframework.c deleted file mode 100644 index 05aa5cfc5f4..00000000000 --- a/go/arrow/cdata/cdata_testframework.c +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// +build cgo -// +build test - -#include -#include -#include -#include -#include "arrow/c/abi.h" -#include "utils.h" - -static const int64_t kDefaultFlags = ARROW_FLAG_NULLABLE; - -static void release_int32_type(struct ArrowSchema* schema) { - // mark released - schema->release = NULL; -} - -void export_int32_type(struct ArrowSchema* schema) { - const char* encoded_metadata; - if (check_for_endianness() == 1) { - encoded_metadata = kEncodedMeta1LE; - } else { - encoded_metadata = kEncodedMeta1BE; - } - *schema = (struct ArrowSchema) { - // Type description - .format = "i", - .name = "", - .metadata = encoded_metadata, - .flags = 0, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_type, - }; -} - -static bool test1_released = false; - -int test1_is_released() { return test1_released; } - -static void release_int32_array(struct ArrowArray* array) { - assert(array->n_buffers == 2); - // free the buffers and buffers array - free((void *) array->buffers[1]); - free(array->buffers); - // mark released - array->release = NULL; - test1_released = true; -} - -void export_int32_array(const int32_t* data, int64_t nitems, struct ArrowArray* array) { - // initialize primitive fields - *array = (struct ArrowArray) { - .length = nitems, - .offset = 0, - .null_count = 0, - .n_buffers = 2, - .n_children = 0, - .children = NULL, - .dictionary = NULL, - // bookkeeping - .release = &release_int32_array - }; - - // allocate list of buffers - array->buffers = (const void**)malloc(sizeof(void*) * array->n_buffers); - assert(array->buffers != NULL); - array->buffers[0] = NULL; // no nulls, null bitmap can be omitted - array->buffers[1] = data; -} diff --git a/go/arrow/cdata/utils.h b/go/arrow/cdata/utils.h index d63d80953b6..f38281057b4 100644 --- a/go/arrow/cdata/utils.h +++ b/go/arrow/cdata/utils.h @@ -21,7 +21,7 @@ Function check_for_endianness() returns 1, if architecture is little endian, 0 in case of big endian. */ -inline int check_for_endianness() +inline int is_little_endian() { unsigned int x = 1; char *c = (char*) &x; From 6e8733557d4d91f35a7e90e97b731d1baf91aa88 Mon Sep 17 00:00:00 2001 From: Matthew Topol Date: Wed, 8 Sep 2021 12:57:19 -0400 Subject: [PATCH 19/20] update comments regarding schema release --- go/arrow/cdata/interface.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/go/arrow/cdata/interface.go b/go/arrow/cdata/interface.go index 96e66e9ebb5..d6258a5abbe 100644 --- a/go/arrow/cdata/interface.go +++ b/go/arrow/cdata/interface.go @@ -35,8 +35,8 @@ func ImportCArrowField(out *CArrowSchema) (arrow.Field, error) { // ImportCArrowSchema takes in the ArrowSchema from the C Data Interface, it // will copy the metadata and schema definitions over from the C object rather -// than keep direct references to them. It is safe to call C.ArrowSchemaRelease -// after receiving the schema from this function. +// than keep direct references to them. This function will call ArrowSchemaRelease +// on the passed in schema regardless of whether or not there is an error returned. // // This version is intended to take in a schema for a record batch, which means // that the top level of the schema should be a struct of the schema fields. If @@ -73,7 +73,8 @@ func ImportCArrayWithType(arr *CArrowArray, dt arrow.DataType) (array.Interface, // ImportCArray takes a pointer to both a C Data ArrowArray and C Data ArrowSchema in order // to import them into usable Go Objects. If err is not nil, then ArrowArrayRelease must still -// be called on arr to release the memory. +// be called on arr to release the memory. The ArrowSchemaRelease will be called on the passed in +// schema regardless of whether there is an error or not. // // The Schema will be copied with the information used to populate the returned Field, complete // with metadata. The array will reference the same memory that is referred to by the ArrowArray From 8b23b00d0c8039f28126c2ffdb963d68196c082d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 16 Sep 2021 13:03:41 -0400 Subject: [PATCH 20/20] Update debian-10-go-cgo-python.dockerfile --- ci/docker/debian-10-go-cgo-python.dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/docker/debian-10-go-cgo-python.dockerfile b/ci/docker/debian-10-go-cgo-python.dockerfile index 0e77c3093b0..46455a42bb3 100644 --- a/ci/docker/debian-10-go-cgo-python.dockerfile +++ b/ci/docker/debian-10-go-cgo-python.dockerfile @@ -20,8 +20,7 @@ FROM ${base} ENV DEBIAN_FRONTEND noninteractive -# install libarrow-dev and libarrow-python-dev so we can use pyarrow to -# test C Data Interface consumption +# Install python3 and pip so we can install pyarrow to test the C data interface. RUN apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ python3 \