From d496c2ff891f38cf0dbe369a0e2f9204cc896317 Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Tue, 9 Apr 2019 15:12:06 +0200 Subject: [PATCH 1/3] ARROW-5110: [Go] implement reading struct arrays from Arrow file --- go/arrow/ipc/file_reader.go | 24 ++++++++++++++++++++++++ go/arrow/ipc/metadata.go | 3 +++ 2 files changed, 27 insertions(+) diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index 1765e381241..26e48fa12d5 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -367,6 +367,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface { case *arrow.ListType: return ctx.loadList(dt) + case *arrow.StructType: + return ctx.loadStruct(dt) + default: panic(errors.Errorf("array type %T not handled yet", dt)) } @@ -448,6 +451,27 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface { return array.NewListData(data) } +func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) array.Interface { + field, buffers := ctx.loadCommon(1) + + arrs := make([]array.Interface, len(dt.Fields())) + subs := make([]*array.Data, len(dt.Fields())) + for i, f := range dt.Fields() { + arrs[i] = ctx.loadChild(f.Type) + subs[i] = arrs[i].Data() + } + defer func() { + for i := range arrs { + arrs[i].Release() + } + }() + + data := array.NewData(dt, int(field.Length()), buffers, subs, int(field.NullCount()), 0) + defer data.Release() + + return array.NewStructData(data) +} + func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) { // msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0) // var dictBatch flatbuf.DictionaryBatch diff --git a/go/arrow/ipc/metadata.go b/go/arrow/ipc/metadata.go index cfbfd248144..11665b9927f 100644 --- a/go/arrow/ipc/metadata.go +++ b/go/arrow/ipc/metadata.go @@ -254,6 +254,9 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr } return arrow.ListOf(children[0].Type), nil + case flatbuf.TypeStruct_: + return arrow.StructOf(children...), nil + default: // FIXME(sbinet): implement all the other types. panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ])) From b838ee04a4a174954cf179b559fa38fcb8689521 Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Tue, 9 Apr 2019 15:12:43 +0200 Subject: [PATCH 2/3] arrow/array: fix Struct array Stringer implementation --- go/arrow/array/struct.go | 11 ++------ go/arrow/array/struct_test.go | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/go/arrow/array/struct.go b/go/arrow/array/struct.go index 9a65be07544..55fd9135329 100644 --- a/go/arrow/array/struct.go +++ b/go/arrow/array/struct.go @@ -46,19 +46,14 @@ func (a *Struct) Field(i int) Interface { return a.fields[i] } func (a *Struct) String() string { o := new(strings.Builder) - o.WriteString("[") + o.WriteString("{") for i, v := range a.fields { if i > 0 { o.WriteString(" ") } - switch { - case a.IsNull(i): - o.WriteString("(null)") - default: - fmt.Fprintf(o, "%v", v) - } + fmt.Fprintf(o, "%v", v) } - o.WriteString("]") + o.WriteString("}") return o.String() } diff --git a/go/arrow/array/struct_test.go b/go/arrow/array/struct_test.go index ba21f20df79..d9701cee1e2 100644 --- a/go/arrow/array/struct_test.go +++ b/go/arrow/array/struct_test.go @@ -247,3 +247,56 @@ func TestStructArrayBulkAppend(t *testing.T) { } } } + +func TestStructArrayStringer(t *testing.T) { + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + defer pool.AssertSize(t, 0) + + var ( + f1s = []float64{1.1, 1.2, 1.3, 1.4} + f2s = []int32{1, 2, 3, 4} + + fields = []arrow.Field{ + {Name: "f1", Type: arrow.PrimitiveTypes.Float64}, + {Name: "f2", Type: arrow.PrimitiveTypes.Int32}, + } + dtype = arrow.StructOf(fields...) + ) + + sb := array.NewStructBuilder(pool, dtype) + defer sb.Release() + + f1b := sb.FieldBuilder(0).(*array.Float64Builder) + defer f1b.Release() + + f2b := sb.FieldBuilder(1).(*array.Int32Builder) + defer f2b.Release() + + if got, want := sb.NumField(), 2; got != want { + t.Fatalf("got=%d, want=%d", got, want) + } + + for i := range f1s { + sb.Append(true) + switch i { + case 1: + f1b.AppendNull() + f2b.Append(f2s[i]) + case 2: + f1b.Append(f1s[i]) + f2b.AppendNull() + default: + f1b.Append(f1s[i]) + f2b.Append(f2s[i]) + } + } + + arr := sb.NewArray().(*array.Struct) + defer arr.Release() + + want := "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}" + got := arr.String() + if got != want { + t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want) + } +} From f6f9b9eaf75fee4e2edfec278697e1777dedc7ee Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Tue, 9 Apr 2019 16:03:48 +0200 Subject: [PATCH 3/3] arrow/array: provide List array stringer implementation --- go/arrow/array/list.go | 23 +++++++++++++++++++++++ go/arrow/example_test.go | 2 ++ 2 files changed, 25 insertions(+) diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 1c4c01fcff6..b571c50c74c 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -17,6 +17,8 @@ package array import ( + "fmt" + "strings" "sync/atomic" "github.com/apache/arrow/go/arrow" @@ -42,6 +44,27 @@ func NewListData(data *Data) *List { func (a *List) ListValues() Interface { return a.values } +func (a *List) String() string { + o := new(strings.Builder) + o.WriteString("[") + for i := 0; i < a.Len(); i++ { + if i > 0 { + o.WriteString(" ") + } + if !a.IsValid(i) { + o.WriteString("(null)") + continue + } + beg := int64(a.offsets[i]) + end := int64(a.offsets[i+1]) + sub := NewSlice(a.values, beg, end) + fmt.Fprintf(o, "%v", sub) + sub.Release() + } + o.WriteString("]") + return o.String() +} + func (a *List) setData(data *Data) { a.array.setData(data) vals := data.buffers[1] diff --git a/go/arrow/example_test.go b/go/arrow/example_test.go index 9172a5b58cb..8015cbf0509 100644 --- a/go/arrow/example_test.go +++ b/go/arrow/example_test.go @@ -188,6 +188,7 @@ func Example_listArray() { pos = int(offsets[i]) fmt.Printf("]\n") } + fmt.Printf("List = %v\n", arr) // Output: // NullN() = 2 @@ -200,6 +201,7 @@ func Example_listArray() { // List[4] = [6, 7, 8] // List[5] = (null) // List[6] = [9] + // List = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]] } // This example shows how to create a Struct array.