From c1ada02ffcd85c2ed246b4853ae249632b0083fd Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 12:11:49 -0700 Subject: [PATCH 1/8] C# support for StructArrays --- .../Arrays/ArrowArrayBuilderFactory.cs | 1 + .../Ipc/ArrowReaderImplementation.cs | 35 ++++++++--- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 28 ++++++++- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 20 ++++-- .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 26 ++++++-- csharp/src/Apache.Arrow/Types/StructType.cs | 4 +- .../Apache.Arrow.Tests/ArrayBuilderTests.cs | 61 +++++++++++++++++++ .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 26 ++++++-- csharp/test/Apache.Arrow.Tests/TestData.cs | 32 +++++++++- 9 files changed, 207 insertions(+), 26 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs index e654bb9dd94..68cb6646dbb 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs @@ -59,6 +59,7 @@ internal static IArrowArrayBuilder> case ArrowTypeId.List: return new ListArray.Builder(dataType as ListType); case ArrowTypeId.Struct: + return new StructArray.Builder(dataType as StructType); case ArrowTypeId.Union: case ArrowTypeId.Decimal: case ArrowTypeId.Dictionary: diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index 4c3fdd0fcac..c01f6510575 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -147,10 +147,10 @@ private ArrayData LoadPrimitiveField( { ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); - recordBatchEnumerator.MoveNextBuffer(); - ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); - recordBatchEnumerator.MoveNextBuffer(); - + if (!recordBatchEnumerator.MoveNextBuffer()) + { + throw new Exception("Unable to move to the next buffer."); + } int fieldLength = (int)fieldNode.Length; int fieldNullCount = (int)fieldNode.NullCount; @@ -165,8 +165,21 @@ private ArrayData LoadPrimitiveField( throw new InvalidDataException("Null count length must be >= 0"); // TODO:Localize exception message } - ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; - ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData); + ArrowBuffer[] arrowBuff = null; + ArrayData[] children = null; + if (field.DataType.TypeId == ArrowTypeId.Struct) + { + arrowBuff = new[] { nullArrowBuffer}; + children = GetChildren(ref recordBatchEnumerator, field, bodyData); + } + else + { + ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); + recordBatchEnumerator.MoveNextBuffer(); + + arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; + children = GetChildren(ref recordBatchEnumerator, field, bodyData); + } return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children); } @@ -180,9 +193,15 @@ private ArrayData LoadVariableField( { ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); - recordBatchEnumerator.MoveNextBuffer(); + if (!recordBatchEnumerator.MoveNextBuffer()) + { + throw new Exception("Unable to move to the next buffer."); + } ArrowBuffer offsetArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); - recordBatchEnumerator.MoveNextBuffer(); + if (!recordBatchEnumerator.MoveNextBuffer()) + { + throw new Exception("Unable to move to the next buffer."); + } ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer); recordBatchEnumerator.MoveNextBuffer(); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 9d81a8ef8bb..d878cfa37ce 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -44,7 +44,8 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, - IArrowArrayVisitor + IArrowArrayVisitor, + IArrowArrayVisitor { public readonly struct Buffer { @@ -102,6 +103,31 @@ public void Visit(BinaryArray array) _buffers.Add(CreateBuffer(array.ValueBuffer)); } + private void Visit(ArrayData[] children) + { + for (int i = 0; i < children.Length; i++) + { + Visit(ArrowArrayFactory.BuildArray(children[i])); + } + } + + public void Visit(StructArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + for (int i = 0; i < array.Data.Children.Length; i++) + { + ArrayData childArray = array.Data.Children[i]; + if (childArray.Children != null) + { + Visit(childArray.Children); + } + for (int j = 0; j < childArray.Buffers.Length; j++) + { + _buffers.Add(CreateBuffer(childArray.Buffers[j])); + } + } + } + private void CreateBuffers(BooleanArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index e4afee3c8b6..5b7f2ffc173 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -30,7 +30,7 @@ public struct FieldType public readonly int Offset; public static FieldType Build(Flatbuf.Type type, Offset offset) - where T: struct => + where T : struct => new FieldType(type, offset.Value); public FieldType(Flatbuf.Type type, int offset) @@ -40,7 +40,7 @@ public FieldType(Flatbuf.Type type, int offset) } } - class TypeVisitor : + class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -60,7 +60,8 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private FlatBufferBuilder Builder { get; } @@ -100,7 +101,7 @@ public void Visit(ListType type) { Flatbuf.List.StartList(Builder); Result = FieldType.Build( - Flatbuf.Type.List, + Flatbuf.Type.List, Flatbuf.List.EndList(Builder)); } @@ -118,14 +119,14 @@ public void Visit(StringType type) } public void Visit(TimestampType type) - { + { StringOffset timezoneStringOffset = default; if (!string.IsNullOrWhiteSpace(type.Timezone)) timezoneStringOffset = Builder.CreateString(type.Timezone); Result = FieldType.Build( - Flatbuf.Type.Timestamp, + Flatbuf.Type.Timestamp, Flatbuf.Timestamp.CreateTimestamp(Builder, ToFlatBuffer(type.Unit), timezoneStringOffset)); } @@ -171,6 +172,13 @@ public void Visit(Time64Type type) Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit), 64)); } + public void Visit(StructType type) + { + Flatbuf.Struct_.StartStruct_(Builder); + FieldType result = FieldType.Build(Flatbuf.Type.Struct_, Flatbuf.Struct_.EndStruct_(Builder)); + Result = result; + } + private void CreateIntType(NumberType type) { Result = FieldType.Build( diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 997cc635cd5..8cce969be26 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -14,6 +14,7 @@ // limitations under the License. using System; +using System.Diagnostics; using System.IO; namespace Apache.Arrow.Ipc @@ -57,16 +58,30 @@ internal static Schema GetSchema(Flatbuf.Schema schema) for (int i = 0; i < schema.FieldsLength; i++) { Flatbuf.Field field = schema.Fields(i).GetValueOrDefault(); - - schemaBuilder.Field( - new Field(field.Name, GetFieldArrowType(field), field.Nullable)); + Field arrowField = FieldFromFlatbuffer(field); + schemaBuilder.Field(arrowField); } return schemaBuilder.Build(); } + private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField) + { + System.Collections.Generic.List childFields = null; + if (flatbufField.ChildrenLength > 0) + { + childFields = new System.Collections.Generic.List(); + for (int j = 0; j < flatbufField.ChildrenLength; j++) + { + Flatbuf.Field? childFlatbufField = flatbufField.Children(j); + Field childField = FieldFromFlatbuffer(childFlatbufField.Value); + childFields.Add(childField); + } + } + return new Field(flatbufField.Name, GetFieldArrowType(flatbufField, childFields), flatbufField.Nullable); + } - private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field) + private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, System.Collections.Generic.List childFields = null) { switch (field.TypeType) { @@ -131,6 +146,9 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field) throw new InvalidDataException($"List type must have only one child."); } return new Types.ListType(GetFieldArrowType(field.Children(0).GetValueOrDefault())); + case Flatbuf.Type.Struct_: + Debug.Assert(childFields != null); + return new Types.StructType(childFields); default: throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported."); } diff --git a/csharp/src/Apache.Arrow/Types/StructType.cs b/csharp/src/Apache.Arrow/Types/StructType.cs index fb074c10130..0f442471fda 100644 --- a/csharp/src/Apache.Arrow/Types/StructType.cs +++ b/csharp/src/Apache.Arrow/Types/StructType.cs @@ -19,7 +19,7 @@ namespace Apache.Arrow.Types { - public sealed class StructType : ArrowType + public sealed class StructType : NestedType { private readonly List _fields; @@ -28,7 +28,7 @@ public sealed class StructType : ArrowType public IEnumerable Fields => _fields; - public StructType(IEnumerable fields) + public StructType(IReadOnlyList fields) : base(fields) { _fields = fields?.ToList(); } diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs index 7c1fd6476d1..6194b2221cc 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs @@ -73,6 +73,67 @@ public void StringArrayBuilderHandlesNullsAndEmptyStrings() Assert.Equal(string.Empty, stringArray.GetString(3)); } + [Fact] + public void TestStructArray() + { + // The following can be improved with a Builder class for StructArray. + List fields = new List(); + Field.Builder fieldBuilder = new Field.Builder(); + fields.Add(fieldBuilder.Name("Strings").DataType(StringType.Default).Nullable(true).Build()); + fieldBuilder = new Field.Builder(); + fields.Add(fieldBuilder.Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()); + StructType structType = new StructType(fields); + + StringArray.Builder stringBuilder = new StringArray.Builder(); + StringArray stringArray = stringBuilder.Append("joe").AppendNull().AppendNull().Append("mark").Build(); + Int32Array.Builder intBuilder = new Int32Array.Builder(); + Int32Array intArray = intBuilder.Append(1).Append(2).AppendNull().Append(4).Build(); + List arrays = new List(); + arrays.Add(stringArray); + arrays.Add(intArray); + + ArrowBuffer.BitmapBuilder nullBitmap = new ArrowBuffer.BitmapBuilder(); + var nullBitmapBuffer = nullBitmap.Append(true).Append(true).Append(false).Append(true).Build(); + StructArray structs = new StructArray(structType, 4, arrays, nullBitmapBuffer, 1); + + Assert.Equal(4, structs.Length); + Assert.Equal(1, structs.NullCount); + ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array + Assert.Equal(2, childArrays.Length); + for (int j = 0; j < childArrays.Length; j++) + { + ArrayData arrayData = childArrays[j]; + Assert.Null(arrayData.Children); + if (j == 0) + { + Assert.Equal(ArrowTypeId.String, arrayData.DataType.TypeId); + Array array = new StringArray(arrayData); + StringArray structStringArray = array as StringArray; + Assert.NotNull(structStringArray); + Assert.Equal(structs.Length, structStringArray.Length); + Assert.Equal(stringArray.Length, structStringArray.Length); + Assert.Equal(stringArray.NullCount, structStringArray.NullCount); + for (int i = 0; i < stringArray.Length; i++) + { + Assert.Equal(stringArray.GetString(i), structStringArray.GetString(i)); + } + } + if (j == 1) + { + Assert.Equal(ArrowTypeId.Int32, arrayData.DataType.TypeId); + Array array = new Int32Array(arrayData); + Int32Array structIntArray = array as Int32Array; + Assert.NotNull(structIntArray); + Assert.Equal(structs.Length, structIntArray.Length); + Assert.Equal(intArray.Length, structIntArray.Length); + Assert.Equal(intArray.NullCount, structIntArray.NullCount); + for (int i = 0; i < intArray.Length; i++) + { + Assert.Equal(intArray.GetValue(i), structIntArray.GetValue(i)); + } + } + } + } [Fact] public void ListArrayBuilder() diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 82bd53a47fb..e50cade9bd6 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -80,8 +80,8 @@ public void Visit(TimestampType actualType) { Assert.IsAssignableFrom(_expectedType); - var expectedType = (TimestampType) _expectedType; - + var expectedType = (TimestampType)_expectedType; + Assert.Equal(expectedType.Timezone, actualType.Timezone); Assert.Equal(expectedType.Unit, actualType.Unit); } @@ -149,7 +149,8 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, - IArrowArrayVisitor + IArrowArrayVisitor, + IArrowArrayVisitor { private readonly IArrowArray _expectedArray; private readonly ArrayTypeComparer _arrayTypeComparer; @@ -179,11 +180,28 @@ public ArrayComparer(IArrowArray expectedArray) public void Visit(StringArray array) => CompareBinaryArrays(array); public void Visit(BinaryArray array) => CompareBinaryArrays(array); + + public void Visit(StructArray array) + { + Assert.Equal(_expectedArray.Length, array.Length); + Assert.Equal(_expectedArray.NullCount, array.NullCount); + Assert.Equal(_expectedArray.Offset, array.Offset); + Assert.Equal(_expectedArray.Data.Children.Length, array.Data.Children.Length); + + ArrayData data = array.Data; + for (int i = 0; i < data.Children.Length; i++) + { + IArrowArray childArray = ArrowArrayFactory.BuildArray(data.Children[i]); + IArrowArray expectedChildArray = ArrowArrayFactory.BuildArray(_expectedArray.Data.Children[i]); + childArray.Accept(new ArrayComparer(expectedChildArray)); + } + } + public void Visit(FixedSizeBinaryType array) => throw new NotImplementedException(); public void Visit(IArrowArray array) => throw new NotImplementedException(); private void CompareBinaryArrays(BinaryArray actualArray) - where T: IArrowArray + where T : IArrowArray { Assert.IsAssignableFrom(_expectedArray); Assert.IsAssignableFrom(actualArray); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index ba581e6669f..0066f49db51 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -47,6 +47,7 @@ public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount builder.Field(CreateField(Date64Type.Default, i)); builder.Field(CreateField(TimestampType.Default, i)); builder.Field(CreateField(StringType.Default, i)); + builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); //builder.Field(CreateField(new FixedSizeBinaryType(16), i)); //builder.Field(CreateField(new DecimalType(19, 2))); //builder.Field(CreateField(HalfFloatType.Default)); @@ -104,7 +105,8 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, - IArrowTypeVisitor + IArrowTypeVisitor, + IArrowTypeVisitor { private int Length { get; } public IArrowArray Array { get; private set; } @@ -201,6 +203,34 @@ public void Visit(ListType type) } + public void Visit(StructType type) + { + StringArray.Builder stringBuilder = new StringArray.Builder(); + for (int i = 0; i < Length; i++) + { + stringBuilder.Append(i.ToString()); + } + StringArray stringArray = stringBuilder.Build(); + Int32Array.Builder intBuilder = new Int32Array.Builder(); + for (int i = 0; i < Length; i++) + { + intBuilder.Append(i); + } + Int32Array intArray = intBuilder.Build(); + + List arrays = new List(); + arrays.Add(stringArray); + arrays.Add(intArray); + + ArrowBuffer.BitmapBuilder nullBitmap = new ArrowBuffer.BitmapBuilder(); + for (int i = 0; i < Length; i++) + { + nullBitmap.Append(true); + } + + Array = new StructArray(type, Length, arrays, nullBitmap.Build()); + } + private void GenerateArray(IArrowArrayBuilder builder, Func generator) where TArrayBuilder : IArrowArrayBuilder where TArray : IArrowArray From 4cc2c303675d068deea5bc7e23d4ef2be206611a Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 12:26:11 -0700 Subject: [PATCH 2/8] Build fix --- csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs index 68cb6646dbb..e654bb9dd94 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs @@ -59,7 +59,6 @@ internal static IArrowArrayBuilder> case ArrowTypeId.List: return new ListArray.Builder(dataType as ListType); case ArrowTypeId.Struct: - return new StructArray.Builder(dataType as StructType); case ArrowTypeId.Union: case ArrowTypeId.Decimal: case ArrowTypeId.Dictionary: From fb53c98e115ac70e21171a96a08d08bccb0b7314 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 12:53:05 -0700 Subject: [PATCH 3/8] Move structArray test to its own file --- .../Apache.Arrow.Tests/ArrayBuilderTests.cs | 62 ------------- .../Apache.Arrow.Tests/StructArrayTests.cs | 86 +++++++++++++++++++ 2 files changed, 86 insertions(+), 62 deletions(-) create mode 100644 csharp/test/Apache.Arrow.Tests/StructArrayTests.cs diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs index 6194b2221cc..7d5070771e8 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs @@ -73,68 +73,6 @@ public void StringArrayBuilderHandlesNullsAndEmptyStrings() Assert.Equal(string.Empty, stringArray.GetString(3)); } - [Fact] - public void TestStructArray() - { - // The following can be improved with a Builder class for StructArray. - List fields = new List(); - Field.Builder fieldBuilder = new Field.Builder(); - fields.Add(fieldBuilder.Name("Strings").DataType(StringType.Default).Nullable(true).Build()); - fieldBuilder = new Field.Builder(); - fields.Add(fieldBuilder.Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()); - StructType structType = new StructType(fields); - - StringArray.Builder stringBuilder = new StringArray.Builder(); - StringArray stringArray = stringBuilder.Append("joe").AppendNull().AppendNull().Append("mark").Build(); - Int32Array.Builder intBuilder = new Int32Array.Builder(); - Int32Array intArray = intBuilder.Append(1).Append(2).AppendNull().Append(4).Build(); - List arrays = new List(); - arrays.Add(stringArray); - arrays.Add(intArray); - - ArrowBuffer.BitmapBuilder nullBitmap = new ArrowBuffer.BitmapBuilder(); - var nullBitmapBuffer = nullBitmap.Append(true).Append(true).Append(false).Append(true).Build(); - StructArray structs = new StructArray(structType, 4, arrays, nullBitmapBuffer, 1); - - Assert.Equal(4, structs.Length); - Assert.Equal(1, structs.NullCount); - ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array - Assert.Equal(2, childArrays.Length); - for (int j = 0; j < childArrays.Length; j++) - { - ArrayData arrayData = childArrays[j]; - Assert.Null(arrayData.Children); - if (j == 0) - { - Assert.Equal(ArrowTypeId.String, arrayData.DataType.TypeId); - Array array = new StringArray(arrayData); - StringArray structStringArray = array as StringArray; - Assert.NotNull(structStringArray); - Assert.Equal(structs.Length, structStringArray.Length); - Assert.Equal(stringArray.Length, structStringArray.Length); - Assert.Equal(stringArray.NullCount, structStringArray.NullCount); - for (int i = 0; i < stringArray.Length; i++) - { - Assert.Equal(stringArray.GetString(i), structStringArray.GetString(i)); - } - } - if (j == 1) - { - Assert.Equal(ArrowTypeId.Int32, arrayData.DataType.TypeId); - Array array = new Int32Array(arrayData); - Int32Array structIntArray = array as Int32Array; - Assert.NotNull(structIntArray); - Assert.Equal(structs.Length, structIntArray.Length); - Assert.Equal(intArray.Length, structIntArray.Length); - Assert.Equal(intArray.NullCount, structIntArray.NullCount); - for (int i = 0; i < intArray.Length; i++) - { - Assert.Equal(intArray.GetValue(i), structIntArray.GetValue(i)); - } - } - } - } - [Fact] public void ListArrayBuilder() { diff --git a/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs b/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs new file mode 100644 index 00000000000..233ad1ee69a --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using System.Collections.Generic; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class StructArrayTests + { + [Fact] + public void TestStructArray() + { + // The following can be improved with a Builder class for StructArray. + List fields = new List(); + Field.Builder fieldBuilder = new Field.Builder(); + fields.Add(fieldBuilder.Name("Strings").DataType(StringType.Default).Nullable(true).Build()); + fieldBuilder = new Field.Builder(); + fields.Add(fieldBuilder.Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()); + StructType structType = new StructType(fields); + + StringArray.Builder stringBuilder = new StringArray.Builder(); + StringArray stringArray = stringBuilder.Append("joe").AppendNull().AppendNull().Append("mark").Build(); + Int32Array.Builder intBuilder = new Int32Array.Builder(); + Int32Array intArray = intBuilder.Append(1).Append(2).AppendNull().Append(4).Build(); + List arrays = new List(); + arrays.Add(stringArray); + arrays.Add(intArray); + + ArrowBuffer.BitmapBuilder nullBitmap = new ArrowBuffer.BitmapBuilder(); + var nullBitmapBuffer = nullBitmap.Append(true).Append(true).Append(false).Append(true).Build(); + StructArray structs = new StructArray(structType, 4, arrays, nullBitmapBuffer, 1); + + Assert.Equal(4, structs.Length); + Assert.Equal(1, structs.NullCount); + ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array + Assert.Equal(2, childArrays.Length); + for (int j = 0; j < childArrays.Length; j++) + { + ArrayData arrayData = childArrays[j]; + Assert.Null(arrayData.Children); + if (j == 0) + { + Assert.Equal(ArrowTypeId.String, arrayData.DataType.TypeId); + Array array = new StringArray(arrayData); + StringArray structStringArray = array as StringArray; + Assert.NotNull(structStringArray); + Assert.Equal(structs.Length, structStringArray.Length); + Assert.Equal(stringArray.Length, structStringArray.Length); + Assert.Equal(stringArray.NullCount, structStringArray.NullCount); + for (int i = 0; i < stringArray.Length; i++) + { + Assert.Equal(stringArray.GetString(i), structStringArray.GetString(i)); + } + } + if (j == 1) + { + Assert.Equal(ArrowTypeId.Int32, arrayData.DataType.TypeId); + Array array = new Int32Array(arrayData); + Int32Array structIntArray = array as Int32Array; + Assert.NotNull(structIntArray); + Assert.Equal(structs.Length, structIntArray.Length); + Assert.Equal(intArray.Length, structIntArray.Length); + Assert.Equal(intArray.NullCount, structIntArray.NullCount); + for (int i = 0; i < intArray.Length; i++) + { + Assert.Equal(intArray.GetValue(i), structIntArray.GetValue(i)); + } + } + } + } + } +} From 1dabbc39c5d4f5c9d5f0f18ab43de5be859eb387 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 13:44:47 -0700 Subject: [PATCH 4/8] Improve interface --- csharp/src/Apache.Arrow/Arrays/StructArray.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/StructArray.cs b/csharp/src/Apache.Arrow/Arrays/StructArray.cs index a9dc5bc2023..79d78c3ded4 100644 --- a/csharp/src/Apache.Arrow/Arrays/StructArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StructArray.cs @@ -21,13 +21,13 @@ namespace Apache.Arrow { public class StructArray : Array { - private readonly List _fields; + private readonly IEnumerable _fields; - public IEnumerable Fields => _fields; + public IEnumerable Fields => _fields; public StructArray( IArrowType dataType, int length, - IEnumerable children, + IEnumerable children, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : this(new ArrayData( dataType, length, nullCount, offset, new[] { nullBitmapBuffer }, From 3c723c4fd5c334e25dcc265488b7695ed65750e0 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 17:24:33 -0700 Subject: [PATCH 5/8] Address comments --- .../Ipc/ArrowReaderImplementation.cs | 9 ++++---- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 3 +-- .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 2 +- csharp/src/Apache.Arrow/Types/StructType.cs | 22 ++----------------- .../Apache.Arrow.Tests/ArrayBuilderTests.cs | 1 + .../Apache.Arrow.Tests/StructArrayTests.cs | 16 +++++++------- csharp/test/Apache.Arrow.Tests/TypeTests.cs | 4 ++-- 7 files changed, 19 insertions(+), 38 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index c01f6510575..70e6cf28f18 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -165,12 +165,11 @@ private ArrayData LoadPrimitiveField( throw new InvalidDataException("Null count length must be >= 0"); // TODO:Localize exception message } - ArrowBuffer[] arrowBuff = null; - ArrayData[] children = null; + ArrowBuffer[] arrowBuff; + ArrayData[] children; if (field.DataType.TypeId == ArrowTypeId.Struct) { - arrowBuff = new[] { nullArrowBuffer}; - children = GetChildren(ref recordBatchEnumerator, field, bodyData); + arrowBuff = new[] { nullArrowBuffer }; } else { @@ -178,8 +177,8 @@ private ArrayData LoadPrimitiveField( recordBatchEnumerator.MoveNextBuffer(); arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; - children = GetChildren(ref recordBatchEnumerator, field, bodyData); } + children = GetChildren(ref recordBatchEnumerator, field, bodyData); return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children); } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 5b7f2ffc173..b331e89b115 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -175,8 +175,7 @@ public void Visit(Time64Type type) public void Visit(StructType type) { Flatbuf.Struct_.StartStruct_(Builder); - FieldType result = FieldType.Build(Flatbuf.Type.Struct_, Flatbuf.Struct_.EndStruct_(Builder)); - Result = result; + Result = FieldType.Build(Flatbuf.Type.Struct_, Flatbuf.Struct_.EndStruct_(Builder)); } private void CreateIntType(NumberType type) diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 8cce969be26..59e3513d71f 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -70,7 +70,7 @@ private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField) System.Collections.Generic.List childFields = null; if (flatbufField.ChildrenLength > 0) { - childFields = new System.Collections.Generic.List(); + childFields = new System.Collections.Generic.List(flatbufField.ChildrenLength); for (int j = 0; j < flatbufField.ChildrenLength; j++) { Flatbuf.Field? childFlatbufField = flatbufField.Children(j); diff --git a/csharp/src/Apache.Arrow/Types/StructType.cs b/csharp/src/Apache.Arrow/Types/StructType.cs index 0f442471fda..a7b900856de 100644 --- a/csharp/src/Apache.Arrow/Types/StructType.cs +++ b/csharp/src/Apache.Arrow/Types/StructType.cs @@ -21,17 +21,11 @@ namespace Apache.Arrow.Types { public sealed class StructType : NestedType { - private readonly List _fields; - public override ArrowTypeId TypeId => ArrowTypeId.Struct; public override string Name => "struct"; - public IEnumerable Fields => _fields; - public StructType(IReadOnlyList fields) : base(fields) - { - _fields = fields?.ToList(); - } + { } public Field GetFieldByName(string name, IEqualityComparer comparer = default) @@ -39,19 +33,7 @@ public Field GetFieldByName(string name, if (comparer == null) comparer = StringComparer.Ordinal; - return Fields.FirstOrDefault( - field => comparer.Equals(field.Name, name)); - } - - public int GetFieldIndex(string name, - IEqualityComparer comparer = default) - { - if (comparer == null) - comparer = StringComparer.Ordinal; - - // TODO: Consider caching field index if this method is in hot path. - - return _fields.FindIndex( + return Children.FirstOrDefault( field => comparer.Equals(field.Name, name)); } diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs index 7d5070771e8..7c1fd6476d1 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs @@ -73,6 +73,7 @@ public void StringArrayBuilderHandlesNullsAndEmptyStrings() Assert.Equal(string.Empty, stringArray.GetString(3)); } + [Fact] public void ListArrayBuilder() { diff --git a/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs b/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs index 233ad1ee69a..ef3191543cd 100644 --- a/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/StructArrayTests.cs @@ -48,11 +48,11 @@ public void TestStructArray() Assert.Equal(1, structs.NullCount); ArrayData[] childArrays = structs.Data.Children; // Data for StringArray and Int32Array Assert.Equal(2, childArrays.Length); - for (int j = 0; j < childArrays.Length; j++) + for (int i = 0; i < childArrays.Length; i++) { - ArrayData arrayData = childArrays[j]; + ArrayData arrayData = childArrays[i]; Assert.Null(arrayData.Children); - if (j == 0) + if (i == 0) { Assert.Equal(ArrowTypeId.String, arrayData.DataType.TypeId); Array array = new StringArray(arrayData); @@ -61,12 +61,12 @@ public void TestStructArray() Assert.Equal(structs.Length, structStringArray.Length); Assert.Equal(stringArray.Length, structStringArray.Length); Assert.Equal(stringArray.NullCount, structStringArray.NullCount); - for (int i = 0; i < stringArray.Length; i++) + for (int j = 0; j < stringArray.Length; j++) { - Assert.Equal(stringArray.GetString(i), structStringArray.GetString(i)); + Assert.Equal(stringArray.GetString(j), structStringArray.GetString(j)); } } - if (j == 1) + if (i == 1) { Assert.Equal(ArrowTypeId.Int32, arrayData.DataType.TypeId); Array array = new Int32Array(arrayData); @@ -75,9 +75,9 @@ public void TestStructArray() Assert.Equal(structs.Length, structIntArray.Length); Assert.Equal(intArray.Length, structIntArray.Length); Assert.Equal(intArray.NullCount, structIntArray.NullCount); - for (int i = 0; i < intArray.Length; i++) + for (int j = 0; j < intArray.Length; j++) { - Assert.Equal(intArray.GetValue(i), structIntArray.GetValue(i)); + Assert.Equal(intArray.GetValue(j), structIntArray.GetValue(j)); } } } diff --git a/csharp/test/Apache.Arrow.Tests/TypeTests.cs b/csharp/test/Apache.Arrow.Tests/TypeTests.cs index ff622089b32..4603cba31b2 100644 --- a/csharp/test/Apache.Arrow.Tests/TypeTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TypeTests.cs @@ -72,7 +72,7 @@ public void TestStructBasics() List fields = new List() { f0_nullable, f1_nullable, f2_nullable }; StructType struct_type = new StructType(fields); - var structFields = struct_type.Fields; + var structFields = struct_type.Children; Assert.True(FieldComparer.Equals(structFields.ElementAt(0), f0_nullable)); Assert.True(FieldComparer.Equals(structFields.ElementAt(1), f1_nullable)); Assert.True(FieldComparer.Equals(structFields.ElementAt(2), f2_nullable)); @@ -89,7 +89,7 @@ public void TestStructGetFieldByName() List fields = new List() { f0_nullable, f1_nullable, f2_nullable }; StructType struct_type = new StructType(fields); - var structFields = struct_type.Fields; + var structFields = struct_type.Children; Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f0"), f0_nullable)); Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f1"), f1_nullable)); Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f2"), f2_nullable)); From 0aea825dcbc50587ad224f6985fc09972de8061d Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 5 Oct 2020 17:53:16 -0700 Subject: [PATCH 6/8] Address comment --- csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 59e3513d71f..be22b74f8be 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -67,21 +67,21 @@ internal static Schema GetSchema(Flatbuf.Schema schema) private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField) { - System.Collections.Generic.List childFields = null; + Field[] childFields = null; if (flatbufField.ChildrenLength > 0) { - childFields = new System.Collections.Generic.List(flatbufField.ChildrenLength); + childFields = new Field[flatbufField.ChildrenLength]; for (int j = 0; j < flatbufField.ChildrenLength; j++) { Flatbuf.Field? childFlatbufField = flatbufField.Children(j); Field childField = FieldFromFlatbuffer(childFlatbufField.Value); - childFields.Add(childField); + childFields[j] = (childField); } } return new Field(flatbufField.Name, GetFieldArrowType(flatbufField, childFields), flatbufField.Nullable); } - private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, System.Collections.Generic.List childFields = null) + private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] childFields = null) { switch (field.TypeType) { From 64881e6d288b5bf1dd2dd557bda3aa9d46d09794 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 6 Oct 2020 16:21:57 -0500 Subject: [PATCH 7/8] Address PR feedback. - Fill out StructArray.Fields with correct data - NestedType.Children => Fields to match C++ naming - Add back StructType.GetFieldIndex and add tests - Minor code formatting --- csharp/src/Apache.Arrow/Arrays/StructArray.cs | 27 +++++++++++++------ .../Ipc/ArrowReaderImplementation.cs | 10 +++---- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 27 +++++-------------- .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 11 ++++---- csharp/src/Apache.Arrow/Types/ListType.cs | 4 +-- csharp/src/Apache.Arrow/Types/NestedType.cs | 21 ++++++++------- csharp/src/Apache.Arrow/Types/StructType.cs | 21 ++++++++++++++- .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 19 ++++++------- csharp/test/Apache.Arrow.Tests/TypeTests.cs | 20 ++++++++++++-- 9 files changed, 96 insertions(+), 64 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/StructArray.cs b/csharp/src/Apache.Arrow/Arrays/StructArray.cs index 79d78c3ded4..31aea9b4113 100644 --- a/csharp/src/Apache.Arrow/Arrays/StructArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StructArray.cs @@ -16,33 +16,44 @@ using Apache.Arrow.Types; using System.Collections.Generic; using System.Linq; +using System.Threading; namespace Apache.Arrow { public class StructArray : Array { - private readonly IEnumerable _fields; + private IReadOnlyList _fields; - public IEnumerable Fields => _fields; + public IReadOnlyList Fields => + LazyInitializer.EnsureInitialized(ref _fields, () => InitializeFields()); public StructArray( IArrowType dataType, int length, IEnumerable children, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) - : this(new ArrayData( - dataType, length, nullCount, offset, new[] { nullBitmapBuffer }, - children.Select(child => child.Data))) - { } + : this(new ArrayData( + dataType, length, nullCount, offset, new[] { nullBitmapBuffer }, + children.Select(child => child.Data))) + { + _fields = children.ToArray(); + } public StructArray(ArrayData data) : base(data) { data.EnsureDataType(ArrowTypeId.Struct); - - _fields = new List(); } public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + private IReadOnlyList InitializeFields() + { + IArrowArray[] result = new IArrowArray[Data.Children.Length]; + for (int i = 0; i < Data.Children.Length; i++) + { + result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]); + } + return result; + } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index 70e6cf28f18..a28bc5724ff 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -138,7 +138,6 @@ private List BuildArrays( return arrays; } - private ArrayData LoadPrimitiveField( ref RecordBatchEnumerator recordBatchEnumerator, Field field, @@ -166,7 +165,6 @@ private ArrayData LoadPrimitiveField( } ArrowBuffer[] arrowBuff; - ArrayData[] children; if (field.DataType.TypeId == ArrowTypeId.Struct) { arrowBuff = new[] { nullArrowBuffer }; @@ -178,12 +176,12 @@ private ArrayData LoadPrimitiveField( arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer }; } - children = GetChildren(ref recordBatchEnumerator, field, bodyData); + + ArrayData[] children = GetChildren(ref recordBatchEnumerator, field, bodyData); return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children); } - private ArrayData LoadVariableField( ref RecordBatchEnumerator recordBatchEnumerator, Field field, @@ -230,14 +228,14 @@ private ArrayData[] GetChildren( { if (!(field.DataType is NestedType type)) return null; - int childrenCount = type.Children.Count; + int childrenCount = type.Fields.Count; var children = new ArrayData[childrenCount]; for (int index = 0; index < childrenCount; index++) { recordBatchEnumerator.MoveNextNode(); Flatbuf.FieldNode childFieldNode = recordBatchEnumerator.CurrentNode; - Field childField = type.Children[index]; + Field childField = type.Fields[index]; ArrayData child = childField.DataType.IsFixedPrimitive() ? LoadPrimitiveField(ref recordBatchEnumerator, childField, in childFieldNode, bodyData) : LoadVariableField(ref recordBatchEnumerator, childField, in childFieldNode, bodyData); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index d878cfa37ce..a7daf65db2a 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -103,28 +103,13 @@ public void Visit(BinaryArray array) _buffers.Add(CreateBuffer(array.ValueBuffer)); } - private void Visit(ArrayData[] children) - { - for (int i = 0; i < children.Length; i++) - { - Visit(ArrowArrayFactory.BuildArray(children[i])); - } - } - public void Visit(StructArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - for (int i = 0; i < array.Data.Children.Length; i++) + + for (int i = 0; i < array.Fields.Count; i++) { - ArrayData childArray = array.Data.Children[i]; - if (childArray.Children != null) - { - Visit(childArray.Children); - } - for (int j = 0; j < childArray.Buffers.Length; j++) - { - _buffers.Add(CreateBuffer(childArray.Buffers[j])); - } + array.Fields[i].Accept(this); } } @@ -230,7 +215,7 @@ private void CountSelfAndChildrenNodes(IArrowType type, ref int count) { if (type is NestedType nestedType) { - foreach (Field childField in nestedType.Children) + foreach (Field childField in nestedType.Fields) { CountSelfAndChildrenNodes(childField.DataType, ref count); } @@ -493,12 +478,12 @@ private ValueTask WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken ca return System.Array.Empty>(); } - int childrenCount = type.Children.Count; + int childrenCount = type.Fields.Count; var children = new Offset[childrenCount]; for (int i = 0; i < childrenCount; i++) { - Field childField = type.Children[i]; + Field childField = type.Fields[i]; StringOffset childFieldNameOffset = Builder.CreateString(childField.Name); ArrowTypeFlatbufferBuilder.FieldType childFieldType = _fieldTypeBuilder.BuildFieldType(childField); VectorOffset childFieldChildrenVectorOffset = Builder.CreateVectorOfTables(GetChildrenFieldOffsets(childField)); diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index be22b74f8be..9953ffe0385 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -58,8 +58,8 @@ internal static Schema GetSchema(Flatbuf.Schema schema) for (int i = 0; i < schema.FieldsLength; i++) { Flatbuf.Field field = schema.Fields(i).GetValueOrDefault(); - Field arrowField = FieldFromFlatbuffer(field); - schemaBuilder.Field(arrowField); + + schemaBuilder.Field(FieldFromFlatbuffer(field)); } return schemaBuilder.Build(); @@ -71,11 +71,10 @@ private static Field FieldFromFlatbuffer(Flatbuf.Field flatbufField) if (flatbufField.ChildrenLength > 0) { childFields = new Field[flatbufField.ChildrenLength]; - for (int j = 0; j < flatbufField.ChildrenLength; j++) + for (int i = 0; i < flatbufField.ChildrenLength; i++) { - Flatbuf.Field? childFlatbufField = flatbufField.Children(j); - Field childField = FieldFromFlatbuffer(childFlatbufField.Value); - childFields[j] = (childField); + Flatbuf.Field? childFlatbufField = flatbufField.Children(i); + childFields[i] = FieldFromFlatbuffer(childFlatbufField.Value); } } return new Field(flatbufField.Name, GetFieldArrowType(flatbufField, childFields), flatbufField.Nullable); diff --git a/csharp/src/Apache.Arrow/Types/ListType.cs b/csharp/src/Apache.Arrow/Types/ListType.cs index ec702cfdd18..a006c2282dd 100644 --- a/csharp/src/Apache.Arrow/Types/ListType.cs +++ b/csharp/src/Apache.Arrow/Types/ListType.cs @@ -22,9 +22,9 @@ public sealed class ListType : NestedType public override ArrowTypeId TypeId => ArrowTypeId.List; public override string Name => "list"; - public Field ValueField => Children[0]; + public Field ValueField => Fields[0]; - public IArrowType ValueDataType => Children[0].DataType; + public IArrowType ValueDataType => Fields[0].DataType; public ListType(Field valueField) : base(valueField) { } diff --git a/csharp/src/Apache.Arrow/Types/NestedType.cs b/csharp/src/Apache.Arrow/Types/NestedType.cs index cdb25675808..da6b0140aa5 100644 --- a/csharp/src/Apache.Arrow/Types/NestedType.cs +++ b/csharp/src/Apache.Arrow/Types/NestedType.cs @@ -20,24 +20,27 @@ namespace Apache.Arrow.Types { public abstract class NestedType : ArrowType { - public IReadOnlyList Children { get; } + [Obsolete("Use `Fields` instead")] + public IReadOnlyList Children => Fields; - protected NestedType(IReadOnlyList children) + public IReadOnlyList Fields { get; } + + protected NestedType(IReadOnlyList fields) { - if (children == null || children.Count == 0) + if (fields == null || fields.Count == 0) { - throw new ArgumentNullException(nameof(children)); + throw new ArgumentNullException(nameof(fields)); } - Children = children; + Fields = fields; } - protected NestedType(Field child) + protected NestedType(Field field) { - if (child == null) + if (field == null) { - throw new ArgumentNullException(nameof(child)); + throw new ArgumentNullException(nameof(field)); } - Children = new List { child }; + Fields = new Field[] { field }; } } } diff --git a/csharp/src/Apache.Arrow/Types/StructType.cs b/csharp/src/Apache.Arrow/Types/StructType.cs index a7b900856de..79e83db165c 100644 --- a/csharp/src/Apache.Arrow/Types/StructType.cs +++ b/csharp/src/Apache.Arrow/Types/StructType.cs @@ -33,10 +33,29 @@ public Field GetFieldByName(string name, if (comparer == null) comparer = StringComparer.Ordinal; - return Children.FirstOrDefault( + return Fields.FirstOrDefault( field => comparer.Equals(field.Name, name)); } + public int GetFieldIndex(string name, + IEqualityComparer comparer = default) + { + if (comparer == null) + comparer = StringComparer.Ordinal; + + // TODO: Consider caching field index if this method is in hot path. + + for (int i = 0; i < Fields.Count; i++) + { + if (comparer.Equals(Fields[i].Name, name)) + { + return i; + } + } + + return -1; + } + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); } } diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index e50cade9bd6..b3d9837b06f 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -183,17 +183,18 @@ public ArrayComparer(IArrowArray expectedArray) public void Visit(StructArray array) { - Assert.Equal(_expectedArray.Length, array.Length); - Assert.Equal(_expectedArray.NullCount, array.NullCount); - Assert.Equal(_expectedArray.Offset, array.Offset); - Assert.Equal(_expectedArray.Data.Children.Length, array.Data.Children.Length); + Assert.IsAssignableFrom(_expectedArray); + StructArray expectedArray = (StructArray)_expectedArray; - ArrayData data = array.Data; - for (int i = 0; i < data.Children.Length; i++) + Assert.Equal(expectedArray.Length, array.Length); + Assert.Equal(expectedArray.NullCount, array.NullCount); + Assert.Equal(expectedArray.Offset, array.Offset); + Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); + Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); + + for (int i = 0; i < array.Fields.Count; i++) { - IArrowArray childArray = ArrowArrayFactory.BuildArray(data.Children[i]); - IArrowArray expectedChildArray = ArrowArrayFactory.BuildArray(_expectedArray.Data.Children[i]); - childArray.Accept(new ArrayComparer(expectedChildArray)); + array.Fields[i].Accept(new ArrayComparer(expectedArray.Fields[i])); } } diff --git a/csharp/test/Apache.Arrow.Tests/TypeTests.cs b/csharp/test/Apache.Arrow.Tests/TypeTests.cs index 4603cba31b2..6141a122a28 100644 --- a/csharp/test/Apache.Arrow.Tests/TypeTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TypeTests.cs @@ -72,7 +72,7 @@ public void TestStructBasics() List fields = new List() { f0_nullable, f1_nullable, f2_nullable }; StructType struct_type = new StructType(fields); - var structFields = struct_type.Children; + var structFields = struct_type.Fields; Assert.True(FieldComparer.Equals(structFields.ElementAt(0), f0_nullable)); Assert.True(FieldComparer.Equals(structFields.ElementAt(1), f1_nullable)); Assert.True(FieldComparer.Equals(structFields.ElementAt(2), f2_nullable)); @@ -89,13 +89,29 @@ public void TestStructGetFieldByName() List fields = new List() { f0_nullable, f1_nullable, f2_nullable }; StructType struct_type = new StructType(fields); - var structFields = struct_type.Children; Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f0"), f0_nullable)); Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f1"), f1_nullable)); Assert.True(FieldComparer.Equals(struct_type.GetFieldByName("f2"), f2_nullable)); Assert.True(struct_type.GetFieldByName("not_found") == null); } + [Fact] + public void TestStructGetFieldIndex() + { + Field f0_nullable = new Field.Builder().Name("f0").DataType(Int32Type.Default).Build(); + Field f1_nullable = new Field.Builder().Name("f1").DataType(StringType.Default).Build(); + Field f2_nullable = new Field.Builder().Name("f2").DataType(UInt8Type.Default).Build(); + + StructType struct_type = new StructType(new[] { f0_nullable, f1_nullable, f2_nullable }); + + Assert.Equal(0, struct_type.GetFieldIndex("f0")); + Assert.Equal(1, struct_type.GetFieldIndex("f1")); + Assert.Equal(2, struct_type.GetFieldIndex("F2", StringComparer.OrdinalIgnoreCase)); + Assert.Equal(-1, struct_type.GetFieldIndex("F2")); + Assert.Equal(-1, struct_type.GetFieldIndex("F2", StringComparer.Ordinal)); + Assert.Equal(-1, struct_type.GetFieldIndex("not_found")); + } + [Fact] public void TestListTypeConstructor() { From e2e873e49cdec45406caeee72a85769943d42811 Mon Sep 17 00:00:00 2001 From: Eric Erhardt Date: Tue, 6 Oct 2020 16:30:32 -0500 Subject: [PATCH 8/8] Update docs now that Struct arrays are supported in C# --- csharp/README.md | 2 +- docs/source/status.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/README.md b/csharp/README.md index c6901a1e4e2..29c6086379b 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -94,6 +94,7 @@ This implementation is under development and may not be suitable for use in prod - Time64 - Binary (fixed-length) - List +- Struct ### Type Metadata @@ -119,7 +120,6 @@ This implementation is under development and may not be suitable for use in prod - Tensor - Table - Arrays - - Struct - Union - Dense - Sparse diff --git a/docs/source/status.rst b/docs/source/status.rst index 9dcc06ddab6..d43c3bc80ca 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -75,7 +75,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+ | Large List | ✓ | ✓ | | | | | +-------------------+-------+-------+-------+------------+-------+-------+ -| Struct | ✓ | ✓ | ✓ | ✓ | | | +| Struct | ✓ | ✓ | ✓ | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+ | Map | ✓ | ✓ | | ✓ | | | +-------------------+-------+-------+-------+------------+-------+-------+