From c0ef0af669773c74b87c0ad0fcf662b7071941cb Mon Sep 17 00:00:00 2001 From: dparu-dev <56425654+dparu-dev@users.noreply.github.com> Date: Fri, 11 Oct 2019 17:08:44 +1100 Subject: [PATCH 1/2] Added basic support for DictionaryArray types --- .../DictionaryArrays/Date32DictionaryArray.cs | 57 ++++ .../DictionaryArrays/DictionaryArray.cs | 158 +++++++++++ .../DictionaryArrayFactory.cs | 66 +++++ .../DictionaryArrays/DoubleDictionaryArray.cs | 40 +++ .../DictionaryArrays/FloatDictionaryArray.cs | 37 +++ .../DictionaryArrays/Int16DictionaryArray.cs | 36 +++ .../DictionaryArrays/Int32DictionaryArray.cs | 37 +++ .../DictionaryArrays/Int64DictionaryArray.cs | 37 +++ .../DictionaryArrays/Int8DictionaryArray.cs | 37 +++ .../PrimitiveDictionaryArray.cs | 259 ++++++++++++++++++ .../DictionaryArrays/StringDictionaryArray.cs | 202 ++++++++++++++ .../DictionaryArrays/UInt16DictionaryArray.cs | 37 +++ .../DictionaryArrays/UInt32DictionaryArray.cs | 37 +++ .../DictionaryArrays/UInt64DictionaryArray.cs | 37 +++ .../DictionaryArrays/UInt8DictionaryArray.cs | 37 +++ .../src/Apache.Arrow/Types/DictionaryType.cs | 21 ++ .../Apache.Arrow.Tests/ArrayBuilderTests.cs | 140 +++++++++- 17 files changed, 1274 insertions(+), 1 deletion(-) create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs create mode 100644 
csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs create mode 100644 csharp/src/Apache.Arrow/Types/DictionaryType.cs diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs new file mode 100644 index 00000000000..570f3068aab --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs @@ -0,0 +1,57 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class Date32DictionaryArray : PrimitiveDictionaryArray + { + private const int MillisecondsPerDay = 86400000; + + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(new DateBuilder()) { } + + internal class DateBuilder : PrimitiveDictionaryArrayBuilder + { + + public DateBuilder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base( + comparer, hashFunc) + { + } + + /// + public override Date32DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new Date32DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + /// + protected override int ConvertTo(DateTimeOffset value) + { + return (int)(value.ToUnixTimeMilliseconds() / MillisecondsPerDay); + + } + + /// + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(new DateBuilder(comparer, hashFunc)) + { + } + } + + public Date32DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public Date32DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Date32), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArray.cs new file mode 100644 index 00000000000..bdb8034c797 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArray.cs @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public interface IDictionaryArray : IArrowArray + { + int UniqueValuesCount { get; } + ReadOnlySpan Indices { get; } + + ArrowTypeId EnclosedTypeId { get; } + } + + public interface IDictionaryArrayBuilder : IArrowArrayBuilder, IDictionaryArrayBuilder + where TArray : IDictionaryArray + where TBuilder : IArrowArrayBuilder + { + + } + + public interface IDictionaryArrayBuilder : IDictionaryArrayBuilder, IArrowArrayBuilder + where TArray : IDictionaryArray + { } + + public interface IDictionaryArrayBuilder : IArrowArrayBuilder + where TArray : IDictionaryArray + { + TArray Build(MemoryAllocator allocator); + } + + + public abstract class DictionaryArray : Array, IDictionaryArray + { + /// + protected DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data) + { + UniqueValuesCount = uniqueValuesCount; + data.EnsureDataType(ArrowTypeId.Dictionary); + EnclosedTypeId = ((DictionaryType) data.DataType).ContainedTypeId; + } + + + #region DictionaryArrayBuilderBase + + public abstract class DictionaryArrayBuilderBase + { + public delegate int HashFunctionDelegate(T obj); + + + protected readonly Dictionary Entries = new Dictionary(); + protected readonly IEqualityComparer Comparer = EqualityComparer.Default; + protected readonly HashFunctionDelegate HashFunction = EqualityComparer.Default.GetHashCode; + protected int NextIndex = 0; + + public ArrowBuffer.Builder IndicesBuffer { get; } + protected ArrowBuffer.Builder NullBitmap { get; set; } + + + protected DictionaryArrayBuilderBase(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) + { + IndicesBuffer = new ArrowBuffer.Builder(); + NullBitmap = new 
ArrowBuffer.Builder(); + + if (comparer != null) + Comparer = comparer; + + if (hashFunc != null) + HashFunction = hashFunc; + + } + + // This allows for custom comparers/hash functions for storing objects in a dictionary array, + // allowing for specialized use and potentially better performance in certain circumstances + protected struct DictionaryEntry + { + public DictionaryEntry(T value, IEqualityComparer comparer = null, HashFunctionDelegate hashFunction = null) + { + if (comparer == null) + comparer = EqualityComparer.Default; + _comparer = comparer; + + if (hashFunction == null) + hashFunction = comparer.GetHashCode; + + _hashFunction = hashFunction; + Value = value; + } + + public T Value { get; } + + private readonly IEqualityComparer _comparer; + + private readonly HashFunctionDelegate _hashFunction; + + public bool Equals(DictionaryEntry other) + { + return _comparer.Equals(Value, other.Value); + } + + /// + public override bool Equals(object obj) + { + if (ReferenceEquals(null, obj)) return false; + return obj is DictionaryEntry other && Equals(other); + } + + /// + public override int GetHashCode() + { + return _hashFunction(Value); + } + + public static bool operator ==(DictionaryEntry left, DictionaryEntry right) + { + return left.Equals(right); + } + + public static bool operator !=(DictionaryEntry left, DictionaryEntry right) + { + return !left.Equals(right); + } + } + + + } + #endregion + + /// + public int UniqueValuesCount { get; } + + /// + public abstract ReadOnlySpan Indices { get; } + + /// + public ArrowTypeId EnclosedTypeId { get; } + } + +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs new file mode 100644 index 00000000000..342c45dba16 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs @@ -0,0 +1,66 @@ +using System; +using System.Collections.Generic; +using System.Text; +using 
Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public static class DictionaryArrayFactory + { + public static IDictionaryArray BuildArray(ArrayData data, int uniqueValuesCount) + { + data.EnsureDataType(ArrowTypeId.Dictionary); + var dictType = (DictionaryType) data.DataType; + if (dictType == null) + { + throw new ArgumentException($"Cannot infer enclosed type as data.DataType doesn't inherit from {typeof(DictionaryType)}." + + $" Is of type {data.DataType.GetType()}"); + } + + switch (dictType.ContainedTypeId) + { + case ArrowTypeId.UInt8: + return new UInt8DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Int8: + return new Int8DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.UInt16: + return new UInt16DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Int16: + return new Int16DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.UInt32: + return new UInt32DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Int32: + return new Int32DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.UInt64: + return new UInt64DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Int64: + return new Int64DictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Float: + return new FloatDictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Double: + return new DoubleDictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.String: + return new StringDictionaryArray(data, uniqueValuesCount); + case ArrowTypeId.Binary: + case ArrowTypeId.Timestamp: + case ArrowTypeId.List: + case ArrowTypeId.Struct: + case ArrowTypeId.Union: + case ArrowTypeId.Date64: + case ArrowTypeId.Date32: + case ArrowTypeId.Decimal: + case ArrowTypeId.Dictionary: + case ArrowTypeId.FixedSizedBinary: + case ArrowTypeId.HalfFloat: + case ArrowTypeId.Interval: + case ArrowTypeId.Map: + case ArrowTypeId.Time32: + case ArrowTypeId.Time64: + case ArrowTypeId.Boolean: + default: + throw new 
NotSupportedException($"An ArrowDictionaryArray cannot be built for type {data.DataType.TypeId}."); + } + + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs new file mode 100644 index 00000000000..54a51cc37e3 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs @@ -0,0 +1,40 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class DoubleDictionaryArray : PrimitiveDictionaryArray, IArrowArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override DoubleDictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new DoubleDictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public DoubleDictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public DoubleDictionaryArray(int length, int uniqueValuesCount, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Double), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValuesCount) + { + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs new file mode 100644 index 00000000000..fbca0d031ae --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class FloatDictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override FloatDictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new FloatDictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public FloatDictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public FloatDictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Float), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs new file mode 100644 index 00000000000..0c1e56d6144 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs @@ -0,0 +1,36 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class Int16DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null){} + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override Int16DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new Int16DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public Int16DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public Int16DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Int16), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs new file mode 100644 index 00000000000..b2a71cd0b11 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class Int32DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override Int32DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new Int32DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public Int32DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public Int32DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Int32), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs new file mode 100644 index 00000000000..e559bc3c4bd --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class Int64DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override Int64DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new Int64DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public Int64DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public Int64DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Int64), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs new file mode 100644 index 00000000000..d30e3c161a0 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class Int8DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : this(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override Int8DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new Int8DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public Int8DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public Int8DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.Int8), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs new file mode 100644 index 00000000000..9c6a90edb17 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs @@ -0,0 +1,259 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays +{ + public abstract class PrimitiveDictionaryArray : DictionaryArray where T : struct + { + + public ArrowBuffer ValueBuffer => Data.Buffers[2]; + + public ReadOnlySpan Values => ValueBuffer.Span.CastTo().Slice(0, UniqueValuesCount); + + public ArrowBuffer IndicesBuffer => Data.Buffers[1]; + + public override ReadOnlySpan Indices => IndicesBuffer.Span.CastTo().Slice(0, Length); + + + /// + protected PrimitiveDictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + Data.EnsureBufferCount(3); + } + + protected PrimitiveDictionaryArray(IArrowType dataType, int length, int uniqueValuesCount, + ArrowBuffer nullBitmapBuffer, + ArrowBuffer indices, + ArrowBuffer dataBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, 
nullCount, offset, + new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValuesCount) + { } + + } + + + public abstract class PrimitiveDictionaryArrayBuilder : + DictionaryArray.DictionaryArrayBuilderBase, IDictionaryArrayBuilder + where TTo : struct, IEquatable + where TArray : IDictionaryArray + where TBuilder : class, IDictionaryArrayBuilder + + { + protected TBuilder Instance => this as TBuilder; + + protected IDictionaryArrayBuilder> ArrayBuilder { get; } + + + protected ArrowBuffer.Builder ValuesBuffer; + + internal PrimitiveDictionaryArrayBuilder(IDictionaryArrayBuilder> builder, + IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + ArrayBuilder = builder ?? throw new ArgumentNullException(nameof(builder)); + } + + public TArray Build(MemoryAllocator allocator = default) => ArrayBuilder.Build(allocator); + + public TBuilder Append(TFrom value) + { + var convertedVal = ConvertTo(value); + var temp = new DictionaryEntry(convertedVal, Comparer, HashFunction); + if (!Entries.TryGetValue(temp, out var index)) + { + index = NextIndex++; + Entries.Add(temp, index); + ValuesBuffer.Append(convertedVal); + } + + IndicesBuffer.Append(index); + return Instance; + } + + public TBuilder Append(ReadOnlySpan span) + { + ArrayBuilder.Reserve(span.Length); + foreach (var value in span) + { + Append(value); + } + return Instance; + } + + public TBuilder AppendRange(IEnumerable values) + { + ArrayBuilder.AppendRange(values.Select(ConvertTo)); + return Instance; + } + + public TBuilder Reserve(int capacity) + { + ArrayBuilder.Reserve(capacity); + return Instance; + } + + public TBuilder Resize(int length) + { + ArrayBuilder.Resize(length); + return Instance; + } + + public TBuilder Swap(int i, int j) + { + ArrayBuilder.Swap(i, j); + return Instance; + } + + public TBuilder Set(int index, TFrom value) + { + ArrayBuilder.Set(index, ConvertTo(value)); + return Instance; + } + + public TBuilder Clear() + { + 
ArrayBuilder.Clear(); + return Instance; + } + + protected abstract TTo ConvertTo(TFrom value); + + } + + + public abstract class PrimitiveDictionaryArrayBuilder : DictionaryArray.DictionaryArrayBuilderBase, IDictionaryArrayBuilder + where T : struct, IEquatable + where TArray : IDictionaryArray + where TBuilder : class, IDictionaryArrayBuilder + { + protected TBuilder Instance => this as TBuilder; + + protected ArrowBuffer.Builder ValuesBuffer; + + /// + protected PrimitiveDictionaryArrayBuilder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + ValuesBuffer = new ArrowBuffer.Builder(); + } + + //public PrimitiveDictionaryArray Build(MemoryAllocator allocator) + //{ + // // create a buffer of values + // var size = Unsafe.SizeOf(); + // int bufferLength = checked((int)BitUtility.RoundUpToMultipleOf64(size * Entries.Count)); + + // allocator = allocator ?? MemoryAllocator.Default.Value; + // allocator.Allocate(bufferLength); + + // var dataBuffer = new ArrowBuffer.Builder(Entries.Count); + // foreach (var entry in Entries.Keys) + // { + // dataBuffer.Append(entry.Value); + // } + + + // return new PrimitiveDictionaryArray(ArrowType, IndicesBuffer.Length, IndicesBuffer.Build(allocator), dataBuffer.Build(allocator), + // ArrowBuffer.Empty); + //} + + public TBuilder Append(T value) + { + var temp = new DictionaryEntry(value, Comparer, HashFunction); + if (!Entries.TryGetValue(temp, out var index)) + { + index = NextIndex++; + Entries.Add(temp, index); + ValuesBuffer.Append(value); + } + + IndicesBuffer.Append(index); + return Instance; + } + + public TBuilder Append(ReadOnlySpan span) + { + foreach (var t in span) + Append(t); + + return Instance; + } + + public TBuilder AppendRange(IEnumerable values) + { + foreach (var t in values) + Append(t); + + return Instance; + } + + /// + public TBuilder Reserve(int capacity) + { + IndicesBuffer.Reserve(capacity); + ValuesBuffer.Reserve(capacity); + return Instance; + } 
+ + /// + public TBuilder Resize(int length) + { + throw new NotImplementedException(); + } + + /// + public TBuilder Swap(int i, int j) + { + if (i < 0 || j < 0 || i > IndicesBuffer.Length || j > IndicesBuffer.Length) + { + throw new ArgumentOutOfRangeException(); + } + + var span = IndicesBuffer.Memory.Span.CastTo(); + var temp = span[i]; + span[i] = span[j]; + span[j] = temp; + + return Instance; + } + + /// + public TBuilder Set(int index, T value) + { + if (index < 0 || index > IndicesBuffer.Length) + { + throw new ArgumentOutOfRangeException(); + } + + var temp = new DictionaryEntry(value, Comparer, HashFunction); + if (!Entries.TryGetValue(temp, out var valueIndex)) + { + valueIndex = NextIndex++; + Entries.Add(temp, valueIndex); + ValuesBuffer.Append(value); + } + + IndicesBuffer.Memory.Span.CastTo()[index] = valueIndex; + + return Instance; + } + + /// + public TBuilder Clear() + { + Entries.Clear(); + IndicesBuffer.Clear(); + ValuesBuffer.Clear(); + NextIndex = 0; + return Instance; + } + + /// + public abstract TArray Build(MemoryAllocator allocator); + } + +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs new file mode 100644 index 00000000000..5ed15c31ede --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs @@ -0,0 +1,202 @@ +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays +{ + public class StringDictionaryArray : DictionaryArray + { + public static readonly Encoding DefaultEncoding = Encoding.UTF8; + + public ArrowBuffer ValueBuffer => Data.Buffers[2]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(0, UniqueValuesCount+1); + + public ArrowBuffer IndicesBuffer => Data.Buffers[1]; + + 
public override ReadOnlySpan Indices => IndicesBuffer.Span.CastTo().Slice(0, Length); + + public ReadOnlySpan Values => ValueBuffer.Span.CastTo().Slice(0, ValueOffsets[UniqueValuesCount]); + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[3]; + + /// + public StringDictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + Data.EnsureBufferCount(4); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueOffset(int index) + { + index = Indices[index]; // get dictionary value index from entry index + return ValueOffsets[Offset + index]; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueLength(int index) + { + // get dictionary value index from entry index + index = Indices[index]; + var offsets = ValueOffsets; + var offset = Offset + index; + + return offsets[offset + 1] - offsets[offset]; + } + + public ReadOnlySpan GetBytes(int index) + { + var offset = GetValueOffset(index); + var length = GetValueLength(index); + + return ValueBuffer.Span.Slice(offset, length); + } + + public string GetString(int index, Encoding encoding = default) + { + encoding = encoding ?? 
DefaultEncoding; + + var bytes = GetBytes(index); + + unsafe + { + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + return encoding.GetString(data, bytes.Length); + } + } + + public StringDictionaryArray(int length, + int uniqueValues, + ArrowBuffer nullBitmapBuffer, + ArrowBuffer indices, + ArrowBuffer dataBuffer, + ArrowBuffer dataOffsets, + int nullCount = 0, int offset = 0) + : this(new ArrayData(DictionaryType.Default(ArrowTypeId.String), length, nullCount, offset, + new[] { nullBitmapBuffer, indices, dataBuffer, dataOffsets }), uniqueValues) + { } + + public class StringDictionaryBuilder : DictionaryArrayBuilderBase, IDictionaryArrayBuilder + { + + public Encoding Encoding; + + protected ArrowBuffer.Builder ValueOffsets { get; } + protected ArrowBuffer.Builder ValueBuffer { get; } + protected int Offset { get; set; } + + protected int NullCount = 0; + + public StringDictionaryBuilder() : this(null, null, null){ } + + + /// + public StringDictionaryBuilder(Encoding encoding = null, IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + if (encoding == null) + encoding = DefaultEncoding; + + Encoding = encoding; + ValueOffsets = new ArrowBuffer.Builder(); + ValueBuffer = new ArrowBuffer.Builder(); + } + + + public StringDictionaryArray Build(MemoryAllocator allocator) + { + ValueOffsets.Append(Offset); + return new StringDictionaryArray(IndicesBuffer.Length, Entries.Count, NullBitmap.Build(allocator), IndicesBuffer.Build(allocator), + ValueBuffer.Build(allocator), ValueOffsets.Build(allocator), NullCount); + } + + + /// + public StringDictionaryBuilder Append(string value) + { + if (value == null) + { + NullBitmap.Append(0); + IndicesBuffer.Append(-1); // need something in the indices buffer to make sure it makes sense + NullCount++; + return this; + } + + + var temp = new DictionaryEntry(value, Comparer, HashFunction); + if (!Entries.TryGetValue(temp, out var index)) + { + index = NextIndex++; + 
Entries.Add(temp, index); + AppendStringToBuffer(value); + } + + NullBitmap.Append(1); + IndicesBuffer.Append(index); + return this; + } + + private void AppendStringToBuffer(string s) + { + var span = Encoding.GetBytes(s); + ValueOffsets.Append(Offset); + ValueBuffer.Append(span); + Offset += span.Length; + } + + public StringDictionaryBuilder Append(ReadOnlySpan span) + { + foreach (var s in span) + { + Append(s); + } + + return this; + } + + public StringDictionaryBuilder AppendRange(IEnumerable values) + { + foreach (var s in values) + { + Append(s); + } + + return this; + } + + public StringDictionaryBuilder Reserve(int capacity) + { + IndicesBuffer.Reserve(capacity); + return this; + } + + public StringDictionaryBuilder Resize(int length) + { + IndicesBuffer.Reserve(length); + return this; + } + + public StringDictionaryBuilder Swap(int i, int j) + { + throw new NotImplementedException(); + } + + public StringDictionaryBuilder Set(int index, string value) + { + throw new NotImplementedException(); + } + + /// Resets the builder to its initial empty state: no entries, no indices, no dictionary values, no nulls. + public StringDictionaryBuilder Clear() + { + Entries.Clear(); + IndicesBuffer.Clear(); + NullBitmap.Clear(); + // The dictionary values, their offsets and the running byte offset must be reset together with Entries: + // after a clear the next new string is assigned index 0 again, so stale value data would be read back for it. + ValueOffsets.Clear(); + ValueBuffer.Clear(); + Offset = 0; + // Build passes NullCount to the array, so it has to restart along with NullBitmap. + NullCount = 0; + NextIndex = 0; + return this; + } + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs new file mode 100644 index 00000000000..55fd3599b96 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class UInt16DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public 
override UInt16DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? MemoryAllocator.Default.Value; + + return new UInt16DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public UInt16DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public UInt16DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.UInt16), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs new file mode 100644 index 00000000000..7bbd27aaa30 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class UInt32DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override UInt32DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new UInt32DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public UInt32DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public UInt32DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.UInt32), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs new file mode 100644 index 00000000000..adf6925244f --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class UInt64DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override UInt64DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new UInt64DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public UInt64DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public UInt64DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.UInt64), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs new file mode 100644 index 00000000000..136ff1f05f2 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow.Arrays.DictionaryArrays +{ + public class UInt8DictionaryArray : PrimitiveDictionaryArray + { + public class Builder : PrimitiveDictionaryArrayBuilder + { + public Builder() : base(null, null) { } + + public Builder(IEqualityComparer comparer = null, HashFunctionDelegate hashFunc = null) : base(comparer, hashFunc) + { + } + + public override UInt8DictionaryArray Build(MemoryAllocator allocator) + { + allocator = allocator ?? 
MemoryAllocator.Default.Value; + + return new UInt8DictionaryArray(IndicesBuffer.Length, ValuesBuffer.Length, IndicesBuffer.Build(allocator), ValuesBuffer.Build(allocator), + ArrowBuffer.Empty); + } + } + + public UInt8DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount) + { + } + + public UInt8DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) : + this(new ArrayData(DictionaryType.Default(ArrowTypeId.UInt8), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues) + { + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/DictionaryType.cs b/csharp/src/Apache.Arrow/Types/DictionaryType.cs new file mode 100644 index 00000000000..a96907c1100 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/DictionaryType.cs @@ -0,0 +1,21 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Apache.Arrow.Types +{ + public sealed class DictionaryType : ArrowType + { + DictionaryType(ArrowTypeId containedTypeId) + { + ContainedTypeId = containedTypeId; + } + public static DictionaryType Default(ArrowTypeId containedTypeId) => new DictionaryType(containedTypeId); + + public override ArrowTypeId TypeId => ArrowTypeId.Dictionary; + public ArrowTypeId ContainedTypeId { get; } + public override string Name => "dictionary"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs index 3d5cc7ff597..94b4836695e 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrayBuilderTests.cs @@ -15,6 +15,9 @@ using Apache.Arrow.Types; using System; +using System.Reflection.Metadata; +using Apache.Arrow.Arrays; +using Apache.Arrow.Arrays.DictionaryArrays; using Xunit; namespace Apache.Arrow.Tests @@ 
-23,6 +26,9 @@ public class ArrayBuilderTests { // TODO: Test various builder invariants (Append, AppendRange, Clear, Resize, Reserve, etc) + private static readonly string[] StringDictionaryElems = new string[] {"string1", "string2", "string3" }; + private static readonly string[] StringDictionaryElemsDupes = new string[] {"string1", "string2", "string3"}; + [Fact] public void PrimitiveArrayBuildersProduceExpectedArray() { @@ -38,6 +44,104 @@ public void PrimitiveArrayBuildersProduceExpectedArray() TestArrayBuilder(x => x.Append(10).Append(20).Append(30)); } + [Fact] + public void PrimitiveDictionaryArrayBuildersProduceExpectedArray() + { + // simple case, no duplicates + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + TestPrimitiveDictionaryArrayBuilderNoDuplicates(x => x.Append(10).Append(20).Append(30)); + + // with a duplicate value + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + 
TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + TestPrimitiveDictionaryArrayBuilderDuplicates(x => x.Append(10).Append(20).Append(20)); + } + + [Fact] + public void TestStringDictionaryArrayBuilderSimpleCase() + { + var dictArr = new StringDictionaryArray.StringDictionaryBuilder() + .AppendRange(StringDictionaryElems) + .Build(default); + + Assert.Equal(3, dictArr.Length); + Assert.Equal(3, dictArr.Indices.Length); + Assert.Equal(0, dictArr.NullCount); + + var arr = new StringArray.Builder() + .AppendRange(StringDictionaryElems) + .Build(default); + + + for (int i = 0; i < arr.Length; i++) + { + Assert.Equal(dictArr.GetString(i), arr.GetString(i)); + } + } + + [Fact] + public void TestStringDictionaryArrayBuilderDuplicates() + { + var dictArr = new StringDictionaryArray.StringDictionaryBuilder() + .AppendRange(StringDictionaryElems) + .AppendRange(StringDictionaryElems) + .Build(default); + + Assert.Equal(6, dictArr.Length); + Assert.Equal(6, dictArr.Indices.Length); + Assert.Equal(0, dictArr.NullCount); + Assert.Equal(3, dictArr.UniqueValuesCount); + + + var arr = new StringArray.Builder() + .AppendRange(StringDictionaryElems) + .AppendRange(StringDictionaryElems) + .Build(default); + + + for (int i = 0; i < arr.Length; i++) + { + Assert.Equal(dictArr.GetString(i), arr.GetString(i)); + } + } + + [Fact] + public void TestStringDictionaryArrayBuilderDuplicatesAndNulls() + { + var dictArr = new StringDictionaryArray.StringDictionaryBuilder() + .Append((string)null) + .AppendRange(StringDictionaryElems) + .AppendRange(StringDictionaryElems) + .Build(default); + + Assert.Equal(7, dictArr.Length); + Assert.Equal(7, dictArr.Indices.Length); + Assert.Equal(1, dictArr.NullCount); + Assert.Equal(3, dictArr.UniqueValuesCount); + + var arr = new StringArray.Builder() + .AppendRange(StringDictionaryElems) + .AppendRange(StringDictionaryElems) + .Build(default); + + + for (int i = 0; i < arr.Length; i++) + { + 
Assert.Equal(dictArr.GetString(i+1), arr.GetString(i)); + } + } + public class TimestampArrayBuilder { [Fact] @@ -67,6 +171,40 @@ private static void TestArrayBuilder(Action(Action action) + where T : struct, IEquatable + where TArray : PrimitiveDictionaryArray, IDictionaryArray + where TArrayBuilder : IDictionaryArrayBuilder, new() + { + var builder = new TArrayBuilder(); + action(builder); + var array = builder.Build(default); + Assert.NotNull(array); + Assert.IsAssignableFrom(array); + Assert.Equal(3, array.Length); + Assert.Equal(3, array.Indices.Length); + Assert.Equal(0, array.NullCount); + Assert.Equal(3, array.Values.Length); + + } + + private static void TestPrimitiveDictionaryArrayBuilderDuplicates(Action action) + where T : struct, IEquatable + where TArray : PrimitiveDictionaryArray, IDictionaryArray + where TArrayBuilder : IDictionaryArrayBuilder, new() + { + var builder = new TArrayBuilder(); + + action(builder); + var array = (PrimitiveDictionaryArray)builder.Build(default); + Assert.NotNull(array); + Assert.IsAssignableFrom>(array); + Assert.Equal(3, array.Length); + Assert.Equal(3, array.Indices.Length); + Assert.Equal(2, array.Values.Length); + Assert.Equal(0, array.NullCount); + } + } } From 9ed262cb9d537facd4ac1abd2cc7425eadb0a151 Mon Sep 17 00:00:00 2001 From: dparu-dev <56425654+dparu-dev@users.noreply.github.com> Date: Mon, 14 Oct 2019 10:43:42 +1100 Subject: [PATCH 2/2] I forgot the license text like an idiot, fixed now --- .../DictionaryArrays/Date32DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/DictionaryArrayFactory.cs | 17 ++++++++++++++++- .../DictionaryArrays/DoubleDictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/FloatDictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/Int16DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/Int32DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/Int64DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/Int8DictionaryArray.cs | 
17 ++++++++++++++++- .../PrimitiveDictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/StringDictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/UInt16DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/UInt32DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/UInt64DictionaryArray.cs | 17 ++++++++++++++++- .../DictionaryArrays/UInt8DictionaryArray.cs | 17 ++++++++++++++++- csharp/src/Apache.Arrow/Types/DictionaryType.cs | 17 ++++++++++++++++- 15 files changed, 240 insertions(+), 15 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs index 570f3068aab..4d79961d6bc 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Date32DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs index 342c45dba16..dd7102aabad 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArrayFactory.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Types; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs index 54a51cc37e3..8e2e703fa4b 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DoubleDictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs index fbca0d031ae..0d7b6ab1106 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/FloatDictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs index 0c1e56d6144..f2602e61080 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int16DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs index b2a71cd0b11..40f0d69d975 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int32DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs index e559bc3c4bd..ff091812a5d 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int64DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs index d30e3c161a0..e2664735fcb 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/Int8DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs index 9c6a90edb17..c007439d475 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/PrimitiveDictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Linq; using System.Runtime.CompilerServices; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs index 5ed15c31ede..35d07ea03ee 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/StringDictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs index 55fd3599b96..7231820c3eb 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt16DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs index 7bbd27aaa30..4250d38b2a3 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt32DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. 
See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs index adf6925244f..8375c7c59cf 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt64DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs index 136ff1f05f2..e124552d863 100644 --- a/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DictionaryArrays/UInt8DictionaryArray.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text; using Apache.Arrow.Memory; diff --git a/csharp/src/Apache.Arrow/Types/DictionaryType.cs b/csharp/src/Apache.Arrow/Types/DictionaryType.cs index a96907c1100..ccafd85bdf5 100644 --- a/csharp/src/Apache.Arrow/Types/DictionaryType.cs +++ b/csharp/src/Apache.Arrow/Types/DictionaryType.cs @@ -1,4 +1,19 @@ -using System; +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; using System.Collections.Generic; using System.Text;