Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow.Arrays.DictionaryArrays
{
/// <summary>
/// Dictionary array whose stored values are 32-bit integers holding a date as a
/// whole number of days relative to the Unix epoch (1970-01-01T00:00:00Z).
/// </summary>
public class Date32DictionaryArray : PrimitiveDictionaryArray<int>
{
    private const int MillisecondsPerDay = 86400000;

    /// <summary>
    /// Builder that accepts <see cref="DateTimeOffset"/> values and converts each one to its
    /// day count (see <see cref="ConvertTo"/>). A <see cref="DateBuilder"/> working on the
    /// converted <see cref="int"/> values is handed to the base builder.
    /// </summary>
    public class Builder : PrimitiveDictionaryArrayBuilder<DateTimeOffset, int, Date32DictionaryArray, Builder>
    {
        /// <summary>
        /// Creates a builder backed by a <see cref="DateBuilder"/> with default comparer and hash function.
        /// </summary>
        public Builder() : base(new DateBuilder()) { }

        /// <summary>
        /// Builder operating directly on the <see cref="int"/> day counts.
        /// </summary>
        internal class DateBuilder : PrimitiveDictionaryArrayBuilder<int, Date32DictionaryArray, DateBuilder>
        {
            /// <summary>
            /// Creates the builder, forwarding the optional comparer and hash function to the base builder.
            /// </summary>
            /// <param name="comparer">Optional equality comparer for day counts.</param>
            /// <param name="hashFunc">Optional hash function for day counts.</param>
            public DateBuilder(IEqualityComparer<int> comparer = null, HashFunctionDelegate hashFunc = null)
                : base(comparer, hashFunc)
            {
            }

            /// <summary>
            /// Builds a <see cref="Date32DictionaryArray"/> from the accumulated indices and values.
            /// </summary>
            /// <param name="allocator">
            /// Allocator used to build the buffers; <see cref="MemoryAllocator.Default"/> is used when null.
            /// </param>
            /// <returns>The built array; it is created with an empty null bitmap buffer.</returns>
            public override Date32DictionaryArray Build(MemoryAllocator allocator)
            {
                allocator = allocator ?? MemoryAllocator.Default.Value;

                return new Date32DictionaryArray(
                    IndicesBuffer.Length,
                    ValuesBuffer.Length,
                    IndicesBuffer.Build(allocator),
                    ValuesBuffer.Build(allocator),
                    ArrowBuffer.Empty);
            }
        }

        /// <summary>
        /// Converts a <see cref="DateTimeOffset"/> to the number of whole days since the Unix epoch.
        /// </summary>
        /// <param name="value">The instant to convert.</param>
        /// <returns>
        /// The day count, rounded towards negative infinity so that instants before the epoch
        /// map to the calendar day (UTC) that contains them.
        /// </returns>
        protected override int ConvertTo(DateTimeOffset value)
        {
            long milliseconds = value.ToUnixTimeMilliseconds();
            long days = milliseconds / MillisecondsPerDay;

            // Integer division truncates towards zero. For a pre-epoch instant that is not
            // exactly on a day boundary this would yield the following day, so step back one.
            if (milliseconds % MillisecondsPerDay < 0)
            {
                days--;
            }

            return (int)days;
        }

        /// <summary>
        /// Creates a builder backed by a <see cref="DateBuilder"/> that uses the given comparer and hash function.
        /// </summary>
        /// <param name="comparer">Optional equality comparer for day counts.</param>
        /// <param name="hashFunc">Optional hash function for day counts.</param>
        public Builder(IEqualityComparer<int> comparer = null, HashFunctionDelegate hashFunc = null)
            : base(new DateBuilder(comparer, hashFunc))
        {
        }
    }

    /// <summary>
    /// Creates the array from existing array data.
    /// </summary>
    /// <param name="data">The underlying array data.</param>
    /// <param name="uniqueValuesCount">Number of unique values, forwarded to the base class.</param>
    public Date32DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data, uniqueValuesCount)
    {
    }

    /// <summary>
    /// Creates the array from individual buffers. The buffers are stored in the order
    /// null bitmap, indices, values, with a default dictionary type for <see cref="ArrowTypeId.Date32"/>.
    /// </summary>
    /// <param name="length">Length passed to the array data.</param>
    /// <param name="uniqueValues">Number of unique values.</param>
    /// <param name="indices">Buffer holding the indices.</param>
    /// <param name="dataBuffer">Buffer holding the values.</param>
    /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
    /// <param name="nullCount">Null count passed to the array data.</param>
    /// <param name="offset">Offset passed to the array data.</param>
    public Date32DictionaryArray(int length, int uniqueValues, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) :
        this(new ArrayData(DictionaryType.Default(ArrowTypeId.Date32), length, nullCount, offset, new[] { nullBitmapBuffer, indices, dataBuffer }), uniqueValues)
    {
    }
}
}
158 changes: 158 additions & 0 deletions csharp/src/Apache.Arrow/Arrays/DictionaryArrays/DictionaryArray.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Text;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow
{
/// <summary>
/// An Arrow array that additionally exposes a unique-value count, a span of
/// integer indices and the type id of the values it encloses.
/// </summary>
public interface IDictionaryArray : IArrowArray
{
/// <summary>
/// Gets the number of unique values.
/// </summary>
int UniqueValuesCount { get; }
/// <summary>
/// Gets the indices as a read-only span.
/// NOTE(review): presumably each index refers to one of the unique values - confirm against implementations.
/// </summary>
ReadOnlySpan<int> Indices { get; }

/// <summary>
/// Gets the type id of the enclosed values.
/// </summary>
ArrowTypeId EnclosedTypeId { get; }
}

/// <summary>
/// Builder contract combining the typed Arrow array builder with the dictionary array builder
/// for the same value and array types. It declares no members of its own.
/// </summary>
/// <typeparam name="T">Type of the values accepted by the builder.</typeparam>
/// <typeparam name="TArray">Type of dictionary array produced.</typeparam>
/// <typeparam name="TBuilder">Type of the builder itself.</typeparam>
public interface IDictionaryArrayBuilder<T, out TArray, out TBuilder> : IArrowArrayBuilder<T, TArray, TBuilder>, IDictionaryArrayBuilder<T, TArray>
where TArray : IDictionaryArray
where TBuilder : IArrowArrayBuilder<TArray>
{

}

/// <summary>
/// Dictionary array builder contract that is also a typed Arrow array builder
/// for values of type <typeparamref name="T"/>. It declares no members of its own.
/// </summary>
/// <typeparam name="T">Type of the values accepted by the builder.</typeparam>
/// <typeparam name="TArray">Type of dictionary array produced.</typeparam>
public interface IDictionaryArrayBuilder<T, out TArray> : IDictionaryArrayBuilder<TArray>, IArrowArrayBuilder<T, TArray>
where TArray : IDictionaryArray
{ }

/// <summary>
/// Builder contract for producing a dictionary array of type <typeparamref name="TArray"/>.
/// </summary>
/// <typeparam name="TArray">Type of dictionary array produced.</typeparam>
public interface IDictionaryArrayBuilder<out TArray> : IArrowArrayBuilder<TArray>
where TArray : IDictionaryArray
{
/// <summary>
/// Builds the array using the given allocator.
/// NOTE(review): if the base builder interface already declares a Build method with this
/// signature, this declaration hides it and may need the <c>new</c> modifier - confirm.
/// </summary>
/// <param name="allocator">Allocator to build with.</param>
/// <returns>The built array.</returns>
TArray Build(MemoryAllocator allocator);
}


/// <summary>
/// Base class for dictionary arrays. It records the number of unique values and the
/// type id of the values contained in the dictionary type of the array data.
/// </summary>
public abstract class DictionaryArray : Array, IDictionaryArray
{
    /// <summary>
    /// Initializes the array from <paramref name="data"/>, whose data type must have the
    /// <see cref="ArrowTypeId.Dictionary"/> type id.
    /// </summary>
    /// <param name="data">The underlying array data.</param>
    /// <param name="uniqueValuesCount">Number of unique values.</param>
    protected DictionaryArray(ArrayData data, int uniqueValuesCount) : base(data)
    {
        UniqueValuesCount = uniqueValuesCount;
        data.EnsureDataType(ArrowTypeId.Dictionary);
        EnclosedTypeId = ((DictionaryType) data.DataType).ContainedTypeId;
    }


    #region DictionaryArrayBuilderBase<T>

    /// <summary>
    /// Shared state for dictionary array builders: the indices buffer, the null bitmap,
    /// the map from distinct entries to integers, and the comparer / hash function
    /// used to decide which values are the same entry.
    /// </summary>
    /// <typeparam name="T">Type of the values stored in the dictionary.</typeparam>
    public abstract class DictionaryArrayBuilderBase<T>
    {
        /// <summary>
        /// Computes a hash code for a value.
        /// </summary>
        /// <param name="obj">The value to hash.</param>
        /// <returns>The hash code.</returns>
        public delegate int HashFunctionDelegate(T obj);


        protected readonly Dictionary<DictionaryEntry, int> Entries = new Dictionary<DictionaryEntry, int>();
        protected readonly IEqualityComparer<T> Comparer = EqualityComparer<T>.Default;
        protected readonly HashFunctionDelegate HashFunction = EqualityComparer<T>.Default.GetHashCode;
        protected int NextIndex = 0;

        /// <summary>
        /// Gets the builder for the buffer of indices.
        /// </summary>
        public ArrowBuffer.Builder<int> IndicesBuffer { get; }

        /// <summary>
        /// Gets or sets the builder for the null bitmap.
        /// </summary>
        protected ArrowBuffer.Builder<byte> NullBitmap { get; set; }


        /// <summary>
        /// Creates the builder state.
        /// </summary>
        /// <param name="comparer">
        /// Optional equality comparer; the default comparer for <typeparamref name="T"/> is used when null.
        /// </param>
        /// <param name="hashFunc">
        /// Optional hash function; when null, the hash function of the effective comparer is used.
        /// </param>
        protected DictionaryArrayBuilderBase(IEqualityComparer<T> comparer = null, HashFunctionDelegate hashFunc = null)
        {
            IndicesBuffer = new ArrowBuffer.Builder<int>();
            NullBitmap = new ArrowBuffer.Builder<byte>();

            if (comparer != null)
            {
                Comparer = comparer;

                // Hash with the supplied comparer by default. Otherwise two values the custom
                // comparer treats as equal could get different hash codes from the default
                // comparer and would never be recognised as the same entry.
                HashFunction = comparer.GetHashCode;
            }

            if (hashFunc != null)
            {
                HashFunction = hashFunc;
            }
        }

        // This allows for custom comparers/hash functions for storing objects in a dictionary array,
        // allowing for specialized use and potentially better performance in certain circumstances
        /// <summary>
        /// Dictionary key wrapping a value together with the comparer and hash function that
        /// define its equality. Implements <see cref="IEquatable{T}"/> so that hash-based
        /// collections can compare keys without boxing the struct.
        /// </summary>
        protected struct DictionaryEntry : IEquatable<DictionaryEntry>
        {
            /// <summary>
            /// Creates an entry for <paramref name="value"/>.
            /// </summary>
            /// <param name="value">The wrapped value.</param>
            /// <param name="comparer">
            /// Equality comparer; the default comparer for <typeparamref name="T"/> is used when null.
            /// </param>
            /// <param name="hashFunction">
            /// Hash function; the comparer's hash function is used when null.
            /// </param>
            public DictionaryEntry(T value, IEqualityComparer<T> comparer = null, HashFunctionDelegate hashFunction = null)
            {
                if (comparer == null)
                {
                    comparer = EqualityComparer<T>.Default;
                }

                _comparer = comparer;

                if (hashFunction == null)
                {
                    hashFunction = comparer.GetHashCode;
                }

                _hashFunction = hashFunction;
                Value = value;
            }

            /// <summary>
            /// Gets the wrapped value.
            /// </summary>
            public T Value { get; }

            private readonly IEqualityComparer<T> _comparer;

            private readonly HashFunctionDelegate _hashFunction;

            /// <summary>
            /// Compares the wrapped values using this entry's comparer.
            /// </summary>
            /// <param name="other">The entry to compare with.</param>
            /// <returns>True when the comparer considers both values equal.</returns>
            public bool Equals(DictionaryEntry other)
            {
                return _comparer.Equals(Value, other.Value);
            }

            /// <inheritdoc />
            public override bool Equals(object obj)
            {
                // The type pattern is already false for null, so no separate null check is needed.
                return obj is DictionaryEntry other && Equals(other);
            }

            /// <inheritdoc />
            public override int GetHashCode()
            {
                return _hashFunction(Value);
            }

            public static bool operator ==(DictionaryEntry left, DictionaryEntry right)
            {
                return left.Equals(right);
            }

            public static bool operator !=(DictionaryEntry left, DictionaryEntry right)
            {
                return !left.Equals(right);
            }
        }


    }
    #endregion

    /// <inheritdoc />
    public int UniqueValuesCount { get; }

    /// <inheritdoc />
    public abstract ReadOnlySpan<int> Indices { get; }

    /// <inheritdoc />
    public ArrowTypeId EnclosedTypeId { get; }
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using Apache.Arrow.Types;

namespace Apache.Arrow.Arrays.DictionaryArrays
{
/// <summary>
/// Creates the concrete dictionary array class matching the contained type of a dictionary data type.
/// </summary>
public static class DictionaryArrayFactory
{
    /// <summary>
    /// Builds a dictionary array over <paramref name="data"/>, choosing the concrete class
    /// from the contained type id of the data's dictionary type.
    /// </summary>
    /// <param name="data">Array data whose data type must be a dictionary type.</param>
    /// <param name="uniqueValuesCount">Number of unique values, passed to the created array.</param>
    /// <returns>The created dictionary array.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The data type of <paramref name="data"/> is not a <see cref="DictionaryType"/>.
    /// </exception>
    /// <exception cref="NotSupportedException">
    /// No dictionary array is available for the contained type.
    /// </exception>
    public static IDictionaryArray BuildArray(ArrayData data, int uniqueValuesCount)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        data.EnsureDataType(ArrowTypeId.Dictionary);

        // Use a safe cast: a hard cast would throw InvalidCastException for a foreign type
        // and the descriptive error below could never be reached.
        var dictType = data.DataType as DictionaryType;
        if (dictType == null)
        {
            throw new ArgumentException($"Cannot infer enclosed type as data.DataType doesn't inherit from {typeof(DictionaryType)}." +
                                        $" Is of type {data.DataType.GetType()}");
        }

        switch (dictType.ContainedTypeId)
        {
            case ArrowTypeId.UInt8:
                return new UInt8DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Int8:
                return new Int8DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.UInt16:
                return new UInt16DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Int16:
                return new Int16DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.UInt32:
                return new UInt32DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Int32:
                return new Int32DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.UInt64:
                return new UInt64DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Int64:
                return new Int64DictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Float:
                return new FloatDictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.Double:
                return new DoubleDictionaryArray(data, uniqueValuesCount);
            case ArrowTypeId.String:
                return new StringDictionaryArray(data, uniqueValuesCount);

            // Contained types that are explicitly not supported.
            // NOTE(review): a Date32DictionaryArray class exists but Date32 is not dispatched
            // here - confirm whether that is intentional.
            case ArrowTypeId.Binary:
            case ArrowTypeId.Timestamp:
            case ArrowTypeId.List:
            case ArrowTypeId.Struct:
            case ArrowTypeId.Union:
            case ArrowTypeId.Date64:
            case ArrowTypeId.Date32:
            case ArrowTypeId.Decimal:
            case ArrowTypeId.Dictionary:
            case ArrowTypeId.FixedSizedBinary:
            case ArrowTypeId.HalfFloat:
            case ArrowTypeId.Interval:
            case ArrowTypeId.Map:
            case ArrowTypeId.Time32:
            case ArrowTypeId.Time64:
            case ArrowTypeId.Boolean:
            default:
                // Report the contained type: the outer type id is always Dictionary at this point.
                throw new NotSupportedException($"An ArrowDictionaryArray cannot be built for type {dictType.ContainedTypeId}.");
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Text;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;

namespace Apache.Arrow.Arrays.DictionaryArrays
{
/// <summary>
/// Dictionary array whose stored values are <see cref="double"/>.
/// </summary>
public class DoubleDictionaryArray : PrimitiveDictionaryArray<double>, IArrowArray
{
    /// <summary>
    /// Builder producing <see cref="DoubleDictionaryArray"/> instances.
    /// </summary>
    public class Builder : PrimitiveDictionaryArrayBuilder<double, DoubleDictionaryArray, Builder>
    {
        /// <summary>
        /// Creates a builder without a custom comparer or hash function.
        /// </summary>
        public Builder()
            : base(null, null)
        {
        }

        /// <summary>
        /// Creates a builder, forwarding the optional comparer and hash function to the base builder.
        /// </summary>
        /// <param name="comparer">Optional equality comparer for values.</param>
        /// <param name="hashFunc">Optional hash function for values.</param>
        public Builder(IEqualityComparer<double> comparer = null, HashFunctionDelegate hashFunc = null)
            : base(comparer, hashFunc)
        {
        }

        /// <summary>
        /// Builds the array from the accumulated indices and values.
        /// </summary>
        /// <param name="allocator">
        /// Allocator used to build the buffers; <see cref="MemoryAllocator.Default"/> is used when null.
        /// </param>
        /// <returns>The built array; it is created with an empty null bitmap buffer.</returns>
        public override DoubleDictionaryArray Build(MemoryAllocator allocator)
        {
            if (allocator == null)
            {
                allocator = MemoryAllocator.Default.Value;
            }

            // Lengths are read before the buffers are built, as in the constructor argument order.
            int indexCount = IndicesBuffer.Length;
            int valueCount = ValuesBuffer.Length;
            ArrowBuffer indexBuffer = IndicesBuffer.Build(allocator);
            ArrowBuffer valueBuffer = ValuesBuffer.Build(allocator);

            return new DoubleDictionaryArray(indexCount, valueCount, indexBuffer, valueBuffer, ArrowBuffer.Empty);
        }
    }

    /// <summary>
    /// Creates the array from existing array data.
    /// </summary>
    /// <param name="data">The underlying array data.</param>
    /// <param name="uniqueValuesCount">Number of unique values, forwarded to the base class.</param>
    public DoubleDictionaryArray(ArrayData data, int uniqueValuesCount)
        : base(data, uniqueValuesCount)
    {
    }

    /// <summary>
    /// Creates the array from individual buffers. The buffers are stored in the order
    /// null bitmap, indices, values, with a default dictionary type for <see cref="ArrowTypeId.Double"/>.
    /// </summary>
    /// <param name="length">Length passed to the array data.</param>
    /// <param name="uniqueValuesCount">Number of unique values.</param>
    /// <param name="indices">Buffer holding the indices.</param>
    /// <param name="dataBuffer">Buffer holding the values.</param>
    /// <param name="nullBitmapBuffer">Buffer holding the null bitmap.</param>
    /// <param name="nullCount">Null count passed to the array data.</param>
    /// <param name="offset">Offset passed to the array data.</param>
    public DoubleDictionaryArray(int length, int uniqueValuesCount, ArrowBuffer indices, ArrowBuffer dataBuffer, ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0)
        : this(
            new ArrayData(
                DictionaryType.Default(ArrowTypeId.Double),
                length,
                nullCount,
                offset,
                new[] { nullBitmapBuffer, indices, dataBuffer }),
            uniqueValuesCount)
    {
    }

    /// <summary>
    /// Dispatches this array to <paramref name="visitor"/>.
    /// </summary>
    /// <param name="visitor">The visitor to accept.</param>
    public override void Accept(IArrowArrayVisitor visitor)
    {
        Accept(this, visitor);
    }
}
}
Loading