Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions c_glib/arrow-glib/basic-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2091,6 +2091,9 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
case arrow::Type::type::STRUCT:
type = GARROW_TYPE_STRUCT_ARRAY;
break;
case arrow::Type::type::DICTIONARY:
type = GARROW_TYPE_DICTIONARY_ARRAY;
break;
default:
type = GARROW_TYPE_ARRAY;
break;
Expand Down
107 changes: 107 additions & 0 deletions c_glib/arrow-glib/composite-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ G_BEGIN_DECLS
* or more structs. One struct has zero or more fields. If you don't
* have Arrow format data, you need to use #GArrowStructArrayBuilder
* to create a new array.
*
* #GArrowDictionaryArray is a class for dictionary arrays. It stores
* data as a dictionary plus indices into that dictionary. It's more
* space efficient than a normal array when the array has many repeated
* values. You can convert a normal array to a dictionary array with
* garrow_array_dictionary_encode().
*/

G_DEFINE_TYPE(GArrowListArray, \
Expand Down Expand Up @@ -234,4 +239,106 @@ garrow_struct_array_get_fields(GArrowStructArray *array)
return g_list_reverse(fields);
}


/* Register the GArrowDictionaryArray GObject type, using
 * GARROW_TYPE_ARRAY as its parent type. */
G_DEFINE_TYPE(GArrowDictionaryArray, \
garrow_dictionary_array, \
GARROW_TYPE_ARRAY)

/* Instance initializer required by G_DEFINE_TYPE. The body is empty:
 * no per-instance setup is performed here. */
static void
garrow_dictionary_array_init(GArrowDictionaryArray *object)
{
}

/* Class initializer required by G_DEFINE_TYPE. The body is empty:
 * no class-level setup is performed here. */
static void
garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass)
{
}

/**
 * garrow_dictionary_array_new:
 * @data_type: The data type of the dictionary.
 * @indices: The indices of the values in the dictionary.
 *
 * Returns: A newly created #GArrowDictionaryArray.
 *
 * Since: 0.8.0
 */
GArrowDictionaryArray *
garrow_dictionary_array_new(GArrowDataType *data_type,
                            GArrowArray *indices)
{
  /* Unwrap the GLib objects into the Arrow C++ objects they hold. */
  const auto raw_data_type = garrow_data_type_get_raw(data_type);
  const auto raw_indices = garrow_array_get_raw(indices);

  /* Build the C++ dictionary array and hold it through a base-class
   * pointer, which is what garrow_array_new_raw() takes. */
  std::shared_ptr<arrow::Array> raw_array =
    std::make_shared<arrow::DictionaryArray>(raw_data_type, raw_indices);

  return GARROW_DICTIONARY_ARRAY(garrow_array_new_raw(&raw_array));
}

/**
 * garrow_dictionary_array_get_indices:
 * @array: A #GArrowDictionaryArray.
 *
 * Returns: (transfer full): The indices of the values in the
 *   dictionary.
 *
 * Since: 0.8.0
 */
GArrowArray *
garrow_dictionary_array_get_indices(GArrowDictionaryArray *array)
{
  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
  /* Unchecked downcast: the wrapped array is treated as an
   * arrow::DictionaryArray. */
  auto &raw_dictionary_array =
    static_cast<arrow::DictionaryArray &>(*raw_array);
  auto raw_indices = raw_dictionary_array.indices();
  return garrow_array_new_raw(&raw_indices);
}

/**
 * garrow_dictionary_array_get_dictionary:
 * @array: A #GArrowDictionaryArray.
 *
 * Returns: (transfer full): The dictionary of this array.
 *
 * Since: 0.8.0
 */
GArrowArray *
garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array)
{
  auto raw_array = garrow_array_get_raw(GARROW_ARRAY(array));
  /* Unchecked downcast: the wrapped array is treated as an
   * arrow::DictionaryArray. */
  auto &raw_dictionary_array =
    static_cast<arrow::DictionaryArray &>(*raw_array);
  auto raw_dictionary = raw_dictionary_array.dictionary();
  return garrow_array_new_raw(&raw_dictionary);
}

/**
 * garrow_dictionary_array_get_dictionary_data_type:
 * @array: A #GArrowDictionaryArray.
 *
 * Returns: (transfer full): The dictionary data type of this array.
 *
 * Since: 0.8.0
 */
GArrowDictionaryDataType *
garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array)
{
  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
  /* Hand the wrapper the owning std::shared_ptr that the array itself
   * holds for its data type. Wrapping the raw pointer returned by
   * dict_type() in a shared_ptr with a no-op deleter does not keep the
   * data type alive, so the returned object could dangle once the
   * array is released while the result is still in use. */
  std::shared_ptr<arrow::DataType> arrow_data_type = arrow_array->type();
  auto data_type = garrow_data_type_new_raw(&arrow_data_type);
  return GARROW_DICTIONARY_DATA_TYPE(data_type);
}

G_END_DECLS
21 changes: 21 additions & 0 deletions c_glib/arrow-glib/composite-array.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,4 +129,25 @@ GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array,
gint i);
GList *garrow_struct_array_get_fields(GArrowStructArray *array);


/* GType accessor macro for GArrowDictionaryArray. */
#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
/* Declare GArrowDictionaryArray as a derivable type whose parent
 * instance type is GArrowArray. */
G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
garrow_dictionary_array,
GARROW,
DICTIONARY_ARRAY,
GArrowArray)
/* Class structure for GArrowDictionaryArray. It contains only the
 * parent class structure and adds no members of its own. */
struct _GArrowDictionaryArrayClass
{
GArrowArrayClass parent_class;
};

/* Creates a dictionary array from a data type and an array of indices. */
GArrowDictionaryArray *
garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices);
/* Returns the indices of the dictionary array. */
GArrowArray *
garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
/* Returns the dictionary of the dictionary array. */
GArrowArray *
garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
/* Returns the dictionary data type of the dictionary array. */
GArrowDictionaryDataType *
garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);

G_END_DECLS
8 changes: 8 additions & 0 deletions c_glib/arrow-glib/composite-data-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
* @ordered: Whether dictionary contents are ordered or not.
*
* Returns: The newly created dictionary data type.
*
* Since: 0.8.0
*/
GArrowDictionaryDataType *
garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
Expand All @@ -177,6 +179,8 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: (transfer full): The #GArrowDataType of index.
*
* Since: 0.8.0
*/
GArrowDataType *
garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type)
Expand All @@ -193,6 +197,8 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: (transfer full): The dictionary as #GArrowArray.
*
* Since: 0.8.0
*/
GArrowArray *
garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
Expand All @@ -209,6 +215,8 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: Whether dictionary contents are ordered or not.
*
* Since: 0.8.0
*/
gboolean
garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type)
Expand Down
63 changes: 63 additions & 0 deletions c_glib/test/test-dictionary-array.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Tests for Arrow::DictionaryArray: construction and the indices,
# dictionary and dictionary_data_type readers.
class TestDictionaryArray < Test::Unit::TestCase
# Presumably provides build_string_array / build_int32_array used
# below -- defined outside this file.
include Helper::Buildable

# Shared fixture: an unordered dictionary data type with Int32 indices
# over the dictionary ["C", "C++", "Ruby"].
def setup
@index_data_type = Arrow::Int32DataType.new
@dictionary = build_string_array(["C", "C++", "Ruby"])
@ordered = false
@data_type = Arrow::DictionaryDataType.new(@index_data_type,
@dictionary,
@ordered)
end

sub_test_case(".new") do
# A freshly constructed dictionary array must render its dictionary
# and indices through to_s exactly as in the expected text below.
def test_new
indices = build_int32_array([0, 2, 2, 1, 0])
dictionary_array = Arrow::DictionaryArray.new(@data_type, indices)
assert_equal(<<-STRING.chomp, dictionary_array.to_s)

-- is_valid: all not null
-- dictionary: ["C", "C++", "Ruby"]
-- indices: [0, 2, 2, 1, 0]
STRING
end
end

sub_test_case("instance methods") do
# Extends the outer fixture with a dictionary array built from
# @data_type and the indices [0, 2, 2, 1, 0].
def setup
super
@indices = build_int32_array([0, 2, 2, 1, 0])
@dictionary_array = Arrow::DictionaryArray.new(@data_type, @indices)
end

# The indices reader returns an array equal to the one passed to .new.
def test_indices
assert_equal(@indices, @dictionary_array.indices)
end

# The dictionary reader returns an array equal to the data type's
# dictionary.
def test_dictionary
assert_equal(@dictionary, @dictionary_array.dictionary)
end

# The dictionary_data_type reader returns a data type equal to the
# one passed to .new.
def test_dictionary_data_type
assert_equal(@data_type,
@dictionary_array.dictionary_data_type)
end
end
end