diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 0698a040980..36cf4603a55 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -2091,6 +2091,9 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
   case arrow::Type::type::STRUCT:
     type = GARROW_TYPE_STRUCT_ARRAY;
     break;
+  case arrow::Type::type::DICTIONARY:
+    type = GARROW_TYPE_DICTIONARY_ARRAY;
+    break;
   default:
     type = GARROW_TYPE_ARRAY;
     break;
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index 445103d3740..14cc46d733e 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -44,6 +44,11 @@ G_BEGIN_DECLS
  * or more structs. One struct has zero or more fields. If you don't
  * have Arrow format data, you need to use #GArrowStructArrayBuilder
  * to create a new array.
+ *
+ * #GArrowDictionaryArray is a class for dictionary array. It stores
+ * data as a dictionary and indices. It's more space efficient than a
+ * normal array when the array has many duplicated values. You can
+ * dictionary-encode a normal array by garrow_array_dictionary_encode().
  */
 
 G_DEFINE_TYPE(GArrowListArray,               \
@@ -234,4 +239,98 @@ garrow_struct_array_get_fields(GArrowStructArray *array)
   return g_list_reverse(fields);
 }
+
+G_DEFINE_TYPE(GArrowDictionaryArray,         \
+              garrow_dictionary_array,       \
+              GARROW_TYPE_ARRAY)
+
+static void
+garrow_dictionary_array_init(GArrowDictionaryArray *object)
+{
+}
+
+static void
+garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass)
+{
+}
+
+/**
+ * garrow_dictionary_array_new:
+ * @data_type: The data type of dictionary.
+ * @indices: The indices of values in dictionary.
+ *
+ * Returns: A newly created #GArrowDictionaryArray.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type,
+                            GArrowArray *indices)
+{
+  const auto arrow_data_type = garrow_data_type_get_raw(data_type);
+  const auto arrow_indices = garrow_array_get_raw(indices);
+  auto arrow_dictionary_array =
+    std::make_shared<arrow::DictionaryArray>(arrow_data_type,
+                                             arrow_indices);
+  auto arrow_array =
+    std::static_pointer_cast<arrow::Array>(arrow_dictionary_array);
+  return GARROW_DICTIONARY_ARRAY(garrow_array_new_raw(&arrow_array));
+}
+
+/**
+ * garrow_dictionary_array_get_indices:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The indices of values in dictionary.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    std::static_pointer_cast<arrow::DictionaryArray>(arrow_array);
+  auto arrow_indices = arrow_dictionary_array->indices();
+  return garrow_array_new_raw(&arrow_indices);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    std::static_pointer_cast<arrow::DictionaryArray>(arrow_array);
+  auto arrow_dictionary = arrow_dictionary_array->dictionary();
+  return garrow_array_new_raw(&arrow_dictionary);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary_data_type:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary data type of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  // Take the data type through the array's own shared pointer so the
+  // returned object co-owns it and stays valid even after @array is
+  // unreferenced.
+  auto arrow_data_type = arrow_array->type();
+  auto data_type = garrow_data_type_new_raw(&arrow_data_type);
+  return GARROW_DICTIONARY_DATA_TYPE(data_type);
+}
+
 
 G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index ebf95549872..c59a61681bf 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -129,4 +129,25 @@ GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array,
                                            gint i);
 GList *garrow_struct_array_get_fields(GArrowStructArray *array);
+
+#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
+                         garrow_dictionary_array,
+                         GARROW,
+                         DICTIONARY_ARRAY,
+                         GArrowArray)
+struct _GArrowDictionaryArrayClass
+{
+  GArrowArrayClass parent_class;
+};
+
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices);
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);
+
 
 G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 7ce8a978e20..5f742e50e40 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -158,6 +158,8 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
  * @ordered: Whether dictionary contents are ordered or not.
  *
  * Returns: The newly created dictionary data type.
+ *
+ * Since: 0.8.0
  */
 GArrowDictionaryDataType *
 garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
@@ -177,6 +179,8 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: (transfer full): The #GArrowDataType of index.
+ *
+ * Since: 0.8.0
  */
 GArrowDataType *
 garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type)
@@ -193,6 +197,8 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: (transfer full): The dictionary as #GArrowArray.
+ *
+ * Since: 0.8.0
  */
 GArrowArray *
 garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
@@ -209,6 +215,8 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
  * @data_type: The #GArrowDictionaryDataType.
  *
  * Returns: Whether dictionary contents are ordered or not.
+ *
+ * Since: 0.8.0
  */
 gboolean
 garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type)
diff --git a/c_glib/test/test-dictionary-array.rb b/c_glib/test/test-dictionary-array.rb
new file mode 100644
index 00000000000..d4f4b344305
--- /dev/null
+++ b/c_glib/test/test-dictionary-array.rb
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryArray < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @index_data_type = Arrow::Int32DataType.new
+    @dictionary = build_string_array(["C", "C++", "Ruby"])
+    @ordered = false
+    @data_type = Arrow::DictionaryDataType.new(@index_data_type,
+                                               @dictionary,
+                                               @ordered)
+  end
+
+  sub_test_case(".new") do
+    def test_new
+      indices = build_int32_array([0, 2, 2, 1, 0])
+      dictionary_array = Arrow::DictionaryArray.new(@data_type, indices)
+      assert_equal(<<-STRING.chomp, dictionary_array.to_s)
+
+-- is_valid: all not null
+-- dictionary: ["C", "C++", "Ruby"]
+-- indices: [0, 2, 2, 1, 0]
+      STRING
+    end
+  end
+
+  sub_test_case("instance methods") do
+    def setup
+      super
+      @indices = build_int32_array([0, 2, 2, 1, 0])
+      @dictionary_array = Arrow::DictionaryArray.new(@data_type, @indices)
+    end
+
+    def test_indices
+      assert_equal(@indices, @dictionary_array.indices)
+    end
+
+    def test_dictionary
+      assert_equal(@dictionary, @dictionary_array.dictionary)
+    end
+
+    def test_dictionary_data_type
+      assert_equal(@data_type,
+                   @dictionary_array.dictionary_data_type)
+    end
+  end
+end