Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 142 additions & 3 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ G_BEGIN_DECLS
* #GArrowScalarAggregateOptions is a class to customize the scalar
* aggregate functions and convenient functions of them.
*
* #GArrowCountOptions is a class to customize the `count` function and
* garrow_array_count() family.
*
* #GArrowFilterOptions is a class to customize the `filter` function and
* garrow_array_filter() family.
Expand Down Expand Up @@ -767,6 +770,135 @@ garrow_scalar_aggregate_options_new(void)
}


/* Private instance data of GArrowCountOptions: the wrapped Arrow C++
 * count options object. */
struct GArrowCountOptionsPrivate_ {
  arrow::compute::CountOptions options;
};
typedef struct GArrowCountOptionsPrivate_ GArrowCountOptionsPrivate;

/* Property ID used by the GArrowCountOptions property accessors. */
enum { PROP_MODE = 1 };

/* GArrowFunctionOptions::get_raw implementation: hands back the wrapped
 * arrow::compute::CountOptions as its FunctionOptions base. */
static arrow::compute::FunctionOptions *
garrow_count_options_get_raw_function_options(GArrowFunctionOptions *options)
{
  auto count_options = GARROW_COUNT_OPTIONS(options);
  arrow::compute::FunctionOptions *raw_options =
    garrow_count_options_get_raw(count_options);
  return raw_options;
}

/* Fills in the GArrowFunctionOptions interface vtable for
 * GArrowCountOptions. */
static void
garrow_count_options_function_options_interface_init(
  GArrowFunctionOptionsInterface *iface)
{
  auto get_raw = &garrow_count_options_get_raw_function_options;
  iface->get_raw = get_raw;
}

/* Defines the GArrowCountOptions type with G_TYPE_OBJECT as its parent,
* adds the private instance data, and registers
* garrow_count_options_function_options_interface_init() as the
* initializer of its GArrowFunctionOptions interface implementation. */
G_DEFINE_TYPE_WITH_CODE(GArrowCountOptions,
garrow_count_options,
G_TYPE_OBJECT,
G_ADD_PRIVATE(GArrowCountOptions)
G_IMPLEMENT_INTERFACE(
GARROW_TYPE_FUNCTION_OPTIONS,
garrow_count_options_function_options_interface_init))

/* Evaluates to the GArrowCountOptionsPrivate pointer of @object.
* @object is first cast with GARROW_COUNT_OPTIONS(), so any GObject
* pointer to a GArrowCountOptions instance may be passed. */
#define GARROW_COUNT_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowCountOptionsPrivate *>( \
garrow_count_options_get_instance_private( \
GARROW_COUNT_OPTIONS(object)))

/* GObject finalizer: destroys the embedded C++ options object, then
 * chains up to the parent class. */
static void
garrow_count_options_finalize(GObject *object)
{
  auto count_priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
  /* The options object was created with placement new in the instance
   * initializer, so its destructor has to be invoked explicitly. */
  count_priv->options.~CountOptions();

  auto parent_class = G_OBJECT_CLASS(garrow_count_options_parent_class);
  parent_class->finalize(object);
}

/* GObject property setter. PROP_MODE stores the enum value into the
 * wrapped options; any other ID triggers the standard invalid-property
 * warning. */
static void
garrow_count_options_set_property(GObject *object,
                                  guint prop_id,
                                  const GValue *value,
                                  GParamSpec *pspec)
{
  auto count_priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);

  if (prop_id == PROP_MODE) {
    const auto raw_mode = g_value_get_enum(value);
    count_priv->options.mode =
      static_cast<arrow::compute::CountOptions::CountMode>(raw_mode);
  } else {
    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
  }
}

/* GObject property getter. PROP_MODE reports the mode held by the
 * wrapped options; any other ID triggers the standard invalid-property
 * warning. */
static void
garrow_count_options_get_property(GObject *object,
                                  guint prop_id,
                                  GValue *value,
                                  GParamSpec *pspec)
{
  auto count_priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);

  if (prop_id == PROP_MODE) {
    g_value_set_enum(value, count_priv->options.mode);
  } else {
    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
  }
}

/* Instance initializer: constructs the C++ options object in place
 * inside the GObject private area. */
static void
garrow_count_options_init(GArrowCountOptions *object)
{
  auto count_priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
  auto storage = &(count_priv->options);
  new(storage) arrow::compute::CountOptions();
}

/* Class initializer: installs the finalize/set_property/get_property
 * handlers and registers the "mode" property. The property default is
 * taken from a default-constructed arrow::compute::CountOptions. */
static void
garrow_count_options_class_init(GArrowCountOptionsClass *klass)
{
  auto gobject_class = G_OBJECT_CLASS(klass);

  gobject_class->finalize = garrow_count_options_finalize;
  gobject_class->set_property = garrow_count_options_set_property;
  gobject_class->get_property = garrow_count_options_get_property;

  arrow::compute::CountOptions default_options;

  GParamSpec *spec;
  /**
   * GArrowCountOptions:mode:
   *
   * How to count values.
   *
   * Since: 6.0.0
   */
  spec = g_param_spec_enum("mode",
                           "Count mode",
                           "Which values to count",
                           GARROW_TYPE_COUNT_MODE,
                           static_cast<GArrowCountMode>(default_options.mode),
                           static_cast<GParamFlags>(G_PARAM_READWRITE));
  g_object_class_install_property(gobject_class, PROP_MODE, spec);
}

/**
 * garrow_count_options_new:
 *
 * Creates count options with every property at its default value.
 *
 * Returns: A newly created #GArrowCountOptions.
 *
 * Since: 6.0.0
 */
GArrowCountOptions *
garrow_count_options_new(void)
{
  return GARROW_COUNT_OPTIONS(g_object_new(GARROW_TYPE_COUNT_OPTIONS, NULL));
}


/* Private instance data holding an arrow::compute::FilterOptions. */
struct GArrowFilterOptionsPrivate_ {
  arrow::compute::FilterOptions options;
};
typedef struct GArrowFilterOptionsPrivate_ GArrowFilterOptionsPrivate;
Expand Down Expand Up @@ -1558,7 +1690,7 @@ garrow_array_dictionary_encode(GArrowArray *array,
/**
* garrow_array_count:
* @array: A #GArrowArray.
* @options: (nullable): A #GArrowScalarAggregateOptions.
* @options: (nullable): A #GArrowCountOptions.
* @error: (nullable): Return location for a #GError or %NULL.
*
* Returns: The number of target values on success. If an error is occurred,
Expand All @@ -1568,14 +1700,14 @@ garrow_array_dictionary_encode(GArrowArray *array,
*/
gint64
garrow_array_count(GArrowArray *array,
GArrowScalarAggregateOptions *options,
GArrowCountOptions *options,
GError **error)
{
auto arrow_array = garrow_array_get_raw(array);
auto arrow_array_raw = arrow_array.get();
arrow::Result<arrow::Datum> arrow_counted_datum;
if (options) {
auto arrow_options = garrow_scalar_aggregate_options_get_raw(options);
auto arrow_options = garrow_count_options_get_raw(options);
arrow_counted_datum =
arrow::compute::Count(*arrow_array_raw, *arrow_options);
} else {
Expand Down Expand Up @@ -2694,6 +2826,13 @@ garrow_scalar_aggregate_options_get_raw(
return &(priv->options);
}

/* Returns a pointer to the arrow::compute::CountOptions stored in the
 * private data of @count_options. The pointer refers to storage inside
 * the instance; it is not a copy. */
arrow::compute::CountOptions *
garrow_count_options_get_raw(GArrowCountOptions *count_options)
{
  return &(GARROW_COUNT_OPTIONS_GET_PRIVATE(count_options)->options);
}

arrow::compute::FilterOptions *
garrow_filter_options_get_raw(GArrowFilterOptions *filter_options)
{
Expand Down
34 changes: 33 additions & 1 deletion c_glib/arrow-glib/compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,38 @@ GARROW_AVAILABLE_IN_5_0
GArrowScalarAggregateOptions *
garrow_scalar_aggregate_options_new(void);

/**
* GArrowCountMode:
* @GARROW_COUNT_MODE_ONLY_VALID:
* Only non-null values will be counted.
* @GARROW_COUNT_MODE_ONLY_NULL:
* Only null values will be counted.
* @GARROW_COUNT_MODE_ALL:
* Both non-null and null values will be counted.
*
* They correspond to the values of `arrow::compute::CountOptions::CountMode`.
*
* Since: 6.0.0
*/
typedef enum {
GARROW_COUNT_MODE_ONLY_VALID,
GARROW_COUNT_MODE_ONLY_NULL,
GARROW_COUNT_MODE_ALL,
} GArrowCountMode;

/* GType accessor macro for #GArrowCountOptions. */
#define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type())
/* Declares GArrowCountOptions as a derivable type whose parent is GObject. */
G_DECLARE_DERIVABLE_TYPE(GArrowCountOptions,
garrow_count_options,
GARROW,
COUNT_OPTIONS,
GObject)
/* Class structure: adds no virtual methods beyond those of GObjectClass. */
struct _GArrowCountOptionsClass
{
GObjectClass parent_class;
};

/* Constructor; available since 6.0. */
GARROW_AVAILABLE_IN_6_0
GArrowCountOptions *
garrow_count_options_new(void);


/**
* GArrowFilterNullSelectionBehavior:
Expand Down Expand Up @@ -242,7 +274,7 @@ GArrowDictionaryArray *garrow_array_dictionary_encode(GArrowArray *array,
GError **error);
/* Counts values of @array; @options is a nullable #GArrowCountOptions. */
GARROW_AVAILABLE_IN_0_13
gint64 garrow_array_count(GArrowArray *array,
                          GArrowCountOptions *options,
                          GError **error);
GARROW_AVAILABLE_IN_0_13
GArrowStructArray *garrow_array_count_values(GArrowArray *array,
Expand Down
3 changes: 3 additions & 0 deletions c_glib/arrow-glib/compute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ arrow::compute::ScalarAggregateOptions *
garrow_scalar_aggregate_options_get_raw(
GArrowScalarAggregateOptions *scalar_aggregate_options);

// Returns the arrow::compute::CountOptions wrapped by count_options.
arrow::compute::CountOptions *
garrow_count_options_get_raw(GArrowCountOptions *count_options);

// Returns the arrow::compute::FilterOptions for filter_options.
arrow::compute::FilterOptions *
garrow_filter_options_get_raw(GArrowFilterOptions *filter_options);

Expand Down
18 changes: 14 additions & 4 deletions c_glib/test/test-count.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,25 @@ class TestCount < Test::Unit::TestCase
include Helper::Buildable
include Helper::Omittable

# Covers each Arrow::CountMode value through Arrow::CountOptions#mode=.
sub_test_case("mode") do
  # Without options, and with ONLY_VALID set explicitly, only the
  # non-null values are counted.
  def test_default
    assert_equal(2, build_int32_array([1, nil, 3]).count)

    options = Arrow::CountOptions.new
    options.mode = Arrow::CountMode::ONLY_VALID
    assert_equal(2, build_int32_array([1, nil, 3]).count(options))
  end

  # ONLY_NULL counts just the null entries.
  def test_nulls
    options = Arrow::CountOptions.new
    options.mode = Arrow::CountMode::ONLY_NULL
    assert_equal(1, build_int32_array([1, nil, 3]).count(options))
  end

  # ALL counts null and non-null entries alike.
  def test_all
    options = Arrow::CountOptions.new
    options.mode = Arrow::CountMode::ALL
    assert_equal(3, build_int32_array([1, nil, 3]).count(options))
  end
end
end
28 changes: 26 additions & 2 deletions cpp/src/arrow/compute/api_aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,24 @@
namespace arrow {

namespace internal {
/// EnumTraits specialization for CountOptions::CountMode: lists the valid
/// enumerators and supplies display names for the type and for each value.
template <>
struct EnumTraits<compute::CountOptions::CountMode>
    : BasicEnumTraits<compute::CountOptions::CountMode, compute::CountOptions::ONLY_VALID,
                      compute::CountOptions::ONLY_NULL, compute::CountOptions::ALL> {
  static std::string name() { return "CountOptions::CountMode"; }

  /// Name of `value`, or "<INVALID>" when it is not a known enumerator.
  static std::string value_name(compute::CountOptions::CountMode value) {
    const char* label = "<INVALID>";
    switch (value) {
      case compute::CountOptions::ONLY_VALID:
        label = "NON_NULL";
        break;
      case compute::CountOptions::ONLY_NULL:
        label = "NULLS";
        break;
      case compute::CountOptions::ALL:
        label = "ALL";
        break;
    }
    return label;
  }
};

template <>
struct EnumTraits<compute::QuantileOptions::Interpolation>
: BasicEnumTraits<compute::QuantileOptions::Interpolation,
Expand Down Expand Up @@ -65,6 +83,8 @@ using ::arrow::internal::DataMember;
static auto kScalarAggregateOptionsType = GetFunctionOptionsType<ScalarAggregateOptions>(
DataMember("skip_nulls", &ScalarAggregateOptions::skip_nulls),
DataMember("min_count", &ScalarAggregateOptions::min_count));
// FunctionOptionsType for CountOptions, built from its single data member
// "mode".
static auto kCountOptionsType =
GetFunctionOptionsType<CountOptions>(DataMember("mode", &CountOptions::mode));
static auto kModeOptionsType =
GetFunctionOptionsType<ModeOptions>(DataMember("n", &ModeOptions::n));
static auto kVarianceOptionsType =
Expand All @@ -86,6 +106,10 @@ ScalarAggregateOptions::ScalarAggregateOptions(bool skip_nulls, uint32_t min_cou
min_count(min_count) {}
constexpr char ScalarAggregateOptions::kTypeName[];

// Stores the requested mode and passes the CountOptions type object to the
// FunctionOptions base.
CountOptions::CountOptions(CountMode mode)
: FunctionOptions(internal::kCountOptionsType), mode(mode) {}
// Out-of-class definition of the constexpr static member kTypeName.
constexpr char CountOptions::kTypeName[];

ModeOptions::ModeOptions(int64_t n) : FunctionOptions(internal::kModeOptionsType), n(n) {}
constexpr char ModeOptions::kTypeName[];

Expand Down Expand Up @@ -124,6 +148,7 @@ constexpr char IndexOptions::kTypeName[];
namespace internal {
void RegisterAggregateOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kScalarAggregateOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kCountOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kModeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kVarianceOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kQuantileOptionsType));
Expand All @@ -135,8 +160,7 @@ void RegisterAggregateOptions(FunctionRegistry* registry) {
// ----------------------------------------------------------------------
// Scalar aggregates

// Invokes the registered "count" function on `value` with the given
// CountOptions.
Result<Datum> Count(const Datum& value, const CountOptions& options, ExecContext* ctx) {
  return CallFunction("count", {value}, &options, ctx);
}

Expand Down
31 changes: 25 additions & 6 deletions cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,26 @@ class ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
uint32_t min_count;
};

/// \brief Options for the count aggregate kernel.
///
/// The mode selects which values are counted; the default mode counts only
/// non-null values.
class ARROW_EXPORT CountOptions : public FunctionOptions {
 public:
  enum CountMode {
    /// Count only non-null values.
    ONLY_VALID = 0,
    /// Count only null values.
    ONLY_NULL = 1,
    /// Count both non-null and null values.
    ALL = 2,
  };

  explicit CountOptions(CountMode mode = ONLY_VALID);

  static constexpr const char kTypeName[] = "CountOptions";

  /// Options constructed with the default mode.
  static CountOptions Defaults() { return CountOptions(); }

  /// Which values to count.
  CountMode mode;
};

/// \brief Control Mode kernel behavior
///
/// Returns top-n common values and counts.
Expand Down Expand Up @@ -139,20 +159,19 @@ class ARROW_EXPORT IndexOptions : public FunctionOptions {

/// @}

/// \brief Count values in an array.
///
/// \param[in] datum to count
/// \param[in] options counting options, see CountOptions for more information
/// \param[in] ctx the function execution context, optional
/// \return out resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Count(const Datum& datum,
                    const CountOptions& options = CountOptions::Defaults(),
                    ExecContext* ctx = NULLPTR);

/// \brief Compute the mean of a numeric array.
///
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/compute/function_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ TEST(FunctionOptions, Equality) {
std::vector<std::shared_ptr<FunctionOptions>> options;
options.emplace_back(new ScalarAggregateOptions());
options.emplace_back(new ScalarAggregateOptions(/*skip_nulls=*/false, /*min_count=*/1));
options.emplace_back(new CountOptions());
options.emplace_back(new CountOptions(CountOptions::ALL));
options.emplace_back(new ModeOptions());
options.emplace_back(new ModeOptions(/*n=*/2));
options.emplace_back(new VarianceOptions());
Expand Down
Loading