-
Notifications
You must be signed in to change notification settings - Fork 4k
Closed
Closed
Copy link
Description
Describe the enhancement requested
We can represent "utf8" and "binary" min/max values with arrow::ArrayStatistics:
arrow/cpp/src/arrow/array/statistics.h
Lines 29 to 67 in 4c36f12
| /// \brief Statistics for an Array | |
| /// | |
| /// Apache Arrow format doesn't have statistics but data source such | |
| /// as Apache Parquet may have statistics. Statistics associated with | |
| /// data source can be read unified API via this class. | |
| struct ARROW_EXPORT ArrayStatistics { | |
| using ValueType = std::variant<bool, int64_t, uint64_t, double, std::string>; | |
| /// \brief The number of null values, may not be set | |
| std::optional<int64_t> null_count = std::nullopt; | |
| /// \brief The number of distinct values, may not be set | |
| std::optional<int64_t> distinct_count = std::nullopt; | |
| /// \brief The minimum value, may not be set | |
| std::optional<ValueType> min = std::nullopt; | |
| /// \brief Whether the minimum value is exact or not | |
| bool is_min_exact = false; | |
| /// \brief The maximum value, may not be set | |
| std::optional<ValueType> max = std::nullopt; | |
| /// \brief Whether the maximum value is exact or not | |
| bool is_max_exact = false; | |
| /// \brief Check two statistics for equality | |
| bool Equals(const ArrayStatistics& other) const { | |
| return null_count == other.null_count && distinct_count == other.distinct_count && | |
| min == other.min && is_min_exact == other.is_min_exact && max == other.max && | |
| is_max_exact == other.is_max_exact; | |
| } | |
| /// \brief Check two statistics for equality | |
| bool operator==(const ArrayStatistics& other) const { return Equals(other); } | |
| /// \brief Check two statistics for not equality | |
| bool operator!=(const ArrayStatistics& other) const { return !Equals(other); } | |
| }; |
But we can't distinguish between them because ValueType uses std::string for both.
How can we distinguish them? Should we add arrow::ArrayStatistics::{min,max}_type?
Component(s)
C++