From 0caf6585a91bf5113f98322679ab2298a1643816 Mon Sep 17 00:00:00 2001 From: "artem.alekseev" Date: Fri, 15 Nov 2019 05:38:42 -0600 Subject: [PATCH] Custom column builder --- cpp/src/arrow/csv/options.h | 14 +++++++++++++- cpp/src/arrow/csv/reader.cc | 6 ++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h index 5af70620442..b0cf3962dbf 100644 --- a/cpp/src/arrow/csv/options.h +++ b/cpp/src/arrow/csv/options.h @@ -19,11 +19,14 @@ #define ARROW_CSV_OPTIONS_H #include +#include #include #include #include #include +#include "arrow/csv/column_builder.h" +#include "arrow/util/task_group.h" #include "arrow/util/visibility.h" namespace arrow { @@ -67,7 +70,16 @@ struct ARROW_EXPORT ConvertOptions { bool check_utf8 = true; /// Optional per-column types (disabling type inference on those columns) std::unordered_map> column_types; - /// Recognized spellings for null values + + /// Optional per-column fabrics for custom column builders + std::unordered_map< + std::string, + std::function& type, + int32_t col_index, const ConvertOptions& options, + const std::shared_ptr& task_group, + std::shared_ptr* out)>> + column_builder_fabrics; + // Recognized spellings for null values std::vector null_values; /// Recognized spellings for boolean true values std::vector true_values; diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 5d90f6620a5..dfbc40ebf51 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -225,6 +225,12 @@ class BaseTableReader : public csv::TableReader { if (it == convert_options_.column_types.end()) { return ColumnBuilder::Make(pool_, col_index, convert_options_, task_group_, out); } else { + auto builder_fabric = + convert_options_.column_builder_fabrics.find(col_name); + if (builder_fabric != convert_options_.column_builder_fabrics.end()) { + return builder_fabric->second(pool_, it->second, col_index, convert_options_, + task_group_, out); + } return ColumnBuilder::Make(pool_, it->second, col_index, convert_options_, task_group_, out); }