From bb0a1a5ced26e549c28bcf202865f9ab38f5c224 Mon Sep 17 00:00:00 2001 From: tianchen Date: Wed, 9 Sep 2020 10:57:37 +0800 Subject: [PATCH 1/2] ARROW-9279: [C++] Implement PrettyPrint for Scalars Change-Id: Ic06325485fbf4c5b8fde59283fce0e631cd2c2bb --- cpp/src/arrow/pretty_print.cc | 215 ++++++++++++++++++++++++----- cpp/src/arrow/pretty_print.h | 9 ++ cpp/src/arrow/pretty_print_test.cc | 196 +++++++++++++++++++++++++- cpp/src/arrow/scalar.h | 4 +- 4 files changed, 382 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index f88230ffd64..43dbb0776ca 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -44,6 +44,7 @@ namespace arrow { using internal::checked_cast; +using internal::checked_pointer_cast; class PrettyPrinter { public: @@ -111,6 +112,160 @@ void PrettyPrinter::Indent() { } } +static arrow_vendored::date::sys_days epoch_ = + arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970}; + +template +static void FormatDateTime(std::ostream* sink, const char* fmt, int64_t value, + bool add_epoch) { + if (add_epoch) { + (*sink) << arrow_vendored::date::format(fmt, epoch_ + Unit{value}); + } else { + (*sink) << arrow_vendored::date::format(fmt, Unit{value}); + } +} + +static void FormatDateTime(std::ostream* sink, TimeUnit::type unit, const char* fmt, + int64_t value, bool add_epoch) { + switch (unit) { + case TimeUnit::NANO: + FormatDateTime(sink, fmt, value, add_epoch); + break; + case TimeUnit::MICRO: + FormatDateTime(sink, fmt, value, add_epoch); + break; + case TimeUnit::MILLI: + FormatDateTime(sink, fmt, value, add_epoch); + break; + case TimeUnit::SECOND: + FormatDateTime(sink, fmt, value, add_epoch); + break; + } +} + +class ScalarPrinter : public PrettyPrinter { + public: + ScalarPrinter(const PrettyPrintOptions& options, std::ostream* sink) + : PrettyPrinter(options, sink) {} + + Status Print(const Scalar& scalar) { + if (scalar.is_valid == false) { + (*sink_) << "null"; + } else { + RETURN_NOT_OK(VisitScalarInline(scalar, this)); + Flush(); + } + return Status::OK(); + } + + template + enable_if_integer Visit(const T& scalar) { + (*sink_) << internal::UpcastInt(scalar.value); + return Status::OK(); + } + + template + typename std::enable_if::value || + std::is_base_of::value || + std::is_base_of::value || + std::is_base_of::value || + std::is_base_of::value, + Status>::type + Visit(const T& scalar) { + (*sink_) << scalar.value; + return Status::OK(); + } + + template + enable_if_string_like Visit(const T& scalar) { + (*sink_) << "\"" << scalar.value->ToString() << "\""; + return Status::OK(); + } + + template + enable_if_binary_like Visit(const T& scalar) { + (*sink_) << HexEncode(scalar.value->ToString()); + return Status::OK(); + } + + template + enable_if_date Visit(const T& scalar) { + using unit = typename std::conditional::value, + arrow_vendored::date::days, + std::chrono::milliseconds>::type; + FormatDateTime(sink_, "%F", scalar.value, true); + return Status::OK(); + } + + template + enable_if_time Visit(const T& scalar) { + const auto type = static_cast(scalar.type.get()); + FormatDateTime(sink_, type->unit(), "%T", scalar.value, false); + return Status::OK(); + } + + Status Visit(const TimestampScalar& scalar) { + const auto type = static_cast(scalar.type.get()); + FormatDateTime(sink_, type->unit(), "%F %T", scalar.value, true); + return Status::OK(); + } + + Status Visit(const DayTimeIntervalScalar& scalar) { + auto day_millis = scalar.value; + (*sink_) << day_millis.days << "d" << day_millis.milliseconds << "ms"; + return Status::OK(); + } + + Status Visit(const BooleanScalar& scalar) { + (*sink_) << (scalar.value == true ? "true" : "false"); + return Status::OK(); + } + + Status Visit(const Decimal128Scalar& scalar) { + auto type = checked_pointer_cast(scalar.type); + (*sink_) << scalar.value.ToString(type->scale()); + return Status::OK(); + } + + Status Visit(const BaseListScalar& scalar) { + (*sink_) << scalar.value->ToString(); + return Status::OK(); + } + + Status Visit(const MapScalar& scalar) { + auto inner_array = checked_pointer_cast(scalar.value); + (*sink_) << "keys:\n"; + (*sink_) << inner_array->field(0)->ToString(); + (*sink_) << "values:\n"; + (*sink_) << inner_array->field(1)->ToString(); + return Status::OK(); + } + + Status Visit(const StructScalar& scalar) { + std::vector> scalars = scalar.value; + (*sink_) << "{"; + for (size_t i = 0; i < scalars.size(); i++) { + ScalarPrinter printer(options_, sink_); + printer.Print(*scalars[i]); + if (i != scalars.size() - 1) { + (*sink_) << ", "; + } + } + (*sink_) << "}"; + return Status::OK(); + } + + Status Visit(const UnionScalar& scalar) { + ScalarPrinter printer(options_, sink_); + printer.Print(*(scalar.value)); + return Status::OK(); + } + + Status Visit(const Scalar& scalar) { + return Status::NotImplemented("Not implemented type:" + scalar.type->ToString()); + } +}; + class ArrayPrinter : public PrettyPrinter { public: ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink) @@ -166,7 +321,8 @@ class ArrayPrinter : public PrettyPrinter { using unit = typename std::conditional::value, arrow_vendored::date::days, std::chrono::milliseconds>::type; - WriteValues(array, [&](int64_t i) { FormatDateTime("%F", data[i], true); }); + WriteValues(array, + [&](int64_t i) { FormatDateTime(sink_, "%F", data[i], true); }); return Status::OK(); } @@ -174,16 +330,18 @@ class ArrayPrinter : public PrettyPrinter { enable_if_time WriteDataValues(const T& array) { const auto data = array.raw_values(); const auto type = static_cast(array.type().get()); - WriteValues(array, - [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i], false); }); + WriteValues(array, [&](int64_t i) { + FormatDateTime(sink_, type->unit(), "%T", data[i], false); + }); return Status::OK(); } Status WriteDataValues(const TimestampArray& array) { const int64_t* data = array.raw_values(); const auto type = static_cast(array.type().get()); - WriteValues(array, - [&](int64_t i) { FormatDateTime(type->unit(), "%F %T", data[i], true); }); + WriteValues(array, [&](int64_t i) { + FormatDateTime(sink_, type->unit(), "%F %T", data[i], true); + }); return Status::OK(); } @@ -385,41 +543,8 @@ class ArrayPrinter : public PrettyPrinter { Flush(); return Status::OK(); } - - private: - template - void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) { - if (add_epoch) { - (*sink_) << arrow_vendored::date::format(fmt, epoch_ + Unit{value}); - } else { - (*sink_) << arrow_vendored::date::format(fmt, Unit{value}); - } - } - - void FormatDateTime(TimeUnit::type unit, const char* fmt, int64_t value, - bool add_epoch) { - switch (unit) { - case TimeUnit::NANO: - FormatDateTime(fmt, value, add_epoch); - break; - case TimeUnit::MICRO: - FormatDateTime(fmt, value, add_epoch); - break; - case TimeUnit::MILLI: - FormatDateTime(fmt, value, add_epoch); - break; - case TimeUnit::SECOND: - FormatDateTime(fmt, value, add_epoch); - break; - } - } - - static arrow_vendored::date::sys_days epoch_; }; -arrow_vendored::date::sys_days ArrayPrinter::epoch_ = - arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970}; - Status ArrayPrinter::WriteValidityBitmap(const Array& array) { Indent(); Write("-- is_valid:"); @@ -456,6 +581,20 @@ Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options, return Status::OK(); } +Status PrettyPrint(const Scalar& scalar, const PrettyPrintOptions& options, + std::ostream* sink) { + ScalarPrinter printer(options, sink); + return printer.Print(scalar); +} + +Status PrettyPrint(const Scalar& scalar, const PrettyPrintOptions& options, + std::string* result) { + std::ostringstream sink; + RETURN_NOT_OK(PrettyPrint(scalar, options, &sink)); + *result = sink.str(); + return Status::OK(); +} + Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options, std::ostream* sink) { int num_chunks = chunked_arr.num_chunks(); diff --git a/cpp/src/arrow/pretty_print.h b/cpp/src/arrow/pretty_print.h index 9d2c72c7186..54c18c0ea17 100644 --- a/cpp/src/arrow/pretty_print.h +++ b/cpp/src/arrow/pretty_print.h @@ -117,6 +117,15 @@ ARROW_EXPORT Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options, std::string* result); +/// \brief Print human-readable representation of Scalar +ARROW_EXPORT +Status PrettyPrint(const Scalar& scalar, const PrettyPrintOptions& options, + std::ostream* sink); + +ARROW_EXPORT +Status PrettyPrint(const Scalar& scalar, const PrettyPrintOptions& options, + std::string* result); + ARROW_EXPORT Status DebugPrint(const Array& arr, int indent); diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 6124b8f2ddc..3b4e98491d0 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#include + #include #include #include @@ -22,8 +24,6 @@ #include #include -#include - #include "arrow/array.h" #include "arrow/builder.h" #include "arrow/pretty_print.h" @@ -740,4 +740,196 @@ TEST_F(TestPrettyPrint, SchemaIndentation) { Check(*sch, options, expected); } +TEST_F(TestPrettyPrint, NullScalar) { + PrettyPrintOptions options; + + auto ty = decimal(3, 2); + auto null = MakeNullScalar(ty); + + Check(*null, options, "null"); +} + +TEST_F(TestPrettyPrint, BooleanScalar) { + PrettyPrintOptions options; + + auto bool_scalar = BooleanScalar(true); + Check(bool_scalar, options, "true"); + bool_scalar.is_valid = false; + Check(bool_scalar, options, "null"); +} + +TEST_F(TestPrettyPrint, NumericScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared((int8_t)12); + Check(*scalar, options, "12"); + scalar = std::make_shared(34); + Check(*scalar, options, "34"); + scalar = std::make_shared(56); + Check(*scalar, options, "56"); + scalar = std::make_shared(78); + Check(*scalar, options, "78"); + + scalar = std::make_shared((uint8_t)12); + Check(*scalar, options, "12"); + scalar = std::make_shared(34); + Check(*scalar, options, "34"); + scalar = std::make_shared(56); + Check(*scalar, options, "56"); + scalar = std::make_shared(78); + Check(*scalar, options, "78"); + + scalar = std::make_shared(2.2); + Check(*scalar, options, "2.2"); + scalar = std::make_shared(3.3); + Check(*scalar, options, "3.3"); +} + +TEST_F(TestPrettyPrint, StringScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + auto buffer = Buffer::FromString("test data"); + + scalar = std::make_shared("hello"); + Check(*scalar, options, "\"hello\""); + scalar = std::make_shared("world"); + Check(*scalar, options, "\"world\""); +} + +TEST_F(TestPrettyPrint, BinaryScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + auto buffer = Buffer::FromString("test data"); + + scalar = std::make_shared(buffer); + Check(*scalar, options, "746573742064617461"); + scalar = std::make_shared(buffer); + Check(*scalar, options, "746573742064617461"); + scalar = std::make_shared(buffer, fixed_size_binary(9)); + Check(*scalar, options, "746573742064617461"); +} + +TEST_F(TestPrettyPrint, DateScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared(12345); + Check(*scalar, options, "2003-10-20"); + + scalar = std::make_shared(1599636094000); + Check(*scalar, options, "2020-09-09"); +} + +TEST_F(TestPrettyPrint, TimeScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared(12345, time32(TimeUnit::MILLI)); + Check(*scalar, options, "00:00:12.345"); + scalar = std::make_shared(12345, time32(TimeUnit::SECOND)); + Check(*scalar, options, "03:25:45"); + + scalar = std::make_shared(12345678, time64(TimeUnit::MICRO)); + Check(*scalar, options, "00:00:12.345678"); + scalar = std::make_shared(12345678, time64(TimeUnit::NANO)); + Check(*scalar, options, "00:00:00.012345678"); +} + +TEST_F(TestPrettyPrint, TimestampScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared(1599637102445, timestamp(TimeUnit::MILLI)); + Check(*scalar, options, "2020-09-09 07:38:22.445"); + + scalar = std::make_shared(1599637102, timestamp(TimeUnit::SECOND)); + Check(*scalar, options, "2020-09-09 07:38:22"); +} + +TEST_F(TestPrettyPrint, IntervalScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared(63); + Check(*scalar, options, "63"); + + DayTimeIntervalType::DayMilliseconds daytime_val = {1, 2}; + scalar = std::make_shared(daytime_val); + Check(*scalar, options, "1d2ms"); +} + +TEST_F(TestPrettyPrint, DurationScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + + scalar = std::make_shared(12, duration(TimeUnit::MILLI)); + Check(*scalar, options, "12"); + + scalar = std::make_shared(34, duration(TimeUnit::SECOND)); + Check(*scalar, options, "34"); +} + +TEST_F(TestPrettyPrint, Decimal128Scalar) { + PrettyPrintOptions options; + + auto ty = decimal(3, 2); + auto pi = Decimal128Scalar(Decimal128("3.14"), ty); + auto null = MakeNullScalar(ty); + + Check(pi, options, "3.14"); +} + +TEST_F(TestPrettyPrint, ListScalar) { + PrettyPrintOptions options; + + auto type = list(utf8()); + auto array = ArrayFromJSON(utf8(), R"(["hello", "world"])"); + auto scalar = ListScalar(std::move(array), type); + + Check(scalar, options, "[\n \"hello\",\n \"world\"\n]"); +} + +TEST_F(TestPrettyPrint, MapScalar) { + PrettyPrintOptions options; + + auto type = map(utf8(), int64()); + auto array = ArrayFromJSON(type->field(0)->type(), R"([["hello", 12], ["k2", 34]])"); + auto scalar = MapScalar(std::move(array), map(utf8(), int64())); + + Check(scalar, options, "keys:\n[\n \"hello\",\n \"k2\"\n]values:\n[\n 12,\n 34\n]"); +} + +TEST_F(TestPrettyPrint, StructScalar) { + PrettyPrintOptions options; + + std::vector> scalars; + scalars.reserve(2); + scalars.emplace_back(std::make_shared("hello")); + scalars.emplace_back(std::make_shared(123)); + + auto scalar = + StructScalar(scalars, struct_({field("f1", utf8()), field("f2", int64())})); + + Check(scalar, options, "{\"hello\", 123}"); +} + +TEST_F(TestPrettyPrint, UnionScalar) { + PrettyPrintOptions options; + std::shared_ptr scalar; + std::shared_ptr type; + + type = sparse_union({field("string", utf8()), field("number", uint64())}); + scalar = + std::make_shared(std::make_shared("sparse"), type); + Check(*scalar, options, "\"sparse\""); + scalar = std::make_shared(std::make_shared(12), type); + Check(*scalar, options, "12"); + + type = dense_union({field("string", utf8()), field("number", uint64())}); + scalar = + std::make_shared(std::make_shared("dense"), type); + Check(*scalar, options, "\"dense\""); +} + } // namespace arrow diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index 946d3bfe44f..da7114c05a4 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -214,7 +214,7 @@ struct ARROW_EXPORT BaseBinaryScalar : public Scalar { struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; - using TypeClass = BinaryScalar; + using TypeClass = BinaryType; BinaryScalar(std::shared_ptr value, std::shared_ptr type) : BaseBinaryScalar(std::move(value), std::move(type)) {} @@ -239,7 +239,7 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar { struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar { using BaseBinaryScalar::BaseBinaryScalar; - using TypeClass = LargeBinaryScalar; + using TypeClass = LargeBinaryType; LargeBinaryScalar(std::shared_ptr value, std::shared_ptr type) : BaseBinaryScalar(std::move(value), std::move(type)) {} From 86b1ab6dcef9385a0c4a90321fb30cf73b5ce027 Mon Sep 17 00:00:00 2001 From: tianchen Date: Thu, 10 Sep 2020 10:51:29 +0800 Subject: [PATCH 2/2] fix build fail Change-Id: I2475b9dd11046ec43de7c1baacbb57293279c5fa --- cpp/src/arrow/pretty_print.cc | 4 ++-- cpp/src/arrow/pretty_print_test.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc index 43dbb0776ca..8ec1e57d384 100644 --- a/cpp/src/arrow/pretty_print.cc +++ b/cpp/src/arrow/pretty_print.cc @@ -246,7 +246,7 @@ class ScalarPrinter : public PrettyPrinter { (*sink_) << "{"; for (size_t i = 0; i < scalars.size(); i++) { ScalarPrinter printer(options_, sink_); - printer.Print(*scalars[i]); + RETURN_NOT_OK(printer.Print(*scalars[i])); if (i != scalars.size() - 1) { (*sink_) << ", "; } @@ -257,7 +257,7 @@ class ScalarPrinter : public PrettyPrinter { Status Visit(const UnionScalar& scalar) { ScalarPrinter printer(options_, sink_); - printer.Print(*(scalar.value)); + RETURN_NOT_OK(printer.Print(*(scalar.value))); return Status::OK(); } diff --git a/cpp/src/arrow/pretty_print_test.cc b/cpp/src/arrow/pretty_print_test.cc index 3b4e98491d0..61db633c0a2 100644 --- a/cpp/src/arrow/pretty_print_test.cc +++ b/cpp/src/arrow/pretty_print_test.cc @@ -780,7 +780,7 @@ TEST_F(TestPrettyPrint, NumericScalar) { scalar = std::make_shared(78); Check(*scalar, options, "78"); - scalar = std::make_shared(2.2); + scalar = std::make_shared(2.2f); Check(*scalar, options, "2.2"); scalar = std::make_shared(3.3); Check(*scalar, options, "3.3");