Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,39 @@ struct NumericToStringCastFunctor {
}
};

// ----------------------------------------------------------------------
// Temporal to String

// Cast functor that formats every value of an input array of type `I` with
// StringFormatter<I> and gathers the resulting text into an array built by the
// builder associated with output type `O`.
template <typename O, typename I>
struct TemporalToStringCastFunctor {
  using value_type = typename TypeTraits<I>::CType;
  using BuilderType = typename TypeTraits<O>::BuilderType;
  using FormatterType = StringFormatter<I>;

  // Kernel entry point: hands the first batch argument and the mutable output
  // array over to Convert(). `out` is required (debug-checked) to be an array.
  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
    DCHECK(out->is_array());
    return Convert(ctx, *batch[0].array(), out->mutable_array());
  }

  // Formats each element of `input` into a freshly built array and moves that
  // array's data into `*output`. Returns the first non-OK status encountered
  // while visiting the input or finishing the builder.
  static Status Convert(KernelContext* ctx, const ArrayData& input, ArrayData* output) {
    FormatterType format_value(input.type);
    // NOTE(review): the builder receives the *input* type here; presumably the
    // string builder does not use this argument for its own type -- confirm.
    BuilderType string_builder(input.type, ctx->memory_pool());

    RETURN_NOT_OK(VisitArrayDataInline<I>(
        input,
        // Invoked with a value: the formatter passes the rendered text to the
        // inner callback, which appends it to the builder.
        [&](value_type element) {
          return format_value(element, [&](util::string_view text) {
            return string_builder.Append(text);
          });
        },
        // Invoked without a value: record a null in the output.
        [&]() { return string_builder.AppendNull(); }));

    std::shared_ptr<Array> finished;
    RETURN_NOT_OK(string_builder.Finish(&finished));
    *output = std::move(*finished->data());
    return Status::OK();
  }
};

// ----------------------------------------------------------------------
// Binary-like to binary-like
//
Expand Down Expand Up @@ -192,6 +225,18 @@ void AddNumberToStringCasts(CastFunction* func) {
}
}

// Registers on `func` one cast kernel for every type returned by
// TemporalTypes(); each kernel runs TemporalToStringCastFunctor and produces
// the singleton type of `OutType`.
template <typename OutType>
void AddTemporalToStringCasts(CastFunction* func) {
  auto string_type = TypeTraits<OutType>::type_singleton();
  for (const auto& temporal_type : TemporalTypes()) {
    // Select the functor instantiation matching this temporal input type and
    // wrap it so it can be used as an array exec function.
    auto exec = TrivialScalarUnaryAsArraysExec(
        GenerateTemporal<TemporalToStringCastFunctor, OutType>(*temporal_type));
    DCHECK_OK(func->AddKernel(temporal_type->id(), {temporal_type}, string_type,
                              std::move(exec), NullHandling::COMPUTED_NO_PREALLOCATE));
  }
}

template <typename OutType, typename InType>
void AddBinaryToBinaryCast(CastFunction* func) {
auto in_ty = TypeTraits<InType>::type_singleton();
Expand Down Expand Up @@ -226,12 +271,14 @@ std::vector<std::shared_ptr<CastFunction>> GetBinaryLikeCasts() {
auto cast_string = std::make_shared<CastFunction>("cast_string", Type::STRING);
AddCommonCasts(Type::STRING, utf8(), cast_string.get());
AddNumberToStringCasts<StringType>(cast_string.get());
AddTemporalToStringCasts<StringType>(cast_string.get());
AddBinaryToBinaryCast<StringType>(cast_string.get());

auto cast_large_string =
std::make_shared<CastFunction>("cast_large_string", Type::LARGE_STRING);
AddCommonCasts(Type::LARGE_STRING, large_utf8(), cast_large_string.get());
AddNumberToStringCasts<LargeStringType>(cast_large_string.get());
AddTemporalToStringCasts<LargeStringType>(cast_large_string.get());
AddBinaryToBinaryCast<LargeStringType>(cast_large_string.get());

auto cast_fsb =
Expand Down
36 changes: 35 additions & 1 deletion cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ TEST(Cast, CanCast) {
ExpectCanCast(utf8(), {timestamp(TimeUnit::MILLI)});
ExpectCanCast(large_utf8(), {timestamp(TimeUnit::NANO)});
ExpectCannotCast(timestamp(TimeUnit::MICRO),
kBaseBinaryTypes); // no formatting supported
{binary(), large_binary()}); // no formatting supported

ExpectCannotCast(fixed_size_binary(3),
{fixed_size_binary(3)}); // FIXME missing identity cast
Expand All @@ -208,6 +208,13 @@ TEST(Cast, CanCast) {
ExpectCanCast(smallint(),
kNumericTypes); // any cast which is valid for storage is supported
ExpectCannotCast(null(), {smallint()}); // FIXME missing common cast from null

ExpectCanCast(date32(), {utf8(), large_utf8()});
ExpectCanCast(date64(), {utf8(), large_utf8()});
ExpectCanCast(timestamp(TimeUnit::NANO), {utf8(), large_utf8()});
ExpectCanCast(timestamp(TimeUnit::MICRO), {utf8(), large_utf8()});
ExpectCanCast(time32(TimeUnit::MILLI), {utf8(), large_utf8()});
ExpectCanCast(time64(TimeUnit::NANO), {utf8(), large_utf8()});
}

TEST(Cast, SameTypeZeroCopy) {
Expand Down Expand Up @@ -1208,6 +1215,33 @@ TEST(Cast, TimeZeroCopy) {
time64(TimeUnit::MICRO));
}

// Casting date32/date64 arrays to utf8 and large_utf8 should yield
// "YYYY-MM-DD" strings and keep nulls as nulls.
TEST(Cast, DateToString) {
  for (const auto& target_type : {utf8(), large_utf8()}) {
    // date32 value 0 is expected to render as the epoch day.
    auto days_input = ArrayFromJSON(date32(), "[0, null]");
    auto days_expected = ArrayFromJSON(target_type, R"(["1970-01-01", null])");
    CheckCast(days_input, days_expected);

    // date64 value 86400000 is expected to render as the day after the epoch.
    auto millis_input = ArrayFromJSON(date64(), "[86400000, null]");
    auto millis_expected = ArrayFromJSON(target_type, R"(["1970-01-02", null])");
    CheckCast(millis_input, millis_expected);
  }
}

// Casting time32/time64 arrays to utf8 and large_utf8 should yield "HH:MM:SS"
// strings, with a nine-digit fractional part for nanosecond-unit values.
TEST(Cast, TimeToString) {
  for (const auto& target_type : {utf8(), large_utf8()}) {
    auto seconds_input = ArrayFromJSON(time32(TimeUnit::SECOND), "[1, 62]");
    auto seconds_expected =
        ArrayFromJSON(target_type, R"(["00:00:01", "00:01:02"])");
    CheckCast(seconds_input, seconds_expected);

    auto nanos_input = ArrayFromJSON(time64(TimeUnit::NANO), "[0, 1]");
    auto nanos_expected = ArrayFromJSON(
        target_type, R"(["00:00:00.000000000", "00:00:00.000000001"])");
    CheckCast(nanos_input, nanos_expected);
  }
}

// Casting second-unit timestamps to utf8 and large_utf8 should yield
// "YYYY-MM-DD HH:MM:SS" strings; the inputs here are negative (pre-1970)
// values.
TEST(Cast, TimestampToString) {
  for (const auto& target_type : {utf8(), large_utf8()}) {
    auto timestamps =
        ArrayFromJSON(timestamp(TimeUnit::SECOND), "[-30610224000, -5364662400]");
    auto rendered = ArrayFromJSON(
        target_type, R"(["1000-01-01 00:00:00", "1800-01-01 00:00:00"])");
    CheckCast(timestamps, rendered);
  }
}

TEST(Cast, DateToDate) {
auto day_32 = ArrayFromJSON(date32(), "[0, null, 100, 1, 10]");
auto day_64 = ArrayFromJSON(date64(), R"([
Expand Down
22 changes: 14 additions & 8 deletions cpp/src/arrow/csv/writer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,26 @@ std::vector<WriterTestParams> GenerateTestCases() {
{field("a", uint64())},
{field("b\"", utf8())},
{field("c ", int32())},
{field("d", date32())},
{field("e", date64())},
});
auto populated_batch = R"([{"a": 1, "c ": -1},
{ "a": 1, "b\"": "abc\"efg", "c ": 2324},
{ "b\"": "abcd", "c ": 5467},
{ },
{ "a": 546, "b\"": "", "c ": 517 },
{ "a": 124, "b\"": "a\"\"b\"" }])";
std::string expected_without_header = std::string("1,,-1") + "\n" + // line 1
+R"(1,"abc""efg",2324)" + "\n" + // line 2
R"(,"abcd",5467)" + "\n" + // line 3
R"(,,)" + "\n" + // line 4
R"(546,"",517)" + "\n" + // line 5
R"(124,"a""""b""",)" + "\n"; // line 6
std::string expected_header = std::string(R"("a","b""","c ")") + "\n";
{ "a": 124, "b\"": "a\"\"b\"" },
{ "d": 0 },
{ "e": 86400000 }])";
std::string expected_without_header = std::string("1,,-1,,") + "\n" + // line 1
R"(1,"abc""efg",2324,,)" + "\n" + // line 2
R"(,"abcd",5467,,)" + "\n" + // line 3
R"(,,,,)" + "\n" + // line 4
R"(546,"",517,,)" + "\n" + // line 5
R"(124,"a""""b""",,,)" + "\n" + // line 6
R"(,,,1970-01-01,)" + "\n" + // line 7
R"(,,,,1970-01-02)" + "\n"; // line 8
std::string expected_header = std::string(R"("a","b""","c ","d","e")") + "\n";

return std::vector<WriterTestParams>{
{abc_schema, "[]", DefaultTestOptions(/*header=*/false), ""},
Expand Down