Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions cpp/examples/arrow/from_json_string_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ arrow::Status RunExample() {
"[[11, 22], null, [null, 33]]"));

// ChunkedArrayFromJSONString
std::shared_ptr<arrow::ChunkedArray> chunked_array;
ARROW_RETURN_NOT_OK(ChunkedArrayFromJSONString(
arrow::int32(), {"[5, 10]", "[null]", "[16]"}, &chunked_array));
ARROW_ASSIGN_OR_RAISE(
auto chunked_array,
ChunkedArrayFromJSONString(arrow::int32(), {"[5, 10]", "[null]", "[16]"}));

// DictArrayFromJSONString
std::shared_ptr<arrow::Array> dict_array;
ARROW_RETURN_NOT_OK(DictArrayFromJSONString(
dictionary(arrow::int32(), arrow::utf8()), "[0, 1, 0, 2, 0, 3]",
R"(["k1", "k2", "k3", "k4"])", &dict_array));
ARROW_ASSIGN_OR_RAISE(
auto dict_array,
DictArrayFromJSONString(dictionary(arrow::int32(), arrow::utf8()),
"[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])"));

return arrow::Status::OK();
}
Expand Down
39 changes: 16 additions & 23 deletions cpp/src/arrow/json/from_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1004,23 +1004,20 @@ Result<std::shared_ptr<Array>> ArrayFromJSONString(const std::shared_ptr<DataTyp
return ArrayFromJSONString(type, std::string_view(json_string));
}

Status ChunkedArrayFromJSONString(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json_strings,
std::shared_ptr<ChunkedArray>* out) {
Result<std::shared_ptr<ChunkedArray>> ChunkedArrayFromJSONString(
const std::shared_ptr<DataType>& type, const std::vector<std::string>& json_strings) {
ArrayVector out_chunks;
out_chunks.reserve(json_strings.size());
for (const std::string& chunk_json : json_strings) {
out_chunks.emplace_back();
ARROW_ASSIGN_OR_RAISE(out_chunks.back(), ArrayFromJSONString(type, chunk_json));
}
*out = std::make_shared<ChunkedArray>(std::move(out_chunks), type);
return Status::OK();
return std::make_shared<ChunkedArray>(std::move(out_chunks), type);
}

Status DictArrayFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view indices_json,
std::string_view dictionary_json,
std::shared_ptr<Array>* out) {
Result<std::shared_ptr<Array>> DictArrayFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view indices_json,
std::string_view dictionary_json) {
if (type->id() != Type::DICTIONARY) {
return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type);
}
Expand All @@ -1031,13 +1028,11 @@ Status DictArrayFromJSONString(const std::shared_ptr<DataType>& type,
ArrayFromJSONString(dictionary_type.index_type(), indices_json));
ARROW_ASSIGN_OR_RAISE(auto dictionary, ArrayFromJSONString(dictionary_type.value_type(),
dictionary_json));

return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary))
.Value(out);
return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary));
}

Status ScalarFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view json_string, std::shared_ptr<Scalar>* out) {
Result<std::shared_ptr<Scalar>> ScalarFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view json_string) {
std::shared_ptr<JSONConverter> converter;
RETURN_NOT_OK(GetConverter(type, &converter));

Expand All @@ -1052,28 +1047,26 @@ Status ScalarFromJSONString(const std::shared_ptr<DataType>& type,
RETURN_NOT_OK(converter->AppendValue(json_doc));
RETURN_NOT_OK(converter->Finish(&array));
DCHECK_EQ(array->length(), 1);
return array->GetScalar(0).Value(out);
return array->GetScalar(0);
}

Status DictScalarFromJSONString(const std::shared_ptr<DataType>& type,
std::string_view index_json,
std::string_view dictionary_json,
std::shared_ptr<Scalar>* out) {
Result<std::shared_ptr<Scalar>> DictScalarFromJSONString(
const std::shared_ptr<DataType>& type, std::string_view index_json,
std::string_view dictionary_json) {
if (type->id() != Type::DICTIONARY) {
return Status::TypeError("DictScalarFromJSONString requires dictionary type, got ",
*type);
}

const auto& dictionary_type = checked_cast<const DictionaryType&>(*type);

std::shared_ptr<Scalar> index;
std::shared_ptr<Array> dictionary;
RETURN_NOT_OK(ScalarFromJSONString(dictionary_type.index_type(), index_json, &index));
ARROW_ASSIGN_OR_RAISE(auto index,
ScalarFromJSONString(dictionary_type.index_type(), index_json));
ARROW_ASSIGN_OR_RAISE(
dictionary, ArrayFromJSONString(dictionary_type.value_type(), dictionary_json));

*out = DictionaryScalar::Make(std::move(index), std::move(dictionary));
return Status::OK();
return DictionaryScalar::Make(std::move(index), std::move(dictionary));
}

} // namespace json
Expand Down
53 changes: 22 additions & 31 deletions cpp/src/arrow/json/from_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ namespace json {
/// \brief Create an Array from a JSON string
///
/// \code {.cpp}
/// std::shared_ptr<Array> array = ArrayFromJSONString(
/// int64(), "[2, 3, null, 7, 11]"
/// ).ValueOrDie();
/// Result<std::shared_ptr<Array>> maybe_array =
/// ArrayFromJSONString(int64(), "[2, 3, null, 7, 11]");
/// \endcode
ARROW_EXPORT
Result<std::shared_ptr<Array>> ArrayFromJSONString(const std::shared_ptr<DataType>&,
Expand All @@ -68,52 +67,44 @@ Result<std::shared_ptr<Array>> ArrayFromJSONString(const std::shared_ptr<DataTyp
/// \brief Create a ChunkedArray from a JSON string
///
/// \code {.cpp}
/// std::shared_ptr<ChunkedArray> chunked_array;
/// ChunkedArrayFromJSONString(
/// int64(), {R"([5, 10])", R"([null])", R"([16])"}, &chunked_array
/// );
/// Result<std::shared_ptr<ChunkedArray>> maybe_chunked_array =
/// ChunkedArrayFromJSONString(int64(), {R"([5, 10])", R"([null])", R"([16])"});
/// \endcode
ARROW_EXPORT
Status ChunkedArrayFromJSONString(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json_strings,
std::shared_ptr<ChunkedArray>* out);
Result<std::shared_ptr<ChunkedArray>> ChunkedArrayFromJSONString(
const std::shared_ptr<DataType>& type, const std::vector<std::string>& json_strings);

/// \brief Create a DictionaryArray from a JSON string
///
/// \code {.cpp}
/// std::shared_ptr<Array> array;
/// DictArrayFromJSONString(
/// dictionary(int32(), utf8()),
/// "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])",
/// &array
/// );
/// Result<std::shared_ptr<Array>> maybe_dict_array =
/// DictArrayFromJSONString(dictionary(int32(), utf8()), "[0, 1, 0, 2, 0, 3]",
/// R"(["k1", "k2", "k3", "k4"])");
/// \endcode
ARROW_EXPORT
Status DictArrayFromJSONString(const std::shared_ptr<DataType>&,
std::string_view indices_json,
std::string_view dictionary_json,
std::shared_ptr<Array>* out);
Result<std::shared_ptr<Array>> DictArrayFromJSONString(const std::shared_ptr<DataType>&,
std::string_view indices_json,
std::string_view dictionary_json);

/// \brief Create a Scalar from a JSON string
/// \code {.cpp}
/// std::shared_ptr<Scalar> scalar;
/// ScalarFromJSONString(float64(), "42", &scalar);
/// Result<std::shared_ptr<Scalar>> maybe_scalar =
/// ScalarFromJSONString(float64(), "42");
/// \endcode
ARROW_EXPORT
Status ScalarFromJSONString(const std::shared_ptr<DataType>&, std::string_view json,
std::shared_ptr<Scalar>* out);
Result<std::shared_ptr<Scalar>> ScalarFromJSONString(const std::shared_ptr<DataType>&,
std::string_view json);

/// \brief Create a DictionaryScalar from a JSON string
/// \code {.cpp}
/// std::shared_ptr<Scalar> scalar;
/// DictScalarFromJSONString(dictionary(int32(), utf8()), "3", R"(["k1", "k2", "k3",
/// "k4"])", &scalar);
/// Result<std::shared_ptr<Scalar>> maybe_dict_scalar =
/// DictScalarFromJSONString(dictionary(int32(), utf8()), "3",
/// R"(["k1", "k2", "k3", "k4"])");
/// \endcode
ARROW_EXPORT
Status DictScalarFromJSONString(const std::shared_ptr<DataType>&,
std::string_view index_json,
std::string_view dictionary_json,
std::shared_ptr<Scalar>* out);
Result<std::shared_ptr<Scalar>> DictScalarFromJSONString(
const std::shared_ptr<DataType>&, std::string_view index_json,
std::string_view dictionary_json);

/// @}

Expand Down
66 changes: 30 additions & 36 deletions cpp/src/arrow/json/from_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ template <typename T, typename C_TYPE = typename T::c_type>
void AssertJSONScalar(const std::shared_ptr<DataType>& type, const std::string& json,
const bool is_valid, const C_TYPE value) {
SCOPED_TRACE(json);
std::shared_ptr<Scalar> actual, expected;
std::shared_ptr<Scalar> expected;

ASSERT_OK(ScalarFromJSONString(type, json, &actual));
ASSERT_OK_AND_ASSIGN(auto actual, ScalarFromJSONString(type, json));
if (is_valid) {
ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value));
} else {
Expand Down Expand Up @@ -1471,35 +1471,33 @@ TEST(TestDictArrayFromJSON, Basics) {

TEST(TestDictArrayFromJSON, Errors) {
auto type = dictionary(int32(), utf8());
std::shared_ptr<Array> array;

ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[\"not a valid index\"]",
"[\"\"]", &array));
ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[0, 1]", "[1]",
&array)); // dict value isn't string
ASSERT_RAISES(Invalid,
DictArrayFromJSONString(type, "[\"not a valid index\"]", "[\"\"]"));
ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[0, 1]",
"[1]")); // dict value isn't string
}

TEST(TestChunkedArrayFromJSON, Basics) {
auto type = int32();
std::shared_ptr<ChunkedArray> chunked_array;
ASSERT_OK(ChunkedArrayFromJSONString(type, {}, &chunked_array));
ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArrayFromJSONString(type, {}));
ASSERT_OK(chunked_array->ValidateFull());
ASSERT_EQ(chunked_array->num_chunks(), 0);
AssertTypeEqual(type, chunked_array->type());

ASSERT_OK(ChunkedArrayFromJSONString(type, {"[1, 2]", "[3, null, 4]"}, &chunked_array));
ASSERT_OK(chunked_array->ValidateFull());
ASSERT_EQ(chunked_array->num_chunks(), 2);
ASSERT_OK_AND_ASSIGN(auto chunked_array_two,
ChunkedArrayFromJSONString(type, {"[1, 2]", "[3, null, 4]"}));
ASSERT_OK(chunked_array_two->ValidateFull());
ASSERT_EQ(chunked_array_two->num_chunks(), 2);
std::shared_ptr<Array> expected_chunk;
ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[1, 2]"));
AssertArraysEqual(*expected_chunk, *chunked_array->chunk(0), /*verbose=*/true);
AssertArraysEqual(*expected_chunk, *chunked_array_two->chunk(0), /*verbose=*/true);
ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[3, null, 4]"));
AssertArraysEqual(*expected_chunk, *chunked_array->chunk(1), /*verbose=*/true);
AssertArraysEqual(*expected_chunk, *chunked_array_two->chunk(1), /*verbose=*/true);
}

TEST(TestScalarFromJSON, Basics) {
// Sanity check for common types (not exhaustive)
std::shared_ptr<Scalar> scalar;
AssertJSONScalar<Int64Type>(int64(), "4", true, 4);
AssertJSONScalar<Int64Type>(int64(), "null", false, 0);
AssertJSONScalar<StringType, std::shared_ptr<Buffer>>(utf8(), R"("")", true,
Expand All @@ -1516,25 +1514,22 @@ TEST(TestScalarFromJSON, Basics) {
AssertJSONScalar<BooleanType, bool>(boolean(), "1", true, true);
AssertJSONScalar<DoubleType>(float64(), "1.0", true, 1.0);
AssertJSONScalar<DoubleType>(float64(), "-0.0", true, -0.0);
ASSERT_OK(ScalarFromJSONString(float64(), "NaN", &scalar));
ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*scalar).value));
ASSERT_OK(ScalarFromJSONString(float64(), "Inf", &scalar));
ASSERT_TRUE(std::isinf(checked_cast<DoubleScalar&>(*scalar).value));
ASSERT_OK_AND_ASSIGN(auto nan_scalar, ScalarFromJSONString(float64(), "NaN"));
ASSERT_TRUE(std::isnan(checked_cast<DoubleScalar&>(*nan_scalar).value));
ASSERT_OK_AND_ASSIGN(auto inf_scalar, ScalarFromJSONString(float64(), "Inf"));
ASSERT_TRUE(std::isinf(checked_cast<DoubleScalar&>(*inf_scalar).value));
}

TEST(TestScalarFromJSON, Errors) {
std::shared_ptr<Scalar> scalar;
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[0]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[9223372036854775808]", &scalar));
ASSERT_RAISES(Invalid,
ScalarFromJSONString(int64(), "[-9223372036854775809]", &scalar));
ASSERT_RAISES(Invalid,
ScalarFromJSONString(uint64(), "[18446744073709551616]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[-1]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "0", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "[]", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "0.0", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "\"true\"", &scalar));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[0]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[9223372036854775808]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[-9223372036854775809]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[18446744073709551616]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[-1]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "0"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "[]"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "0.0"));
ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "\"true\""));
}

TEST(TestDictScalarFromJSONString, Basics) {
Expand All @@ -1553,12 +1548,11 @@ TEST(TestDictScalarFromJSONString, Basics) {

TEST(TestDictScalarFromJSONString, Errors) {
auto type = dictionary(int32(), utf8());
std::shared_ptr<Scalar> scalar;

ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "\"not a valid index\"", "[\"\"]",
&scalar));
ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "0", "[1]",
&scalar)); // dict value isn't string
ASSERT_RAISES(Invalid,
DictScalarFromJSONString(type, "\"not a valid index\"", "[\"\"]"));
ASSERT_RAISES(Invalid,
DictScalarFromJSONString(type, "0", "[1]")); // dict value isn't string
}

} // namespace json
Expand Down
14 changes: 6 additions & 8 deletions cpp/src/arrow/testing/gtest_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -387,15 +387,14 @@ std::shared_ptr<Array> ArrayFromJSON(const std::shared_ptr<DataType>& type,
std::shared_ptr<Array> DictArrayFromJSON(const std::shared_ptr<DataType>& type,
std::string_view indices_json,
std::string_view dictionary_json) {
std::shared_ptr<Array> out;
ABORT_NOT_OK(json::DictArrayFromJSONString(type, indices_json, dictionary_json, &out));
EXPECT_OK_AND_ASSIGN(
auto out, json::DictArrayFromJSONString(type, indices_json, dictionary_json));
return out;
}

std::shared_ptr<ChunkedArray> ChunkedArrayFromJSON(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& json) {
std::shared_ptr<ChunkedArray> out;
ABORT_NOT_OK(json::ChunkedArrayFromJSONString(type, json, &out));
EXPECT_OK_AND_ASSIGN(auto out, json::ChunkedArrayFromJSONString(type, json));
return out;
}

Expand All @@ -411,16 +410,15 @@ std::shared_ptr<RecordBatch> RecordBatchFromJSON(const std::shared_ptr<Schema>&

std::shared_ptr<Scalar> ScalarFromJSON(const std::shared_ptr<DataType>& type,
std::string_view json) {
std::shared_ptr<Scalar> out;
ABORT_NOT_OK(json::ScalarFromJSONString(type, json, &out));
EXPECT_OK_AND_ASSIGN(auto out, json::ScalarFromJSONString(type, json));
return out;
}

std::shared_ptr<Scalar> DictScalarFromJSON(const std::shared_ptr<DataType>& type,
std::string_view index_json,
std::string_view dictionary_json) {
std::shared_ptr<Scalar> out;
ABORT_NOT_OK(json::DictScalarFromJSONString(type, index_json, dictionary_json, &out));
EXPECT_OK_AND_ASSIGN(auto out,
json::DictScalarFromJSONString(type, index_json, dictionary_json));
return out;
}

Expand Down
16 changes: 6 additions & 10 deletions python/pyarrow/src/arrow/python/gdb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,8 @@ void TestSession() {
ExtensionScalar extension_scalar_null{extension_scalar.value, extension_scalar_type,
/*is_valid=*/false};

std::shared_ptr<Scalar> heap_map_scalar;
ARROW_CHECK_OK(ScalarFromJSONString(map(utf8(), int32()), R"([["a", 5], ["b", 6]])",
&heap_map_scalar));
auto heap_map_scalar =
*ScalarFromJSONString(map(utf8(), int32()), R"([["a", 5], ["b", 6]])");
auto heap_map_scalar_null = MakeNullScalar(heap_map_scalar->type);

// Array and ArrayData
Expand Down Expand Up @@ -479,13 +478,10 @@ void TestSession() {
key_value_metadata({"key1", "key2", "key3"}, {"value1", "value2", "value3"}));

// Table
ChunkedArrayVector table_columns{2};
ARROW_CHECK_OK(
ChunkedArrayFromJSONString(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0]));
ARROW_CHECK_OK(ChunkedArrayFromJSONString(
utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"},
&table_columns[1]));
auto table = Table::Make(batch_schema, table_columns);
auto col1 = ChunkedArrayFromJSONString(int32(), {"[1, 2, 3]", "[4, 5]"});
auto col2 = ChunkedArrayFromJSONString(
utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"});
auto table = Table::Make(batch_schema, {*col1, *col2});

// Datum
Datum empty_datum{};
Expand Down
Loading