From e6b82677057510a5c47200785489a46260bf6970 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 18 Apr 2025 11:16:25 -0700 Subject: [PATCH 01/33] Move core *FromJSON helpers from ipc to util NS This moves the following functions from the IPC namespace to util to make it clear these are useful outside of their use in Arrow IPC. - ArrayFromJSON - ChunkedArrayFromJSON - DictArrayFromJSON - ScalarFromJSON - DictScalarFromJSON --- cpp/src/arrow/CMakeLists.txt | 12 +- cpp/src/arrow/acero/hash_join_node_test.cc | 173 ++++--- cpp/src/arrow/api.h | 1 + cpp/src/arrow/c/bridge_benchmark.cc | 6 +- cpp/src/arrow/c/bridge_test.cc | 488 +++++++++--------- .../arrow/compute/kernels/vector_hash_test.cc | 2 - cpp/src/arrow/dataset/test_util_internal.h | 4 +- cpp/src/arrow/ipc/CMakeLists.txt | 1 - cpp/src/arrow/ipc/api.h | 2 +- cpp/src/arrow/ipc/generate_fuzz_corpus.cc | 4 +- cpp/src/arrow/testing/gtest_util.cc | 30 +- cpp/src/arrow/testing/matchers.h | 4 +- cpp/src/arrow/util/CMakeLists.txt | 4 + .../{ipc/json_simple.cc => util/from_json.cc} | 10 +- .../{ipc/json_simple.h => util/from_json.h} | 8 +- .../from_json_test.cc} | 13 +- python/pyarrow/src/arrow/python/gdb.cc | 8 +- python/pyarrow/tests/extensions.pyx | 4 +- 18 files changed, 402 insertions(+), 372 deletions(-) rename cpp/src/arrow/{ipc/json_simple.cc => util/from_json.cc} (99%) rename cpp/src/arrow/{ipc/json_simple.h => util/from_json.h} (95%) rename cpp/src/arrow/{ipc/json_simple_test.cc => util/from_json_test.cc} (99%) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 7bfdc332f14..1aa43c68fd1 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -551,6 +551,10 @@ if(ARROW_HAVE_NEON) list(APPEND ARROW_UTIL_SRCS util/bpacking_neon.cc) endif() +if(ARROW_JSON) + list(APPEND ARROW_UTIL_SRCS util/from_json.cc) +endif() + if(ARROW_WITH_BROTLI) list(APPEND ARROW_UTIL_SRCS util/compression_brotli.cc) endif() @@ -620,6 +624,11 @@ if(ARROW_WITH_OPENTELEMETRY) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) endforeach() endif() +if(ARROW_WITH_RAPIDJSON) + foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) + target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE RapidJSON) + endforeach() +endif() if(ARROW_WITH_ZLIB) foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS}) target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ZLIB::ZLIB) @@ -914,9 +923,6 @@ if(ARROW_IPC) ipc/options.cc ipc/reader.cc ipc/writer.cc) - if(ARROW_JSON) - list(APPEND ARROW_IPC_SRCS ipc/json_simple.cc) - endif() arrow_add_object_library(ARROW_IPC ${ARROW_IPC_SRCS}) foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS}) target_link_libraries(${ARROW_IPC_TARGET} PRIVATE arrow::flatbuffers) diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 654fd59c45d..f4c6ab895c5 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -1397,16 +1397,16 @@ TEST(HashJoin, Dictionary) { TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::EQ, parallel, // Input - ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), DictArrayFromJSON(int8_utf8, R"([4, 2, 3, 0])", R"(["p", "q", "r", null, "r"])"), - ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), DictArrayFromJSON(int16_utf8, R"([0, 1, 0, 2])", R"(["r", null, "r", "q"])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), + 
arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), DictArrayFromJSON(int8_utf8, R"([4, 4, 2, 3, 0, null])", R"(["p", "q", "r", null, "r"])"), - ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), DictArrayFromJSON(int16_utf8, R"([0, 1, 2, 2, null, 0])", R"(["r", null, "r", "q"])"), 1, swap_sides); @@ -1421,15 +1421,16 @@ TEST(HashJoin, Dictionary) { auto l_key_dict_type = dict_types[rng.from_range(0, 7)]; auto r_key_dict_type = dict_types[rng.from_range(0, 7)]; - auto l_key = l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 1])", - R"(["b", null, "a"])") - : ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); - auto l_payload = ArrayFromJSON(utf8(), R"(["x", "y", "z", "y"])"); - auto r_key = r_key_dict - ? DictArrayFromJSON(int16_utf8, R"([1, 0, null, 1, 2])", - R"([null, "b", "c"])") - : ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); - auto r_payload = ArrayFromJSON(utf8(), R"(["p", "r", "p", "q", "s"])"); + auto l_key = l_key_dict + ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 1])", + R"(["b", null, "a"])") + : arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); + auto l_payload = arrow::ArrayFromJSON(utf8(), R"(["x", "y", "z", "y"])"); + auto r_key = + r_key_dict ? DictArrayFromJSON(int16_utf8, R"([1, 0, null, 1, 2])", + R"([null, "b", "c"])") + : arrow::ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); + auto r_payload = arrow::ArrayFromJSON(utf8(), R"(["p", "r", "p", "q", "s"])"); // IS comparison function (null is equal to null when matching keys) TestHashJoinDictionaryHelper( @@ -1437,18 +1438,20 @@ TEST(HashJoin, Dictionary) { // Input l_key, l_payload, r_key, r_payload, // Expected - l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, 1, + l_key_dict + ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, 1, null])", - R"(["b", null, "a"])") - : ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, + R"(["b", null, "a"])") + : arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, null])"), - ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", "y", null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", "y", null])"), r_key_dict ? DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, 1])", R"(["b", "c"])") - : ArrayFromJSON(utf8(), R"([null, null, "b", "b", null, null, "c"])"), - ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r", "p", "s"])"), 1, + : arrow::ArrayFromJSON(utf8(), + R"([null, null, "b", "b", null, null, "c"])"), + arrow::ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r", "p", "s"])"), 1, swap_sides); // EQ comparison function (null is not matching null) @@ -1460,17 +1463,19 @@ TEST(HashJoin, Dictionary) { l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, null, null, null])", R"(["b", null, "a"])") - : ArrayFromJSON( + : arrow::ArrayFromJSON( utf8(), R"(["a", "a", "b", "b", null, null, null, null])"), - ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", null, null, null])"), + arrow::ArrayFromJSON(utf8(), + R"(["x", "y", "z", "z", "y", null, null, null])"), r_key_dict ? 
DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, null, 1])", R"(["b", "c"])") - : ArrayFromJSON(utf8(), - R"([null, null, "b", "b", null, null, null, "c"])"), - ArrayFromJSON(utf8(), R"([null, null, "p", "q", null, "r", "p", "s"])"), 3, - swap_sides); + : arrow::ArrayFromJSON( + utf8(), R"([null, null, "b", "b", null, null, null, "c"])"), + arrow::ArrayFromJSON(utf8(), + R"([null, null, "p", "q", null, "r", "p", "s"])"), + 3, swap_sides); } } } @@ -1557,17 +1562,17 @@ TEST(HashJoin, Scalars) { TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::EQ, false /*parallel*/, // Input - ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), use_scalar_dict ? DictScalarFromJSON(int16_utf8, "1", R"(["z", "x", "y"])") : ScalarFromJSON(utf8(), "\"x\""), - ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), use_scalar_dict ? DictScalarFromJSON(int32_utf8, "0", R"(["z", "x", "y"])") : ScalarFromJSON(utf8(), "\"z\""), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), - ArrayFromJSON(utf8(), R"(["x", "x", "x", "x", "x", null])"), - ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), - ArrayFromJSON(utf8(), R"(["z", "z", "z", "z", null, "z"])"), 1, + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "x", "x", "x", "x", null])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["z", "z", "z", "z", null, "z"])"), 1, false /*swap sides*/); } @@ -1579,14 +1584,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a", null, null])"), - ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), - ArrayFromJSON(utf8(), R"(["a", "a", null, "b"])"), - ArrayFromJSON(utf8(), R"(["p", "p", "q", "r"])"), 2, swap_sides); + arrow::ArrayFromJSON(utf8(), R"(["a", "a", null, null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "p", "q", "r"])"), 2, swap_sides); } } @@ -1598,27 +1603,27 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? 
DictScalarFromJSON(int16_utf8, "2", R"(["a", "b", null])") : ScalarFromJSON(utf8(), "null"), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"([null, null, null, null, null])"), - ArrayFromJSON(utf8(), R"(["x", "y", null, null, null])"), - ArrayFromJSON(utf8(), R"([null, null, "a", null, "b"])"), - ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r"])"), 3, swap_sides); + arrow::ArrayFromJSON(utf8(), R"([null, null, null, null, null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null, null])"), + arrow::ArrayFromJSON(utf8(), R"([null, null, "a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r"])"), 3, swap_sides); TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::IS, false /*parallel*/, // Input use_scalar_dict ? DictScalarFromJSON(int16_utf8, "null", R"(["a", "b", null])") : ScalarFromJSON(utf8(), "null"), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"([null, null, null, null])"), - ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), - ArrayFromJSON(utf8(), R"([null, null, "a", "b"])"), - ArrayFromJSON(utf8(), R"(["q", "q", "p", "r"])"), 2, swap_sides); + arrow::ArrayFromJSON(utf8(), R"([null, null, null, null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), + arrow::ArrayFromJSON(utf8(), R"([null, null, "a", "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["q", "q", "p", "r"])"), 2, swap_sides); } } @@ -1630,12 +1635,13 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - ArrayFromJSON(utf8(), R"(["x", "y"])"), ArrayFromJSON(utf8(), R"([])"), - ArrayFromJSON(utf8(), R"([])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"([])"), arrow::ArrayFromJSON(utf8(), R"([])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"([null, null])"), - ArrayFromJSON(utf8(), R"([null, null])"), 0, swap_sides); + arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"([null, null])"), + arrow::ArrayFromJSON(utf8(), R"([null, null])"), 0, swap_sides); } } @@ -1647,14 +1653,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? 
DictScalarFromJSON(int32_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), DictArrayFromJSON(int32_utf8, R"([2, 2, 1])", R"(["b", null, "a"])"), - ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), - ArrayFromJSON(utf8(), R"(["x", "x", "y", "y", null])"), - ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), - ArrayFromJSON(utf8(), R"(["p", "q", "p", "q", "r"])"), 1, swap_sides); + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "x", "y", "y", null])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "p", "q", "r"])"), 1, swap_sides); } } @@ -1666,13 +1672,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["p", "p"])"), - 2, swap_sides); + arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), + arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), + arrow::ArrayFromJSON(utf8(), R"(["p", "p"])"), 2, swap_sides); } } } @@ -1681,15 +1688,15 @@ TEST(HashJoin, DictNegative) { // For dictionary keys, all batches must share a single dictionary. // Eventually, differing dictionaries will be unified and indices transposed // during encoding to relieve this restriction. - const auto dictA = ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); - const auto dictB = ArrayFromJSON(utf8(), R"(["different", "dictionary"])"); + const auto dictA = arrow::ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); + const auto dictB = arrow::ArrayFromJSON(utf8(), R"(["different", "dictionary"])"); - Datum datumFirst = Datum( - *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); - Datum datumSecondA = Datum( - *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); - Datum datumSecondB = Datum( - *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); + Datum datumFirst = Datum(*DictionaryArray::FromArrays( + arrow::ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); + Datum datumSecondA = Datum(*DictionaryArray::FromArrays( + arrow::ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); + Datum datumSecondB = Datum(*DictionaryArray::FromArrays( + arrow::ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); for (int i = 0; i < 4; ++i) { BatchesWithSchema l, r; @@ -1788,9 +1795,9 @@ void TestSimpleJoinHelper(BatchesWithSchema input_left, BatchesWithSchema input_ TEST(HashJoin, ExtensionTypesSwissJoin) { // For simpler types swiss join will be used. 
auto ext_arr = ExampleUuid(); - auto l_int_arr = ArrayFromJSON(int32(), "[1, 2, 3, 4]"); - auto l_int_arr2 = ArrayFromJSON(int32(), "[4, 5, 6, 7]"); - auto r_int_arr = ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); + auto l_int_arr = arrow::ArrayFromJSON(int32(), "[1, 2, 3, 4]"); + auto l_int_arr2 = arrow::ArrayFromJSON(int32(), "[4, 5, 6, 7]"); + auto r_int_arr = arrow::ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); BatchesWithSchema input_left; ASSERT_OK_AND_ASSIGN(ExecBatch left_batches, @@ -1818,9 +1825,9 @@ TEST(HashJoin, ExtensionTypesHashJoin) { // Swiss join doesn't support dictionaries so HashJoin will be used. auto dict_type = dictionary(int64(), int8()); auto ext_arr = ExampleUuid(); - auto l_int_arr = ArrayFromJSON(int32(), "[1, 2, 3, 4]"); - auto l_int_arr2 = ArrayFromJSON(int32(), "[4, 5, 6, 7]"); - auto r_int_arr = ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); + auto l_int_arr = arrow::ArrayFromJSON(int32(), "[1, 2, 3, 4]"); + auto l_int_arr2 = arrow::ArrayFromJSON(int32(), "[4, 5, 6, 7]"); + auto r_int_arr = arrow::ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); auto l_dict_array = DictArrayFromJSON(dict_type, R"([2, 0, 1, null])", R"([null, 0, 1])"); diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index ac568a00eed..2a0bc345211 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -38,6 +38,7 @@ #include "arrow/table_builder.h" // IWYU pragma: export #include "arrow/tensor.h" // IWYU pragma: export #include "arrow/type.h" // IWYU pragma: export +#include "arrow/util/from_json.h" // IWYU pragma: export #include "arrow/util/key_value_metadata.h" // IWYU pragma: export #include "arrow/visit_array_inline.h" // IWYU pragma: export #include "arrow/visit_scalar_inline.h" // IWYU pragma: export diff --git a/cpp/src/arrow/c/bridge_benchmark.cc b/cpp/src/arrow/c/bridge_benchmark.cc index 85e091704bf..0a4858b65fc 100644 --- a/cpp/src/arrow/c/bridge_benchmark.cc +++ b/cpp/src/arrow/c/bridge_benchmark.cc @@ -22,10 +22,10 @@ #include "arrow/array.h" #include "arrow/c/bridge.h" #include "arrow/c/helpers.h" -#include "arrow/ipc/json_simple.h" #include "arrow/record_batch.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" +#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" namespace arrow::benchmarks { @@ -79,7 +79,7 @@ static void ExportSchema(benchmark::State& state) { // NOLINT non-const referen static void ExportArray(benchmark::State& state) { // NOLINT non-const reference struct ArrowArray c_export; - auto array = ArrayFromJSON(utf8(), R"(["foo", "bar", null])"); + auto array = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", null])"); for (auto _ : state) { ABORT_NOT_OK(::arrow::ExportArray(*array, &c_export)); @@ -123,7 +123,7 @@ static void ExportImportSchema(benchmark::State& state) { // NOLINT non-const r static void ExportImportArray(benchmark::State& state) { // NOLINT non-const reference struct ArrowArray c_export; - auto array = ArrayFromJSON(utf8(), R"(["foo", "bar", null])"); + auto array = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", null])"); auto type = array->type(); for (auto _ : state) { diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 5848dd0b55b..5e2e6ea313f 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -31,7 +31,6 @@ #include "arrow/c/bridge.h" #include "arrow/c/helpers.h" #include "arrow/c/util_internal.h" -#include "arrow/ipc/json_simple.h" #include "arrow/memory_pool.h" #include "arrow/testing/builder.h" #include 
"arrow/testing/extension_type.h" @@ -42,6 +41,7 @@ #include "arrow/util/binary_view_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/endian.h" +#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging_internal.h" #include "arrow/util/macros.h" @@ -923,7 +923,9 @@ TEST_F(TestArrayExport, Primitive) { } TEST_F(TestArrayExport, PrimitiveSliced) { - auto factory = []() { return ArrayFromJSON(int16(), "[1, 2, null, -3]")->Slice(1, 2); }; + auto factory = []() { + return arrow::ArrayFromJSON(int16(), "[1, 2, null, -3]")->Slice(1, 2); + }; TestPrimitive(factory); } @@ -1004,15 +1006,16 @@ TEST_F(TestArrayExport, List) { TEST_F(TestArrayExport, ListSliced) { { auto factory = []() { - return ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") + return arrow::ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") ->Slice(1, 2); }; TestNested(factory); } { auto factory = []() { - auto values = ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); - auto offsets = ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->Slice(2, 4); + auto values = + arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); + auto offsets = arrow::ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->Slice(2, 4); return ListArray::FromArrays(*offsets, *values); }; TestNested(factory); @@ -1030,16 +1033,18 @@ TEST_F(TestArrayExport, ListView) { TEST_F(TestArrayExport, ListViewSliced) { { auto factory = []() { - return ArrayFromJSON(list_view(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") + return arrow::ArrayFromJSON(list_view(int8()), + "[[1, 2], [3, null], [4, 5, 6], null]") ->Slice(1, 2); }; TestNested(factory); } { auto factory = []() { - auto values = ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); - auto offsets = ArrayFromJSON(int32(), "[5, 2, 0, 3]")->Slice(1, 2); - auto sizes = ArrayFromJSON(int32(), "[2, 3, 6, 1]")->Slice(1, 2); + auto values = + arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); + auto offsets = arrow::ArrayFromJSON(int32(), "[5, 2, 0, 3]")->Slice(1, 2); + auto sizes = arrow::ArrayFromJSON(int32(), "[2, 3, 6, 1]")->Slice(1, 2); return ListViewArray::FromArrays(*offsets, *sizes, *values); }; TestNested(factory); @@ -1076,7 +1081,7 @@ TEST_F(TestArrayExport, Union) { Result> REEFromJSON(const std::shared_ptr& ree_type, const std::string& json) { auto ree_type_ptr = checked_cast(ree_type.get()); - auto array = ArrayFromJSON(ree_type_ptr->value_type(), json); + auto array = arrow::ArrayFromJSON(ree_type_ptr->value_type(), json); ARROW_ASSIGN_OR_RAISE( auto datum, RunEndEncode(array, compute::RunEndEncodeOptions{ree_type_ptr->run_end_type()})); @@ -1105,8 +1110,8 @@ TEST_F(TestArrayExport, RunEndEncodedSliced) { TEST_F(TestArrayExport, Dictionary) { { auto factory = []() { - auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = ArrayFromJSON(uint16(), "[0, 2, 1, null, 1]"); + auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = arrow::ArrayFromJSON(uint16(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -1114,8 +1119,9 @@ TEST_F(TestArrayExport, Dictionary) { } { auto factory = []() { - auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = + arrow::ArrayFromJSON(list(utf8()), R"([["abc", 
"def"], ["efg"], []])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays( dictionary(indices->type(), values->type(), /*ordered=*/true), indices, values); }; @@ -1123,13 +1129,14 @@ TEST_F(TestArrayExport, Dictionary) { } { auto factory = []() -> Result> { - auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = + arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1160,8 +1167,8 @@ TEST_F(TestArrayExport, MoveNested) { TEST_F(TestArrayExport, MoveDictionary) { { auto factory = []() { - auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -1169,13 +1176,14 @@ TEST_F(TestArrayExport, MoveDictionary) { } { auto factory = []() -> Result> { - auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = + arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1196,13 +1204,14 @@ TEST_F(TestArrayExport, MoveChild) { /*child_id=*/1); { auto factory = []() -> Result> { - auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = + arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1223,7 +1232,7 @@ TEST_F(TestArrayExport, ExportArrayAndType) { SchemaExportGuard schema_guard(&c_schema); ArrayExportGuard array_guard(&c_array); - auto array = ArrayFromJSON(int8(), "[1, 2, 3]"); + auto array = arrow::ArrayFromJSON(int8(), "[1, 2, 3]"); ASSERT_OK(ExportArray(*array, &c_array, &c_schema)); const ArrayData& data = *array->data(); array.reset(); @@ -1242,8 +1251,8 @@ TEST_F(TestArrayExport, ExportRecordBatch) { auto schema = ::arrow::schema( 
{field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); schema = schema->WithMetadata(key_value_metadata(kMetadataKeys2, kMetadataValues2)); - auto arr0 = ArrayFromJSON(int16(), "[1, 2, null]"); - auto arr1 = ArrayFromJSON(boolean(), "[false, true, false]"); + auto arr0 = arrow::ArrayFromJSON(int16(), "[1, 2, null]"); + auto arr1 = arrow::ArrayFromJSON(boolean(), "[false, true, false]"); auto batch_factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; @@ -1421,7 +1430,7 @@ class TestDeviceArrayExport : public ::testing::Test { static std::function>()> JSONArrayFactory( const std::shared_ptr& mm, std::shared_ptr type, const char* json) { - return [=]() { return ToDevice(mm, *ArrayFromJSON(type, json)->data()); }; + return [=]() { return ToDevice(mm, *arrow::ArrayFromJSON(type, json)->data()); }; } #ifdef ARROW_COMPUTE @@ -1524,7 +1533,7 @@ TEST_F(TestDeviceArrayExport, PrimitiveSliced) { auto mm = device->default_memory_manager(); auto factory = [=]() { - return (*ToDevice(mm, *ArrayFromJSON(int16(), "[1, 2, null, -3]")->data())) + return (*ToDevice(mm, *arrow::ArrayFromJSON(int16(), "[1, 2, null, -3]")->data())) ->Slice(1, 2); }; TestPrimitive(factory); @@ -1577,9 +1586,9 @@ TEST_F(TestDeviceArrayExport, ListSliced) { { auto factory = [=]() { - return (*ToDevice( - mm, *ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") - ->data())) + return (*ToDevice(mm, *arrow::ArrayFromJSON(list(int8()), + "[[1, 2], [3, null], [4, 5, 6], null]") + ->data())) ->Slice(1, 2); }; TestNested(factory); @@ -1587,11 +1596,13 @@ TEST_F(TestDeviceArrayExport, ListSliced) { { auto factory = [=]() { auto values = - (*ToDevice(mm, - *ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) + (*ToDevice( + mm, + *arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) ->Slice(1, 6); - auto offsets = (*ToDevice(mm, *ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->data())) - ->Slice(2, 4); + auto offsets = + (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->data())) + ->Slice(2, 4); return ListArray::FromArrays(*offsets, *values); }; TestNested(factory); @@ -1614,8 +1625,8 @@ TEST_F(TestDeviceArrayExport, ListViewSliced) { { auto factory = [=]() { - return (*ToDevice(mm, *ArrayFromJSON(list_view(int8()), - "[[1, 2], [3, null], [4, 5, 6], null]") + return (*ToDevice(mm, *arrow::ArrayFromJSON(list_view(int8()), + "[[1, 2], [3, null], [4, 5, 6], null]") ->data())) ->Slice(1, 2); }; @@ -1624,13 +1635,15 @@ TEST_F(TestDeviceArrayExport, ListViewSliced) { { auto factory = [=]() { auto values = - (*ToDevice(mm, - *ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) + (*ToDevice( + mm, + *arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) ->Slice(1, 6); auto offsets = - (*ToDevice(mm, *ArrayFromJSON(int32(), "[5, 2, 0, 3]")->data()))->Slice(1, 2); - auto sizes = - (*ToDevice(mm, *ArrayFromJSON(int32(), "[2, 3, 6, 1]")->data()))->Slice(1, 2); + (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[5, 2, 0, 3]")->data())) + ->Slice(1, 2); + auto sizes = (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[2, 3, 6, 1]")->data())) + ->Slice(1, 2); return ListViewArray::FromArrays(*offsets, *sizes, *values); }; TestNested(factory); @@ -1701,7 +1714,8 @@ TEST_F(TestDeviceArrayExport, ExportArrayAndType) { SchemaExportGuard schema_guard(&c_schema); ArrayExportGuard array_guard(&c_array.array); - auto array = ToDevice(mm, *ArrayFromJSON(int8(), "[1, 2, 3]")->data()).ValueOrDie(); + auto array = + ToDevice(mm, 
*arrow::ArrayFromJSON(int8(), "[1, 2, 3]")->data()).ValueOrDie(); auto sync = mm->MakeDeviceSyncEvent().ValueOrDie(); ASSERT_OK(ExportDeviceArray(*array, sync, &c_array, &c_schema)); const ArrayData& data = *array->data(); @@ -1724,9 +1738,11 @@ TEST_F(TestDeviceArrayExport, ExportRecordBatch) { auto schema = ::arrow::schema( {field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); schema = schema->WithMetadata(key_value_metadata(kMetadataKeys2, kMetadataValues2)); - auto arr0 = ToDevice(mm, *ArrayFromJSON(int16(), "[1, 2, null]")->data()).ValueOrDie(); - auto arr1 = ToDevice(mm, *ArrayFromJSON(boolean(), "[false, true, false]")->data()) - .ValueOrDie(); + auto arr0 = + ToDevice(mm, *arrow::ArrayFromJSON(int16(), "[1, 2, null]")->data()).ValueOrDie(); + auto arr1 = + ToDevice(mm, *arrow::ArrayFromJSON(boolean(), "[false, true, false]")->data()) + .ValueOrDie(); auto batch_factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; auto sync = mm->MakeDeviceSyncEvent().ValueOrDie(); @@ -2800,103 +2816,104 @@ class TestArrayImport : public ::testing::Test { TEST_F(TestArrayImport, Primitive) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(ArrayFromJSON(int8(), "[1, 2, 3]")); + CheckImport(arrow::ArrayFromJSON(int8(), "[1, 2, 3]")); FillPrimitive(5, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(ArrayFromJSON(uint8(), "[1, 2, 3, 4, 5]")); + CheckImport(arrow::ArrayFromJSON(uint8(), "[1, 2, 3, 4, 5]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - CheckImport(ArrayFromJSON(int16(), "[513, 1027, 1541]")); + CheckImport(arrow::ArrayFromJSON(int16(), "[513, 1027, 1541]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - CheckImport(ArrayFromJSON(uint16(), "[513, 1027, 1541]")); + CheckImport(arrow::ArrayFromJSON(uint16(), "[513, 1027, 1541]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_32); - CheckImport(ArrayFromJSON(int32(), "[67305985, 134678021]")); + CheckImport(arrow::ArrayFromJSON(int32(), "[67305985, 134678021]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_32); - CheckImport(ArrayFromJSON(uint32(), "[67305985, 134678021]")); + CheckImport(arrow::ArrayFromJSON(uint32(), "[67305985, 134678021]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_64); - CheckImport(ArrayFromJSON(int64(), "[578437695752307201, 1157159078456920585]")); + CheckImport(arrow::ArrayFromJSON(int64(), "[578437695752307201, 1157159078456920585]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_64); - CheckImport(ArrayFromJSON(uint64(), "[578437695752307201, 1157159078456920585]")); + CheckImport( + arrow::ArrayFromJSON(uint64(), "[578437695752307201, 1157159078456920585]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(ArrayFromJSON(boolean(), "[true, false, false]")); + CheckImport(arrow::ArrayFromJSON(boolean(), "[true, false, false]")); FillPrimitive(6, 0, 0, primitive_buffers_no_nulls5); - CheckImport(ArrayFromJSON(float32(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); + CheckImport(arrow::ArrayFromJSON(float32(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); FillPrimitive(6, 0, 0, primitive_buffers_no_nulls6); - CheckImport(ArrayFromJSON(float64(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); + CheckImport(arrow::ArrayFromJSON(float64(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); // With nulls FillPrimitive(9, -1, 0, primitive_buffers_nulls1_8); - CheckImport(ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); + CheckImport(arrow::ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); 
FillPrimitive(9, 2, 0, primitive_buffers_nulls1_8); - CheckImport(ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); + CheckImport(arrow::ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls1_16); - CheckImport(ArrayFromJSON(int16(), "[513, null, 1541]")); + CheckImport(arrow::ArrayFromJSON(int16(), "[513, null, 1541]")); FillPrimitive(3, 1, 0, primitive_buffers_nulls1_16); - CheckImport(ArrayFromJSON(int16(), "[513, null, 1541]")); + CheckImport(arrow::ArrayFromJSON(int16(), "[513, null, 1541]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls1_8); - CheckImport(ArrayFromJSON(boolean(), "[true, null, false]")); + CheckImport(arrow::ArrayFromJSON(boolean(), "[true, null, false]")); FillPrimitive(3, 1, 0, primitive_buffers_nulls1_8); - CheckImport(ArrayFromJSON(boolean(), "[true, null, false]")); + CheckImport(arrow::ArrayFromJSON(boolean(), "[true, null, false]")); // Empty array with null data pointers FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(ArrayFromJSON(int32(), "[]")); + CheckImport(arrow::ArrayFromJSON(int32(), "[]")); } TEST_F(TestArrayImport, Temporal) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls7); - CheckImport(ArrayFromJSON(date32(), "[1234, 5678, 9012]")); + CheckImport(arrow::ArrayFromJSON(date32(), "[1234, 5678, 9012]")); FillPrimitive(3, 0, 0, date64_buffers_no_nulls8); - CheckImport(ArrayFromJSON(date64(), "[86400000, 172800000, -86400000]")); + CheckImport(arrow::ArrayFromJSON(date64(), "[86400000, 172800000, -86400000]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7); - CheckImport(ArrayFromJSON(time32(TimeUnit::SECOND), "[1234, 5678]")); + CheckImport(arrow::ArrayFromJSON(time32(TimeUnit::SECOND), "[1234, 5678]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7); - CheckImport(ArrayFromJSON(time32(TimeUnit::MILLI), "[1234, 5678]")); + CheckImport(arrow::ArrayFromJSON(time32(TimeUnit::MILLI), "[1234, 5678]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(time64(TimeUnit::MICRO), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::MICRO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(duration(TimeUnit::SECOND), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::SECOND), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(duration(TimeUnit::MILLI), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::MILLI), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(duration(TimeUnit::MICRO), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::MICRO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, 987654321]")); + CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, 987654321]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls7); - CheckImport(ArrayFromJSON(month_interval(), "[1234, 5678, 9012]")); + CheckImport(arrow::ArrayFromJSON(month_interval(), "[1234, 5678, 9012]")); FillPrimitive(2, 0, 0, 
primitive_buffers_no_nulls7); - CheckImport(ArrayFromJSON(day_time_interval(), "[[1234, 5678], [9012, 3456]]")); + CheckImport(arrow::ArrayFromJSON(day_time_interval(), "[[1234, 5678], [9012, 3456]]")); const char* json = R"(["1970-01-01","2000-02-29","1900-02-28"])"; FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls1); - CheckImport(ArrayFromJSON(timestamp(TimeUnit::SECOND), json)); + CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::SECOND), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls2); - CheckImport(ArrayFromJSON(timestamp(TimeUnit::MILLI), json)); + CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::MILLI), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls3); - CheckImport(ArrayFromJSON(timestamp(TimeUnit::MICRO), json)); + CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::MICRO), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls4); - CheckImport(ArrayFromJSON(timestamp(TimeUnit::NANO), json)); + CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::NANO), json)); // With nulls FillPrimitive(3, -1, 0, primitive_buffers_nulls7); - CheckImport(ArrayFromJSON(date32(), "[1234, null, 9012]")); + CheckImport(arrow::ArrayFromJSON(date32(), "[1234, null, 9012]")); FillPrimitive(3, -1, 0, date64_buffers_nulls8); - CheckImport(ArrayFromJSON(date64(), "[86400000, null, -86400000]")); + CheckImport(arrow::ArrayFromJSON(date64(), "[86400000, null, -86400000]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls8); - CheckImport(ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, null]")); + CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, null]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls8); - CheckImport(ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, null]")); + CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, null]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls7); - CheckImport(ArrayFromJSON(month_interval(), "[1234, null, 9012]")); + CheckImport(arrow::ArrayFromJSON(month_interval(), "[1234, null, 9012]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls7); - CheckImport(ArrayFromJSON(day_time_interval(), "[[1234, 5678], null]")); + CheckImport(arrow::ArrayFromJSON(day_time_interval(), "[[1234, 5678], null]")); FillPrimitive(3, -1, 0, timestamp_buffers_nulls1); - CheckImport(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC+2"), - R"(["1970-01-01",null,"1900-02-28"])")); + CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC+2"), + R"(["1970-01-01",null,"1900-02-28"])")); } TEST_F(TestArrayImport, Null) { @@ -2908,24 +2925,24 @@ TEST_F(TestArrayImport, Null) { c_struct_.offset = 0; c_struct_.buffers = buffers; c_struct_.n_buffers = n_buffers; - CheckImport(ArrayFromJSON(null(), "[null, null, null]")); + CheckImport(arrow::ArrayFromJSON(null(), "[null, null, null]")); } } TEST_F(TestArrayImport, PrimitiveWithOffset) { FillPrimitive(3, 0, 2, primitive_buffers_no_nulls1_8); - CheckImport(ArrayFromJSON(int8(), "[3, 4, 5]")); + CheckImport(arrow::ArrayFromJSON(int8(), "[3, 4, 5]")); FillPrimitive(3, 0, 1, primitive_buffers_no_nulls1_16); - CheckImport(ArrayFromJSON(uint16(), "[1027, 1541, 2055]")); + CheckImport(arrow::ArrayFromJSON(uint16(), "[1027, 1541, 2055]")); FillPrimitive(4, 0, 7, primitive_buffers_no_nulls1_8); - CheckImport(ArrayFromJSON(boolean(), "[false, false, true, false]")); + CheckImport(arrow::ArrayFromJSON(boolean(), "[false, false, true, false]")); // Empty array with null data pointers FillPrimitive(0, 0, 2, all_buffers_omitted); - CheckImport(ArrayFromJSON(int32(), "[]")); 
+ CheckImport(arrow::ArrayFromJSON(int32(), "[]")); FillPrimitive(0, 0, 3, all_buffers_omitted); - CheckImport(ArrayFromJSON(boolean(), "[]")); + CheckImport(arrow::ArrayFromJSON(boolean(), "[]")); } TEST_F(TestArrayImport, NullWithOffset) { @@ -2935,18 +2952,18 @@ TEST_F(TestArrayImport, NullWithOffset) { c_struct_.offset = 5; c_struct_.n_buffers = 1; c_struct_.buffers = buffers; - CheckImport(ArrayFromJSON(null(), "[null, null, null]")); + CheckImport(arrow::ArrayFromJSON(null(), "[null, null, null]")); } TEST_F(TestArrayImport, String) { FillStringLike(4, 0, 0, string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(binary(), R"(["foo", "", "bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(binary(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, large_string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(large_utf8(), R"(["foo", "", "bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(large_utf8(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, large_string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])")); auto length = static_cast(std::size(binary_view_buffer1)); FillStringViewLike(length, 0, 0, binary_view_buffers_no_nulls1, 2); @@ -2954,186 +2971,191 @@ TEST_F(TestArrayImport, String) { // Empty array with null data pointers FillStringLike(0, 0, 0, string_buffers_omitted); - CheckImport(ArrayFromJSON(utf8(), "[]")); + CheckImport(arrow::ArrayFromJSON(utf8(), "[]")); FillStringLike(0, 0, 0, large_string_buffers_omitted); - CheckImport(ArrayFromJSON(large_binary(), "[]")); + CheckImport(arrow::ArrayFromJSON(large_binary(), "[]")); } TEST_F(TestArrayImport, StringWithOffset) { FillStringLike(3, 0, 1, string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(utf8(), R"(["", "bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])")); FillStringLike(2, 0, 2, large_string_buffers_no_nulls1); - CheckImport(ArrayFromJSON(large_utf8(), R"(["bar", "quux"])")); + CheckImport(arrow::ArrayFromJSON(large_utf8(), R"(["bar", "quux"])")); // Empty array with null data pointers FillStringLike(0, 0, 1, string_buffers_omitted); - CheckImport(ArrayFromJSON(utf8(), "[]")); + CheckImport(arrow::ArrayFromJSON(utf8(), "[]")); } TEST_F(TestArrayImport, FixedSizeBinary) { FillPrimitive(2, 0, 0, primitive_buffers_no_nulls2); - CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])")); + CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls3); - CheckImport(ArrayFromJSON(decimal128(15, 4), R"(["12345.6789", "98765.4321"])")); + CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), R"(["12345.6789", "98765.4321"])")); // Empty array with null data pointers FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), "[]")); FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(ArrayFromJSON(decimal128(15, 4), "[]")); + CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), "[]")); } TEST_F(TestArrayImport, FixedSizeBinaryWithOffset) { FillPrimitive(1, 0, 1, primitive_buffers_no_nulls2); - CheckImport(ArrayFromJSON(fixed_size_binary(3), 
R"(["def"])")); + CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), R"(["def"])")); FillPrimitive(1, 0, 1, primitive_buffers_no_nulls3); - CheckImport(ArrayFromJSON(decimal128(15, 4), R"(["98765.4321"])")); + CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), R"(["98765.4321"])")); // Empty array with null data pointers FillPrimitive(0, 0, 1, all_buffers_omitted); - CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), "[]")); FillPrimitive(0, 0, 1, all_buffers_omitted); - CheckImport(ArrayFromJSON(decimal128(15, 4), "[]")); + CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), "[]")); } TEST_F(TestArrayImport, List) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(5, 0, 0, list_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); + CheckImport(arrow::ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListLike(3, 1, 0, list_buffers_nulls1); - CheckImport(ArrayFromJSON(list(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); + CheckImport( + arrow::ArrayFromJSON(list(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); // Large list FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListLike(3, 0, 0, large_list_buffers_no_nulls1); CheckImport( - ArrayFromJSON(large_list(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); + arrow::ArrayFromJSON(large_list(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); // Fixed-size list FillPrimitive(AddChild(), 9, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data); - CheckImport( - ArrayFromJSON(fixed_size_list(int8(), 3), "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), + "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]")); // Empty child array with null data pointers FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillFixedSizeListLike(0, 0, 0, buffers_no_nulls_no_data); - CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), "[]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), "[]")); } TEST_F(TestArrayImport, NestedList) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(AddChild(), 5, 0, 0, list_buffers_no_nulls1); FillListLike(3, 0, 0, large_list_buffers_no_nulls1); - CheckImport(ArrayFromJSON(large_list(list(int8())), - "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); + CheckImport(arrow::ArrayFromJSON(large_list(list(int8())), + "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); FillPrimitive(AddChild(), 6, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(AddChild(), 2, 0, 0, buffers_no_nulls_no_data); FillListLike(2, 0, 0, list_buffers_no_nulls1); - CheckImport( - ArrayFromJSON(list(fixed_size_list(int8(), 3)), "[[[1, 2, 3], [4, 5, 6]], []]")); + CheckImport(arrow::ArrayFromJSON(list(fixed_size_list(int8(), 3)), + "[[[1, 2, 3], [4, 5, 6]], []]")); } TEST_F(TestArrayImport, ListWithOffset) { // Offset in child FillPrimitive(AddChild(), 8, 0, 1, primitive_buffers_no_nulls1_8); FillListLike(5, 0, 0, list_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); + CheckImport(arrow::ArrayFromJSON(list(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); FillPrimitive(AddChild(), 9, 0, 1, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data); - CheckImport( - 
ArrayFromJSON(fixed_size_list(int8(), 3), "[[2, 3, 4], [5, 6, 7], [8, 9, 10]]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), + "[[2, 3, 4], [5, 6, 7], [8, 9, 10]]")); // Offset in parent FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(4, 0, 1, list_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); + CheckImport(arrow::ArrayFromJSON(list(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 9, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 1, buffers_no_nulls_no_data); - CheckImport( - ArrayFromJSON(fixed_size_list(int8(), 3), "[[4, 5, 6], [7, 8, 9], [10, 11, 12]]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), + "[[4, 5, 6], [7, 8, 9], [10, 11, 12]]")); // Both FillPrimitive(AddChild(), 8, 0, 2, primitive_buffers_no_nulls1_8); FillListLike(4, 0, 1, list_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); + CheckImport(arrow::ArrayFromJSON(list(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); FillPrimitive(AddChild(), 9, 0, 2, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 1, buffers_no_nulls_no_data); - CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), - "[[6, 7, 8], [9, 10, 11], [12, 13, 14]]")); + CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), + "[[6, 7, 8], [9, 10, 11], [12, 13, 14]]")); } TEST_F(TestArrayImport, ListView) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(5, 0, 0, list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list_view(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); + CheckImport( + arrow::ArrayFromJSON(list_view(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListView(3, 1, 0, list_view_buffers_nulls1); - CheckImport( - ArrayFromJSON(list_view(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); + CheckImport(arrow::ArrayFromJSON(list_view(int16()), + "[[513, 1027], null, [1541, 2055, 2569]]")); // Large list-view FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListView(3, 0, 0, large_list_view_buffers_no_nulls1); - CheckImport( - ArrayFromJSON(large_list_view(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); + CheckImport(arrow::ArrayFromJSON(large_list_view(int16()), + "[[513, 1027], [], [1541, 2055, 2569]]")); } TEST_F(TestArrayImport, NestedListView) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(AddChild(), 5, 0, 0, list_view_buffers_no_nulls1); FillListView(3, 0, 0, large_list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(large_list_view(list_view(int8())), - "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); + CheckImport(arrow::ArrayFromJSON(large_list_view(list_view(int8())), + "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); FillPrimitive(AddChild(), 6, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(AddChild(), 2, 0, 0, buffers_no_nulls_no_data); FillListView(2, 0, 0, list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list_view(fixed_size_list(int8(), 3)), - "[[[1, 2, 3], [4, 5, 6]], []]")); + CheckImport(arrow::ArrayFromJSON(list_view(fixed_size_list(int8(), 3)), + "[[[1, 2, 3], [4, 5, 6]], []]")); } TEST_F(TestArrayImport, ListViewWithOffset) { // Offset in child FillPrimitive(AddChild(), 8, 0, 1, primitive_buffers_no_nulls1_8); FillListView(5, 0, 0, list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list_view(int8()), 
"[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); + CheckImport( + arrow::ArrayFromJSON(list_view(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); // Offset in parent FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(4, 0, 1, list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list_view(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); + CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); // Both FillPrimitive(AddChild(), 8, 0, 2, primitive_buffers_no_nulls1_8); FillListView(4, 0, 1, list_view_buffers_no_nulls1); - CheckImport(ArrayFromJSON(list_view(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); + CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); } TEST_F(TestArrayImport, Struct) { FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_16); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - auto expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), - R"([["foo", 513], ["", null], ["bar", 1541]])"); + auto expected = + arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), + R"([["foo", 513], ["", null], ["bar", 1541]])"); CheckImport(expected); FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 0, primitive_buffers_no_nulls1_16); FillStructLike(3, -1, 0, 2, buffers_nulls_no_data1); - expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), + expected = + arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), R"([["foo", 513], null, ["bar", 1541]])"); CheckImport(expected); FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 0, primitive_buffers_no_nulls1_16); FillStructLike(3, -1, 0, 2, buffers_nulls_no_data1); - expected = ArrayFromJSON( + expected = arrow::ArrayFromJSON( struct_({field("strs", utf8(), /*nullable=*/false), field("ints", uint16())}), R"([["foo", 513], null, ["bar", 1541]])"); CheckImport(expected); @@ -3187,7 +3209,7 @@ TEST_F(TestArrayImport, RunEndEncodedWithOffset) { TEST_F(TestArrayImport, SparseUnion) { auto type = sparse_union({field("strs", utf8()), field("ints", int8())}, {43, 42}); auto expected = - ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "bar"], [43, "quux"]])"); + arrow::ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "bar"], [43, "quux"]])"); FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 4, -1, 0, primitive_buffers_nulls1_8); @@ -3202,7 +3224,7 @@ TEST_F(TestArrayImport, SparseUnion) { CheckImport(expected); // Empty array with null data pointers - expected = ArrayFromJSON(type, "[]"); + expected = arrow::ArrayFromJSON(type, "[]"); FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted); FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillUnionLike(UnionMode::SPARSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false); @@ -3213,8 +3235,8 @@ TEST_F(TestArrayImport, SparseUnion) { TEST_F(TestArrayImport, DenseUnion) { auto type = dense_union({field("strs", utf8()), field("ints", int8())}, {43, 42}); - auto expected = - ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "foo"], [43, ""], [42, 3]])"); + auto expected = arrow::ArrayFromJSON( + type, R"([[42, 1], [42, null], [43, "foo"], [43, ""], [42, 3]])"); FillStringLike(AddChild(), 2, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_8); @@ -3229,7 
+3251,7 @@ TEST_F(TestArrayImport, DenseUnion) { CheckImport(expected); // Empty array with null data pointers - expected = ArrayFromJSON(type, "[]"); + expected = arrow::ArrayFromJSON(type, "[]"); FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted); FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillUnionLike(UnionMode::DENSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false); @@ -3243,16 +3265,17 @@ TEST_F(TestArrayImport, StructWithOffset) { FillStringLike(AddChild(), 3, 0, 1, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 2, primitive_buffers_no_nulls1_8); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - auto expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), - R"([["", 3], ["bar", 4], ["quux", 5]])"); + auto expected = + arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), + R"([["", 3], ["bar", 4], ["quux", 5]])"); CheckImport(expected); // Parent and child FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 4, 0, 2, primitive_buffers_no_nulls1_8); FillStructLike(3, 0, 1, 2, buffers_no_nulls_no_data); - expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), - R"([["", 4], ["bar", 5], ["quux", 6]])"); + expected = arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), + R"([["", 4], ["bar", 5], ["quux", 6]])"); CheckImport(expected); } @@ -3261,7 +3284,7 @@ TEST_F(TestArrayImport, Map) { FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_8); FillStructLike(AddChild(), 5, 0, 0, 2, buffers_no_nulls_no_data); FillListLike(3, 1, 0, list_buffers_nulls1); - auto expected = ArrayFromJSON( + auto expected = arrow::ArrayFromJSON( map(utf8(), uint8()), R"([[["foo", 1], ["", 2]], null, [["bar", 3], ["quux", 4], ["xyzzy", 5]]])"); CheckImport(expected); @@ -3272,8 +3295,8 @@ TEST_F(TestArrayImport, Dictionary) { FillPrimitive(6, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto dict_values = ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); - auto indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + auto dict_values = arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); + auto indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN( auto expected, DictionaryArray::FromArrays(dictionary(int8(), utf8()), indices, dict_values)); @@ -3295,8 +3318,8 @@ TEST_F(TestArrayImport, NestedDictionary) { FillPrimitive(6, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto dict_values = ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6]]"); - auto indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + auto dict_values = arrow::ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6]]"); + auto indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN(auto expected, DictionaryArray::FromArrays(dictionary(int8(), list(int8())), indices, dict_values)); @@ -3307,12 +3330,12 @@ TEST_F(TestArrayImport, NestedDictionary) { FillDictionary(LastChild()); FillListLike(3, 0, 0, list_buffers_no_nulls1); - dict_values = ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); - indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + dict_values = arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); + indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN( auto dict_array, DictionaryArray::FromArrays(dictionary(int8(), utf8()), indices, dict_values)); - auto offsets = ArrayFromJSON(int32(), "[0, 2, 2, 
5]"); + auto offsets = arrow::ArrayFromJSON(int32(), "[0, 2, 2, 5]"); ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *dict_array)); CheckImport(expected); } @@ -3322,16 +3345,16 @@ TEST_F(TestArrayImport, DictionaryWithOffset) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto expected = DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0]", - R"(["", "bar", "quux"])"); + auto expected = arrow::DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0]", + R"(["", "bar", "quux"])"); CheckImport(expected); FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(4, 0, 2, primitive_buffers_no_nulls4); FillDictionary(); - expected = DictArrayFromJSON(dictionary(int8(), utf8()), "[0, 1, 3, 0]", - R"(["foo", "", "bar", "quux"])"); + expected = arrow::DictArrayFromJSON(dictionary(int8(), utf8()), "[0, 1, 3, 0]", + R"(["foo", "", "bar", "quux"])"); CheckImport(expected); } @@ -3340,8 +3363,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { // smallint FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - auto expected = - ExtensionType::WrapArray(smallint(), ArrayFromJSON(int16(), "[513, 1027, 1541]")); + auto expected = ExtensionType::WrapArray( + smallint(), arrow::ArrayFromJSON(int16(), "[513, 1027, 1541]")); CheckImport(expected); // dict_extension_type @@ -3349,8 +3372,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { FillPrimitive(6, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto storage = DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0, 1, 3, 0]", - R"(["foo", "", "bar", "quux"])"); + auto storage = arrow::DictArrayFromJSON( + dictionary(int8(), utf8()), "[1, 2, 0, 1, 3, 0]", R"(["foo", "", "bar", "quux"])"); expected = ExtensionType::WrapArray(dict_extension_type(), storage); CheckImport(expected); @@ -3358,8 +3381,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { FillPrimitive(AddChild(), 3, 0, /*offset=*/0, primitive_buffers_no_nulls6); FillPrimitive(AddChild(), 3, 0, /*offset=*/3, primitive_buffers_no_nulls6); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - expected = MakeComplex128(ArrayFromJSON(float64(), "[0.0, 1.5, -2.0]"), - ArrayFromJSON(float64(), "[3.0, 4.0, 5.0]")); + expected = MakeComplex128(arrow::ArrayFromJSON(float64(), "[0.0, 1.5, -2.0]"), + arrow::ArrayFromJSON(float64(), "[3.0, 4.0, 5.0]")); CheckImport(expected); } @@ -3420,7 +3443,7 @@ TEST_F(TestArrayImport, ListViewNoError) { // Null offsets pointer FillPrimitive(AddChild(), 0, 0, 0, primitive_buffers_no_nulls1_8); FillListView(0, 0, 0, all_buffers_omitted); - CheckImport(ArrayFromJSON(list_view(int8()), "[]")); + CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[]")); } TEST_F(TestArrayImport, MapError) { @@ -3470,8 +3493,8 @@ TEST_F(TestArrayImport, RecursionError) { TEST_F(TestArrayImport, ImportRecordBatch) { auto schema = ::arrow::schema( {field("strs", utf8(), /*nullable=*/false), field("ints", uint16())}); - auto expected_strs = ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); - auto expected_ints = ArrayFromJSON(uint16(), "[513, null, 1541]"); + auto expected_strs = arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); + auto expected_ints = arrow::ArrayFromJSON(uint16(), "[513, null, 1541]"); FillStringLike(AddChild(), 3, 0, 1, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_16); @@ -3506,7 +3529,7 @@ TEST_F(TestArrayImport, ImportArrayAndType) { ArrayReleaseCallback array_cb(&c_struct_); ASSERT_OK_AND_ASSIGN(auto array, ImportArray(&c_struct_, 
&schema_builder.c_struct_)); - AssertArraysEqual(*array, *ArrayFromJSON(int8(), "[1, 2, 3]")); + AssertArraysEqual(*array, *arrow::ArrayFromJSON(int8(), "[1, 2, 3]")); schema_cb.AssertCalled(); // was released array_cb.AssertNotCalled(); ASSERT_TRUE(ArrowArrayIsReleased(&c_struct_)); // was moved @@ -3531,8 +3554,8 @@ TEST_F(TestArrayImport, ImportArrayAndTypeError) { TEST_F(TestArrayImport, ImportRecordBatchAndSchema) { // Test importing both record batch and its schema at the same time auto schema = ::arrow::schema({field("strs", utf8()), field("ints", uint16())}); - auto expected_strs = ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); - auto expected_ints = ArrayFromJSON(uint16(), "[513, null, 1541]"); + auto expected_strs = arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); + auto expected_ints = arrow::ArrayFromJSON(uint16(), "[513, null, 1541]"); SchemaStructBuilder schema_builder; schema_builder.FillPrimitive(schema_builder.AddChild(), "u", "strs"); @@ -3809,7 +3832,7 @@ class TestArrayRoundtrip : public ::testing::Test { void SetUp() override { pool_ = default_memory_pool(); } static ArrayFactory JSONArrayFactory(std::shared_ptr type, const char* json) { - return [=]() { return ArrayFromJSON(type, json); }; + return [=]() { return arrow::ArrayFromJSON(type, json); }; } static ArrayFactory SlicedArrayFactory(ArrayFactory factory) { @@ -3962,7 +3985,7 @@ TEST_F(TestArrayRoundtrip, BinaryViewMultipleBuffers) { TEST_F(TestArrayRoundtrip, UnknownNullCount) { TestWithArrayFactory([]() -> Result> { - auto arr = ArrayFromJSON(int32(), "[0, 1, 2]"); + auto arr = arrow::ArrayFromJSON(int32(), "[0, 1, 2]"); if (arr->null_bitmap()) { return Status::Invalid( "Failed precondition: " @@ -3998,7 +4021,7 @@ TEST_F(TestArrayRoundtrip, ListView) { std::shared_ptr sizes; ArrayFromVector(std::vector{2, 2, 3, 1, 2, 0}, &sizes); - auto values = ArrayFromJSON(int8(), "[4, 5, 6, null, 8, null]"); + auto values = arrow::ArrayFromJSON(int8(), "[4, 5, 6, null, 8, null]"); auto result = ListViewArray::FromArrays(*offsets, *sizes, *values, pool_); if (result.ok()) { RETURN_NOT_OK((*result)->ValidateFull()); @@ -4085,9 +4108,10 @@ TEST_F(TestArrayRoundtrip, RunEndEncoded) { auto ree_array, RunEndEncodedArray::Make( run_end_encoded(int64(), list(utf8())), 8, - ArrayFromJSON(int64(), "[1, 3, 4, 7, 8]"), - ArrayFromJSON(list(utf8()), - R"([["abc", "def"], ["efg"], [], null, ["efg", "hij"]])"))); + arrow::ArrayFromJSON(int64(), "[1, 3, 4, 7, 8]"), + arrow::ArrayFromJSON( + list(utf8()), + R"([["abc", "def"], ["efg"], [], null, ["efg", "hij"]])"))); RETURN_NOT_OK(ree_array->ValidateFull()); return ree_array; }; @@ -4099,8 +4123,8 @@ TEST_F(TestArrayRoundtrip, RunEndEncoded) { TEST_F(TestArrayRoundtrip, Dictionary) { { auto factory = []() { - auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -4109,8 +4133,9 @@ TEST_F(TestArrayRoundtrip, Dictionary) { } { auto factory = []() { - auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = + arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return 
DictionaryArray::FromArrays( dictionary(indices->type(), values->type(), /*ordered=*/true), indices, values); }; @@ -4174,8 +4199,8 @@ TEST_F(TestArrayRoundtrip, UnregisteredExtension) { TEST_F(TestArrayRoundtrip, RecordBatch) { auto schema = ::arrow::schema( {field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); - auto arr0 = ArrayFromJSON(int16(), "[1, 2, null]"); - auto arr1 = ArrayFromJSON(boolean(), "[false, true, false]"); + auto arr0 = arrow::ArrayFromJSON(int16(), "[1, 2, null]"); + auto arr1 = arrow::ArrayFromJSON(boolean(), "[false, true, false]"); { auto factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; @@ -4249,7 +4274,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test { static ArrayFactory JSONArrayFactory(const std::shared_ptr& mm, std::shared_ptr type, const char* json) { - return [=]() { return ToDevice(mm, *ArrayFromJSON(type, json)->data()); }; + return [=]() { return ToDevice(mm, *arrow::ArrayFromJSON(type, json)->data()); }; } static ArrayFactory SlicedArrayFactory(ArrayFactory factory) { @@ -4490,8 +4515,8 @@ TEST_F(TestArrayStreamExport, Empty) { TEST_F(TestArrayStreamExport, Simple) { auto schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches( - schema, {ArrayFromJSON(int32(), "[1, 2]"), ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches(schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, schema)); struct ArrowArrayStream c_stream; @@ -4509,8 +4534,8 @@ TEST_F(TestArrayStreamExport, Simple) { TEST_F(TestArrayStreamExport, ArrayLifetime) { auto schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches( - schema, {ArrayFromJSON(int32(), "[1, 2]"), ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches(schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, schema)); struct ArrowArrayStream c_stream; @@ -4590,9 +4615,10 @@ TEST_F(TestArrayStreamExport, ChunkedArrayExportEmpty) { } TEST_F(TestArrayStreamExport, ChunkedArrayExport) { - ASSERT_OK_AND_ASSIGN(auto chunked_array, - ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), - ArrayFromJSON(int32(), "[4, 5, null]")})); + ASSERT_OK_AND_ASSIGN( + auto chunked_array, + ChunkedArray::Make({arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), "[4, 5, null]")})); struct ArrowArrayStream c_stream; struct ArrowSchema c_schema; @@ -4713,8 +4739,9 @@ class TestArrayStreamRoundtrip : public BaseArrayStreamTest { TEST_F(TestArrayStreamRoundtrip, Simple) { auto orig_schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches(orig_schema, {ArrayFromJSON(int32(), "[1, 2]"), - ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = + MakeBatches(orig_schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema)); @@ -4730,8 +4757,9 @@ TEST_F(TestArrayStreamRoundtrip, Simple) { TEST_F(TestArrayStreamRoundtrip, CloseEarly) { auto orig_schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches(orig_schema, {ArrayFromJSON(int32(), "[1, 2]"), - ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = + MakeBatches(orig_schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), 
"[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema)); @@ -4786,9 +4814,9 @@ TEST_F(TestArrayStreamRoundtrip, SchemaError) { } TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtrip) { - ASSERT_OK_AND_ASSIGN(auto src, - ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), - ArrayFromJSON(int32(), "[4, 5, null]")})); + ASSERT_OK_AND_ASSIGN( + auto src, ChunkedArray::Make({arrow::ArrayFromJSON(int32(), "[1, 2]"), + arrow::ArrayFromJSON(int32(), "[4, 5, null]")})); Roundtrip(src, [&](const std::shared_ptr& dst) { AssertTypeEqual(*dst->type(), *src->type()); @@ -4909,10 +4937,10 @@ TEST_F(TestArrayDeviceStreamExport, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(schema, {arr1, arr2}); @@ -4938,10 +4966,10 @@ TEST_F(TestArrayDeviceStreamExport, ArrayLifetime) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(schema, {arr1, arr2}); @@ -5041,10 +5069,10 @@ TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExport) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({arr1, arr2})); @@ -5217,10 +5245,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); @@ -5242,10 +5270,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, CloseEarly) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - 
ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); @@ -5294,10 +5322,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtrip) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({arr1, arr2})); @@ -5360,10 +5388,10 @@ TEST_F(TestAsyncDeviceArrayStreamRoundTrip, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN(auto arr2, - ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN( + auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); diff --git a/cpp/src/arrow/compute/kernels/vector_hash_test.cc b/cpp/src/arrow/compute/kernels/vector_hash_test.cc index 0a966a66f4f..b0fa296e007 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash_test.cc @@ -43,8 +43,6 @@ #include "arrow/compute/api.h" #include "arrow/compute/kernels/test_util_internal.h" -#include "arrow/ipc/json_simple.h" - namespace arrow { using internal::checked_cast; diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index ee73ebc5a48..a6bc7afb8ff 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -2140,8 +2140,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { actual_struct = std::dynamic_pointer_cast(struct_array); } - auto expected_struct = ArrayFromJSON(struct_(expected_physical_schema_->fields()), - file_contents->second); + auto expected_struct = arrow::ArrayFromJSON( + struct_(expected_physical_schema_->fields()), file_contents->second); AssertArraysEqual(*expected_struct, *actual_struct, /*verbose=*/true); } diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 9e0b1d723b9..8cbe30f5ae6 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -38,7 +38,6 @@ function(ADD_ARROW_IPC_TEST REL_TEST_NAME) endfunction() add_arrow_test(feather_test) -add_arrow_ipc_test(json_simple_test) add_arrow_ipc_test(message_internal_test) add_arrow_ipc_test(read_write_test) add_arrow_ipc_test(tensor_test) diff --git 
a/cpp/src/arrow/ipc/api.h b/cpp/src/arrow/ipc/api.h index b5690aed8da..0828730d905 100644 --- a/cpp/src/arrow/ipc/api.h +++ b/cpp/src/arrow/ipc/api.h @@ -19,7 +19,7 @@ #include "arrow/ipc/dictionary.h" #include "arrow/ipc/feather.h" -#include "arrow/ipc/json_simple.h" #include "arrow/ipc/message.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" +#include "arrow/util/from_json.h" diff --git a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc index 6ccf1155d12..2f6d97ec989 100644 --- a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc +++ b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc @@ -27,13 +27,13 @@ #include "arrow/io/file.h" #include "arrow/io/memory.h" -#include "arrow/ipc/json_simple.h" #include "arrow/ipc/test_common.h" #include "arrow/ipc/writer.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/testing/extension_type.h" #include "arrow/util/compression.h" +#include "arrow/util/from_json.h" #include "arrow/util/io_util.h" #include "arrow/util/key_value_metadata.h" @@ -41,7 +41,7 @@ namespace arrow::ipc { using ::arrow::internal::CreateDir; using ::arrow::internal::PlatformFilename; -using internal::json::ArrayFromJSON; +using ::arrow::util::ArrayFromJSON; Result> MakeExtensionBatch() { auto array = ExampleUuid(); diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index 9eeca32e721..a49e7ed74a2 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -51,7 +51,6 @@ #include "arrow/datum.h" #include "arrow/extension/json.h" #include "arrow/io/memory.h" -#include "arrow/ipc/json_simple.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" #include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep @@ -63,6 +62,7 @@ #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/config.h" +#include "arrow/util/from_json.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" #include "arrow/util/logging_internal.h" @@ -381,7 +381,7 @@ void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool ve std::shared_ptr ArrayFromJSON(const std::shared_ptr& type, std::string_view json) { - EXPECT_OK_AND_ASSIGN(auto out, ipc::internal::json::ArrayFromJSON(type, json)); + EXPECT_OK_AND_ASSIGN(auto out, util::ArrayFromJSON(type, json)); return out; } @@ -389,15 +389,14 @@ std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, std::string_view indices_json, std::string_view dictionary_json) { std::shared_ptr out; - ABORT_NOT_OK( - ipc::internal::json::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); + ABORT_NOT_OK(util::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); return out; } std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr& type, const std::vector& json) { std::shared_ptr out; - ABORT_NOT_OK(ipc::internal::json::ChunkedArrayFromJSON(type, json, &out)); + ABORT_NOT_OK(util::ChunkedArrayFromJSON(type, json, &out)); return out; } @@ -405,7 +404,7 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& std::string_view json) { // Parse as a StructArray auto struct_type = struct_(schema->fields()); - std::shared_ptr struct_array = ArrayFromJSON(struct_type, json); + std::shared_ptr struct_array = arrow::ArrayFromJSON(struct_type, json); // Convert StructArray to RecordBatch return *RecordBatch::FromStructArray(struct_array); @@ -414,7 +413,7 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& std::shared_ptr ScalarFromJSON(const std::shared_ptr& 
type, std::string_view json) { std::shared_ptr out; - ABORT_NOT_OK(ipc::internal::json::ScalarFromJSON(type, json, &out)); + ABORT_NOT_OK(util::ScalarFromJSON(type, json, &out)); return out; } @@ -422,8 +421,7 @@ std::shared_ptr DictScalarFromJSON(const std::shared_ptr& type std::string_view index_json, std::string_view dictionary_json) { std::shared_ptr out; - ABORT_NOT_OK( - ipc::internal::json::DictScalarFromJSON(type, index_json, dictionary_json, &out)); + ABORT_NOT_OK(util::DictScalarFromJSON(type, index_json, dictionary_json, &out)); return out; } @@ -440,7 +438,7 @@ std::shared_ptr TensorFromJSON(const std::shared_ptr& type, std::string_view data, std::string_view shape, std::string_view strides, std::string_view dim_names) { - std::shared_ptr array = ArrayFromJSON(type, data); + std::shared_ptr array = arrow::ArrayFromJSON(type, data); rj::Document json_shape; json_shape.Parse(shape.data(), shape.length()); @@ -469,7 +467,7 @@ std::shared_ptr TensorFromJSON(const std::shared_ptr& type, const std::vector& shape, const std::vector& strides, const std::vector& dim_names) { - std::shared_ptr array = ArrayFromJSON(type, data); + std::shared_ptr array = arrow::ArrayFromJSON(type, data); return *Tensor::Make(type, array->data()->buffers[1], shape, strides, dim_names); } @@ -1020,19 +1018,19 @@ std::shared_ptr MakeComplex128(const std::shared_ptr& real, } std::shared_ptr ExampleUuid() { - auto arr = ArrayFromJSON( + auto arr = arrow::ArrayFromJSON( fixed_size_binary(16), "[null, \"abcdefghijklmno0\", \"abcdefghijklmno1\", \"abcdefghijklmno2\"]"); return ExtensionType::WrapArray(uuid(), arr); } std::shared_ptr ExampleSmallint() { - auto arr = ArrayFromJSON(int16(), "[-32768, null, 1, 2, 3, 4, 32767]"); + auto arr = arrow::ArrayFromJSON(int16(), "[-32768, null, 1, 2, 3, 4, 32767]"); return ExtensionType::WrapArray(smallint(), arr); } std::shared_ptr ExampleTinyint() { - auto arr = ArrayFromJSON(int8(), "[-128, null, 1, 2, 3, 4, 127]"); + auto arr = arrow::ArrayFromJSON(int8(), "[-128, null, 1, 2, 3, 4, 127]"); return ExtensionType::WrapArray(tinyint(), arr); } @@ -1043,8 +1041,8 @@ std::shared_ptr ExampleDictExtension() { } std::shared_ptr ExampleComplex128() { - auto arr = ArrayFromJSON(struct_({field("", float64()), field("", float64())}), - "[[1.0, -2.5], null, [3.0, -4.5]]"); + auto arr = arrow::ArrayFromJSON(struct_({field("", float64()), field("", float64())}), + "[[1.0, -2.5], null, [3.0, -4.5]]"); return ExtensionType::WrapArray(complex128(), arr); } diff --git a/cpp/src/arrow/testing/matchers.h b/cpp/src/arrow/testing/matchers.h index b4625b3922e..b800cb30c3c 100644 --- a/cpp/src/arrow/testing/matchers.h +++ b/cpp/src/arrow/testing/matchers.h @@ -75,7 +75,7 @@ class AnyOfJSONMatcher { "AnyOfJSON only supported for std::shared_ptr"); Impl(std::shared_ptr type, std::string array_json) : type_(std::move(type)), array_json_(std::move(array_json)) { - array = ArrayFromJSON(type_, array_json_); + array = arrow::ArrayFromJSON(type_, array_json_); } void DescribeTo(std::ostream* os) const override { *os << "matches at least one scalar from "; @@ -415,7 +415,7 @@ DataEqMatcher DataEq(Data&& dat) { /// Constructs an array with ArrayFromJSON against which arguments are matched inline DataEqMatcher DataEqArray(const std::shared_ptr& type, std::string_view json) { - return DataEq(ArrayFromJSON(type, json)); + return DataEq(arrow::ArrayFromJSON(type, json)); } /// Constructs an array from a vector of optionals against which arguments are matched diff --git a/cpp/src/arrow/util/CMakeLists.txt 
b/cpp/src/arrow/util/CMakeLists.txt index 17eea5532cc..fe683ed7c56 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -114,6 +114,10 @@ add_arrow_test(crc32-test EXTRA_LINK_LIBS Boost::headers) +if(ARROW_JSON) + add_arrow_test(from_json_test SOURCES from_json_test.cc) +endif() + add_arrow_benchmark(bit_block_counter_benchmark) add_arrow_benchmark(bit_util_benchmark) add_arrow_benchmark(bitmap_reader_benchmark) diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/util/from_json.cc similarity index 99% rename from cpp/src/arrow/ipc/json_simple.cc rename to cpp/src/arrow/util/from_json.cc index 19f0a6ae1e1..8eb27ce989e 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/util/from_json.cc @@ -31,12 +31,12 @@ #include "arrow/array/builder_time.h" #include "arrow/array/builder_union.h" #include "arrow/chunked_array.h" -#include "arrow/ipc/json_simple.h" #include "arrow/scalar.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/float16.h" +#include "arrow/util/from_json.h" #include "arrow/util/logging_internal.h" #include "arrow/util/value_parsing.h" @@ -55,9 +55,7 @@ namespace arrow { using internal::ParseValue; using util::Float16; -namespace ipc { -namespace internal { -namespace json { +namespace util { using ::arrow::internal::checked_cast; using ::arrow::internal::checked_pointer_cast; @@ -1074,7 +1072,5 @@ Status DictScalarFromJSON(const std::shared_ptr& type, return Status::OK(); } -} // namespace json -} // namespace internal -} // namespace ipc +} // namespace util } // namespace arrow diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/util/from_json.h similarity index 95% rename from cpp/src/arrow/ipc/json_simple.h rename to cpp/src/arrow/util/from_json.h index 3a730ee6a3f..84c9301437e 100644 --- a/cpp/src/arrow/ipc/json_simple.h +++ b/cpp/src/arrow/util/from_json.h @@ -32,9 +32,7 @@ namespace arrow { class Array; class DataType; -namespace ipc { -namespace internal { -namespace json { +namespace util { ARROW_EXPORT Result> ArrayFromJSON(const std::shared_ptr&, @@ -65,7 +63,5 @@ ARROW_EXPORT Status DictScalarFromJSON(const std::shared_ptr&, std::string_view index_json, std::string_view dictionary_json, std::shared_ptr* out); -} // namespace json -} // namespace internal -} // namespace ipc +} // namespace util } // namespace arrow diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/util/from_json_test.cc similarity index 99% rename from cpp/src/arrow/ipc/json_simple_test.cc rename to cpp/src/arrow/util/from_json_test.cc index 31312f1ac69..a36f1586f09 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/util/from_json_test.cc @@ -35,7 +35,6 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" #include "arrow/chunked_array.h" -#include "arrow/ipc/json_simple.h" #include "arrow/scalar.h" #include "arrow/testing/builder.h" #include "arrow/testing/gtest_util.h" @@ -45,6 +44,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/float16.h" +#include "arrow/util/from_json.h" #if defined(_MSC_VER) // "warning C4307: '+': integral constant overflow" @@ -55,9 +55,7 @@ namespace arrow { using util::Float16; -namespace ipc { -namespace internal { -namespace json { +namespace util { using ::arrow::internal::BytesToBits; using ::arrow::internal::checked_cast; @@ -916,7 +914,8 @@ TEST(TestMap, IntegerMapToStringList) { auto& key_key_builder = 
checked_cast(*key_builder.key_builder()); auto& key_item_builder = checked_cast(*key_builder.item_builder()); auto& item_builder = checked_cast(*map_builder.item_builder()); - auto& item_value_builder = checked_cast(*item_builder.value_builder()); + auto& item_value_builder = + checked_cast(*item_builder.value_builder()); ASSERT_OK(map_builder.Append()); ASSERT_OK(key_builder.Append()); @@ -1535,7 +1534,5 @@ TEST(TestDictScalarFromJSON, Errors) { &scalar)); // dict value isn't string } -} // namespace json -} // namespace internal -} // namespace ipc +} // namespace util } // namespace arrow diff --git a/python/pyarrow/src/arrow/python/gdb.cc b/python/pyarrow/src/arrow/python/gdb.cc index 7c58bae3342..f6753b20da5 100644 --- a/python/pyarrow/src/arrow/python/gdb.cc +++ b/python/pyarrow/src/arrow/python/gdb.cc @@ -23,7 +23,6 @@ #include "arrow/chunked_array.h" #include "arrow/datum.h" #include "arrow/extension/uuid.h" -#include "arrow/ipc/json_simple.h" #include "arrow/python/gdb.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" @@ -31,6 +30,7 @@ #include "arrow/type.h" #include "arrow/util/debug.h" #include "arrow/util/decimal.h" +#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" @@ -39,9 +39,9 @@ namespace arrow { using extension::uuid; using extension::UuidType; -using ipc::internal::json::ArrayFromJSON; -using ipc::internal::json::ChunkedArrayFromJSON; -using ipc::internal::json::ScalarFromJSON; +using util::ArrayFromJSON; +using util::ChunkedArrayFromJSON; +using util::ScalarFromJSON; namespace gdb { diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index 309b574dc02..8d903891382 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -24,7 +24,7 @@ cdef extern from * namespace "arrow::py" nogil: """ #include "arrow/status.h" #include "arrow/extension_type.h" - #include "arrow/ipc/json_simple.h" + #include "arrow/util/from_json.h" namespace arrow { namespace py { @@ -64,7 +64,7 @@ cdef extern from * namespace "arrow::py" nogil: std::shared_ptr MakeUuidArray() { auto uuid_type = MakeUuidType(); auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; - auto result = ipc::internal::json::ArrayFromJSON(fixed_size_binary(16), json); + auto result = util::ArrayFromJSON(fixed_size_binary(16), json); return ExtensionType::WrapArray(uuid_type, result.ValueOrDie()); } From cf808f9f3524df96d5385cbd557c8f353e1fed5b Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 18 Apr 2025 19:16:07 -0700 Subject: [PATCH 02/33] Add docs for FromJSON helpers --- cpp/src/arrow/util/from_json.h | 16 ++++++++++ docs/source/cpp/api/array.rst | 6 ++++ docs/source/cpp/arrays.rst | 54 ++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) diff --git a/cpp/src/arrow/util/from_json.h b/cpp/src/arrow/util/from_json.h index 84c9301437e..c3ff4b81f65 100644 --- a/cpp/src/arrow/util/from_json.h +++ b/cpp/src/arrow/util/from_json.h @@ -34,34 +34,50 @@ class DataType; namespace util { +/// \defgroup array-from-json Helpers for constructing Arrays from JSON text +/// +/// These helpers are intended to be used in examples, tests, or for quick +/// prototyping and are not intended to be used where performance matters. 
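+///
+/// A minimal usage sketch (illustrative only; the variable name and JSON value
+/// are arbitrary, and ARROW_ASSIGN_OR_RAISE assumes an enclosing function that
+/// returns Status or Result):
+///
+/// \code{.cpp}
+/// ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> arr,
+///                       arrow::util::ArrayFromJSON(int32(), "[1, 2, null]"));
+/// \endcode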
+/// +/// @{ + +/// \brief Create an Array from a JSON string ARROW_EXPORT Result<std::shared_ptr<Array>> ArrayFromJSON(const std::shared_ptr<DataType>&, const std::string& json); +/// \brief Create an Array from a JSON string ARROW_EXPORT Result<std::shared_ptr<Array>> ArrayFromJSON(const std::shared_ptr<DataType>&, std::string_view json); +/// \brief Create an Array from a JSON string ARROW_EXPORT Result<std::shared_ptr<Array>> ArrayFromJSON(const std::shared_ptr<DataType>&, const char* json); +/// \brief Create a ChunkedArray from a JSON string ARROW_EXPORT Status ChunkedArrayFromJSON(const std::shared_ptr<DataType>& type, const std::vector<std::string>& json_strings, std::shared_ptr<ChunkedArray>* out); +/// \brief Create a DictionaryArray from a JSON string ARROW_EXPORT Status DictArrayFromJSON(const std::shared_ptr<DataType>&, std::string_view indices_json, std::string_view dictionary_json, std::shared_ptr<Array>* out); +/// \brief Create a Scalar from a JSON string ARROW_EXPORT Status ScalarFromJSON(const std::shared_ptr<DataType>&, std::string_view json, std::shared_ptr<Scalar>* out); +/// \brief Create a DictionaryScalar from a JSON string ARROW_EXPORT Status DictScalarFromJSON(const std::shared_ptr<DataType>&, std::string_view index_json, std::string_view dictionary_json, std::shared_ptr<Scalar>* out); +/// @} + } // namespace util } // namespace arrow diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst index b17d1957a8b..8a393516371 100644 --- a/docs/source/cpp/api/array.rst +++ b/docs/source/cpp/api/array.rst @@ -110,3 +110,9 @@ Utilities :project: arrow_cpp :members: :undoc-members: + +.. _api-array-from-json: + +.. doxygengroup:: array-from-json + :project: arrow_cpp + :members: diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index 37550229388..dff8a0fe68d 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -57,6 +57,10 @@ example a ``std::vector``. Instead, several strategies can be used: subclasses help building up array data incrementally, without having to deal with details of the Arrow format yourself. +.. note:: For cases where performance isn't important, such as examples or tests, + you may prefer to use the ``*FromJSON`` helpers, which can create + Arrays using a JSON text shorthand. See :ref:`fromjson_helpers`. + Using ArrayBuilder and its subclasses ------------------------------------- @@ -223,3 +227,53 @@ to some logical subsequence of the data. This is done by calling the :func:`arrow::Array::Slice` and :func:`arrow::ChunkedArray::Slice` methods, respectively. +.. _fromjson_helpers: + +FromJSON Helpers +================ + +A set of helper functions is provided for concisely creating Arrays and Scalars +from JSON_ text. These helpers are intended to be used in examples, tests, or +for quick prototyping and are not intended to be used where performance matters. + +.. _JSON: https://en.wikipedia.org/wiki/JSON + +Examples for ``ArrayFromJSON``, ``ChunkedArrayFromJSON``, and ``DictArrayFromJSON`` +are shown below:: + + // Simple types + auto int32_array = ArrayFromJSON(int32(), "[1, 2, 3]"); + auto float64_array = ArrayFromJSON(float64(), "[4.0, 5.0, 6.0]"); + auto bool_array = ArrayFromJSON(boolean(), "[true, false, true]"); + auto string_array = ArrayFromJSON(utf8(), R"(["Hello", "World", null])"); + + // Timestamps can be constructed from string representations + auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND), + R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])"); + + // List, Map, Struct + auto list_array = ArrayFromJSON( + list(int64()), + "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]" + ); + auto map_array = ArrayFromJSON( + map(utf8(), int32()), + R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])" + ); + auto struct_array = ArrayFromJSON( + struct_({field("one", int32()), field("two", int32())}), + "[[11, 22], null, [null, 33]]" + ); + + // ChunkedArrayFromJSON + ChunkedArrayFromJSON(int32(), {R"([5, 10])", R"([null])", R"([16])"}); + + // DictArrayFromJSON + auto dict_array = DictArrayFromJSON( + dictionary(int32(), utf8()), + "[0, 1, 0, 2, 0, 3]", + R"(["k1", "k2", "k3", "k4"])" + );
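+
+For scalar values there are matching helpers. The snippet below is an
+illustrative sketch only: the variable names and JSON inputs are invented here,
+and it uses the ``Status``-returning signatures of ``ScalarFromJSON`` and
+``DictScalarFromJSON`` declared in ``arrow/util/from_json.h``, which fill an
+out parameter (``ARROW_RETURN_NOT_OK`` assumes an enclosing function that
+returns ``Status``)::
+
+  // ScalarFromJSON / DictScalarFromJSON
+  std::shared_ptr<Scalar> scalar;
+  ARROW_RETURN_NOT_OK(ScalarFromJSON(int32(), "42", &scalar));
+
+  std::shared_ptr<Scalar> dict_scalar;
+  ARROW_RETURN_NOT_OK(DictScalarFromJSON(
+      dictionary(int32(), utf8()), "1", R"(["k1", "k2", "k3"])", &dict_scalar));
+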
+Please see the :ref:`FromJSON API listing <api-array-from-json>` for the +complete set of helpers. From 644d40759bb20f45bf9204d9ac57928eaf320bfc Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Sun, 20 Apr 2025 20:32:36 -0700 Subject: [PATCH 03/33] Update meson.build --- cpp/src/arrow/meson.build | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index e9c338eac66..0b53b4f5c1d 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -343,11 +343,6 @@ if needs_ipc flatbuffers_dep = dependency('flatbuffers') arrow_ipc_deps = [flatbuffers_dep] - if needs_json - arrow_ipc_srcs += 'ipc/json_simple.cc' - arrow_ipc_deps += rapidjson_dep - endif - arrow_components += { 'arrow_ipc': {'sources': arrow_ipc_srcs, 'dependencies': arrow_ipc_deps}, } @@ -371,6 +366,9 @@ if needs_json 'dependencies': [rapidjson_dep], }, } + + arrow_util_srcs += 'ipc/json_simple.cc' + arrow_util_deps += rapidjson_dep endif arrow_srcs = [] From 6fbb2f3d059aa1fa89f9afc6e39a1b21f4021d56 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 14:52:21 -0700 Subject: [PATCH 04/33] Fix path in meson.build --- cpp/src/arrow/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index 0b53b4f5c1d..285cb6dba0c 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -367,7 +367,7 @@ if needs_json }, } - arrow_util_srcs += 'ipc/json_simple.cc' + arrow_util_srcs += 'util/from_json.cc' arrow_util_deps += rapidjson_dep endif From ce373f52c5bd25e14f03c110331f0cfb65e3e0fe Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 14:56:24 -0700 Subject: [PATCH 05/33] Remove JSON bits from IPC --- cpp/src/arrow/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 1aa43c68fd1..e096b2f4cbd 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -927,11 +927,6 @@ if(ARROW_IPC) foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS}) target_link_libraries(${ARROW_IPC_TARGET} PRIVATE arrow::flatbuffers) endforeach() - if(ARROW_JSON) - foreach(ARROW_IPC_TARGET ${ARROW_IPC_TARGETS}) - target_link_libraries(${ARROW_IPC_TARGET} PRIVATE 
RapidJSON) - endforeach() - endif() else() set(ARROW_IPC_TARGET_SHARED) set(ARROW_IPC_TARGET_STATIC) From 6479400cf8512fb1e3acfd489ee41d74b9b756bc Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 14:58:26 -0700 Subject: [PATCH 06/33] Use using in hash_join_node_test.cc --- cpp/src/arrow/acero/hash_join_node_test.cc | 147 +++++++++++---------- 1 file changed, 74 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index f4c6ab895c5..2dc0b4f0ac3 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -42,6 +42,7 @@ using testing::UnorderedElementsAreArray; namespace arrow { +using arrow::ArrayFromJSON; using arrow::gen::Constant; using arrow::random::kSeedMax; using arrow::random::RandomArrayGenerator; @@ -1397,16 +1398,16 @@ TEST(HashJoin, Dictionary) { TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::EQ, parallel, // Input - arrow::ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), + ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), DictArrayFromJSON(int8_utf8, R"([4, 2, 3, 0])", R"(["p", "q", "r", null, "r"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), + ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), DictArrayFromJSON(int16_utf8, R"([0, 1, 0, 2])", R"(["r", null, "r", "q"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), + ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), DictArrayFromJSON(int8_utf8, R"([4, 4, 2, 3, 0, null])", R"(["p", "q", "r", null, "r"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), + ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), DictArrayFromJSON(int16_utf8, R"([0, 1, 2, 2, null, 0])", R"(["r", null, "r", "q"])"), 1, swap_sides); @@ -1424,13 +1425,13 @@ TEST(HashJoin, Dictionary) { auto l_key = l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 1])", R"(["b", null, "a"])") - : arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); - auto l_payload = arrow::ArrayFromJSON(utf8(), R"(["x", "y", "z", "y"])"); + : ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); + auto l_payload = ArrayFromJSON(utf8(), R"(["x", "y", "z", "y"])"); auto r_key = r_key_dict ? DictArrayFromJSON(int16_utf8, R"([1, 0, null, 1, 2])", R"([null, "b", "c"])") - : arrow::ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); - auto r_payload = arrow::ArrayFromJSON(utf8(), R"(["p", "r", "p", "q", "s"])"); + : ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); + auto r_payload = ArrayFromJSON(utf8(), R"(["p", "r", "p", "q", "s"])"); // IS comparison function (null is equal to null when matching keys) TestHashJoinDictionaryHelper( @@ -1442,16 +1443,16 @@ TEST(HashJoin, Dictionary) { ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, 1, null])", R"(["b", null, "a"])") - : arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, + : ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", "y", null])"), + ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", "y", null])"), r_key_dict ? 
DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, 1])", R"(["b", "c"])") - : arrow::ArrayFromJSON(utf8(), + : ArrayFromJSON(utf8(), R"([null, null, "b", "b", null, null, "c"])"), - arrow::ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r", "p", "s"])"), 1, + ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r", "p", "s"])"), 1, swap_sides); // EQ comparison function (null is not matching null) @@ -1463,17 +1464,17 @@ TEST(HashJoin, Dictionary) { l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, null, null, null])", R"(["b", null, "a"])") - : arrow::ArrayFromJSON( + : ArrayFromJSON( utf8(), R"(["a", "a", "b", "b", null, null, null, null])"), - arrow::ArrayFromJSON(utf8(), + ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", null, null, null])"), r_key_dict ? DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, null, 1])", R"(["b", "c"])") - : arrow::ArrayFromJSON( + : ArrayFromJSON( utf8(), R"([null, null, "b", "b", null, null, null, "c"])"), - arrow::ArrayFromJSON(utf8(), + ArrayFromJSON(utf8(), R"([null, null, "p", "q", null, "r", "p", "s"])"), 3, swap_sides); } @@ -1562,17 +1563,17 @@ TEST(HashJoin, Scalars) { TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::EQ, false /*parallel*/, // Input - arrow::ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), + ArrayFromJSON(utf8(), R"(["a", "c", "c", "d"])"), use_scalar_dict ? DictScalarFromJSON(int16_utf8, "1", R"(["z", "x", "y"])") : ScalarFromJSON(utf8(), "\"x\""), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), + ArrayFromJSON(utf8(), R"(["a", "a", "b", "c"])"), use_scalar_dict ? DictScalarFromJSON(int32_utf8, "0", R"(["z", "x", "y"])") : ScalarFromJSON(utf8(), "\"z\""), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "x", "x", "x", "x", null])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["z", "z", "z", "z", null, "z"])"), 1, + ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", "d", null])"), + ArrayFromJSON(utf8(), R"(["x", "x", "x", "x", "x", null])"), + ArrayFromJSON(utf8(), R"(["a", "a", "c", "c", null, "b"])"), + ArrayFromJSON(utf8(), R"(["z", "z", "z", "z", null, "z"])"), 1, false /*swap sides*/); } @@ -1584,14 +1585,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a", null, null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "p", "q", "r"])"), 2, swap_sides); + ArrayFromJSON(utf8(), R"(["a", "a", null, null])"), + ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), + ArrayFromJSON(utf8(), R"(["a", "a", null, "b"])"), + ArrayFromJSON(utf8(), R"(["p", "p", "q", "r"])"), 2, swap_sides); } } @@ -1603,27 +1604,27 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? 
DictScalarFromJSON(int16_utf8, "2", R"(["a", "b", null])") : ScalarFromJSON(utf8(), "null"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"([null, null, null, null, null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null, null])"), - arrow::ArrayFromJSON(utf8(), R"([null, null, "a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r"])"), 3, swap_sides); + ArrayFromJSON(utf8(), R"([null, null, null, null, null])"), + ArrayFromJSON(utf8(), R"(["x", "y", null, null, null])"), + ArrayFromJSON(utf8(), R"([null, null, "a", null, "b"])"), + ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r"])"), 3, swap_sides); TestHashJoinDictionaryHelper( JoinType::FULL_OUTER, JoinKeyCmp::IS, false /*parallel*/, // Input use_scalar_dict ? DictScalarFromJSON(int16_utf8, "null", R"(["a", "b", null])") : ScalarFromJSON(utf8(), "null"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"([null, null, null, null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), - arrow::ArrayFromJSON(utf8(), R"([null, null, "a", "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["q", "q", "p", "r"])"), 2, swap_sides); + ArrayFromJSON(utf8(), R"([null, null, null, null])"), + ArrayFromJSON(utf8(), R"(["x", "y", null, null])"), + ArrayFromJSON(utf8(), R"([null, null, "a", "b"])"), + ArrayFromJSON(utf8(), R"(["q", "q", "p", "r"])"), 2, swap_sides); } } @@ -1635,13 +1636,13 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"([])"), arrow::ArrayFromJSON(utf8(), R"([])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"([])"), ArrayFromJSON(utf8(), R"([])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"([null, null])"), - arrow::ArrayFromJSON(utf8(), R"([null, null])"), 0, swap_sides); + ArrayFromJSON(utf8(), R"(["a", "a"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"([null, null])"), + ArrayFromJSON(utf8(), R"([null, null])"), 0, swap_sides); } } @@ -1653,14 +1654,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? 
DictScalarFromJSON(int32_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), DictArrayFromJSON(int32_utf8, R"([2, 2, 1])", R"(["b", null, "a"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "x", "y", "y", null])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "p", "q", "r"])"), 1, swap_sides); + ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), + ArrayFromJSON(utf8(), R"(["x", "x", "y", "y", null])"), + ArrayFromJSON(utf8(), R"(["a", "a", "a", "a", null])"), + ArrayFromJSON(utf8(), R"(["p", "q", "p", "q", "r"])"), 1, swap_sides); } } @@ -1672,14 +1673,14 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", null, "b"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", null, "b"])"), + ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), - arrow::ArrayFromJSON(utf8(), R"(["x", "y"])"), - arrow::ArrayFromJSON(utf8(), R"(["a", "a"])"), - arrow::ArrayFromJSON(utf8(), R"(["p", "p"])"), 2, swap_sides); + ArrayFromJSON(utf8(), R"(["a", "a"])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", "a"])"), + ArrayFromJSON(utf8(), R"(["p", "p"])"), 2, swap_sides); } } } @@ -1688,15 +1689,15 @@ TEST(HashJoin, DictNegative) { // For dictionary keys, all batches must share a single dictionary. // Eventually, differing dictionaries will be unified and indices transposed // during encoding to relieve this restriction. - const auto dictA = arrow::ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); - const auto dictB = arrow::ArrayFromJSON(utf8(), R"(["different", "dictionary"])"); + const auto dictA = ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); + const auto dictB = ArrayFromJSON(utf8(), R"(["different", "dictionary"])"); Datum datumFirst = Datum(*DictionaryArray::FromArrays( - arrow::ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); + ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); Datum datumSecondA = Datum(*DictionaryArray::FromArrays( - arrow::ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); + ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); Datum datumSecondB = Datum(*DictionaryArray::FromArrays( - arrow::ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); + ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); for (int i = 0; i < 4; ++i) { BatchesWithSchema l, r; @@ -1795,9 +1796,9 @@ void TestSimpleJoinHelper(BatchesWithSchema input_left, BatchesWithSchema input_ TEST(HashJoin, ExtensionTypesSwissJoin) { // For simpler types swiss join will be used. 
auto ext_arr = ExampleUuid(); - auto l_int_arr = arrow::ArrayFromJSON(int32(), "[1, 2, 3, 4]"); - auto l_int_arr2 = arrow::ArrayFromJSON(int32(), "[4, 5, 6, 7]"); - auto r_int_arr = arrow::ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); + auto l_int_arr = ArrayFromJSON(int32(), "[1, 2, 3, 4]"); + auto l_int_arr2 = ArrayFromJSON(int32(), "[4, 5, 6, 7]"); + auto r_int_arr = ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); BatchesWithSchema input_left; ASSERT_OK_AND_ASSIGN(ExecBatch left_batches, @@ -1825,9 +1826,9 @@ TEST(HashJoin, ExtensionTypesHashJoin) { // Swiss join doesn't support dictionaries so HashJoin will be used. auto dict_type = dictionary(int64(), int8()); auto ext_arr = ExampleUuid(); - auto l_int_arr = arrow::ArrayFromJSON(int32(), "[1, 2, 3, 4]"); - auto l_int_arr2 = arrow::ArrayFromJSON(int32(), "[4, 5, 6, 7]"); - auto r_int_arr = arrow::ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); + auto l_int_arr = ArrayFromJSON(int32(), "[1, 2, 3, 4]"); + auto l_int_arr2 = ArrayFromJSON(int32(), "[4, 5, 6, 7]"); + auto r_int_arr = ArrayFromJSON(int32(), "[4, 3, 2, null, 1]"); auto l_dict_array = DictArrayFromJSON(dict_type, R"([2, 0, 1, null])", R"([null, 0, 1])"); From 974afd1b30c9e751a767b2886c22bf22ce52eea5 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 15:20:25 -0700 Subject: [PATCH 07/33] Update hash_join_node_test.cc --- cpp/src/arrow/acero/hash_join_node_test.cc | 63 ++++++++++------------ 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 2dc0b4f0ac3..f6d91acca56 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -1422,15 +1422,14 @@ TEST(HashJoin, Dictionary) { auto l_key_dict_type = dict_types[rng.from_range(0, 7)]; auto r_key_dict_type = dict_types[rng.from_range(0, 7)]; - auto l_key = l_key_dict - ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 1])", - R"(["b", null, "a"])") - : ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); + auto l_key = l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 1])", + R"(["b", null, "a"])") + : ArrayFromJSON(utf8(), R"(["a", "a", "b", null])"); auto l_payload = ArrayFromJSON(utf8(), R"(["x", "y", "z", "y"])"); - auto r_key = - r_key_dict ? DictArrayFromJSON(int16_utf8, R"([1, 0, null, 1, 2])", - R"([null, "b", "c"])") - : ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); + auto r_key = r_key_dict + ? DictArrayFromJSON(int16_utf8, R"([1, 0, null, 1, 2])", + R"([null, "b", "c"])") + : ArrayFromJSON(utf8(), R"(["b", null, null, "b", "c"])"); auto r_payload = ArrayFromJSON(utf8(), R"(["p", "r", "p", "q", "s"])"); // IS comparison function (null is equal to null when matching keys) @@ -1439,19 +1438,17 @@ TEST(HashJoin, Dictionary) { // Input l_key, l_payload, r_key, r_payload, // Expected - l_key_dict - ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, 1, + l_key_dict ? DictArrayFromJSON(l_key_dict_type, R"([2, 2, 0, 0, 1, 1, null])", - R"(["b", null, "a"])") - : ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, + R"(["b", null, "a"])") + : ArrayFromJSON(utf8(), R"(["a", "a", "b", "b", null, null, null])"), ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", "y", null])"), r_key_dict ? 
DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, 1])", R"(["b", "c"])") - : ArrayFromJSON(utf8(), - R"([null, null, "b", "b", null, null, "c"])"), + : ArrayFromJSON(utf8(), R"([null, null, "b", "b", null, null, "c"])"), ArrayFromJSON(utf8(), R"([null, null, "p", "q", "r", "p", "s"])"), 1, swap_sides); @@ -1466,17 +1463,15 @@ TEST(HashJoin, Dictionary) { R"(["b", null, "a"])") : ArrayFromJSON( utf8(), R"(["a", "a", "b", "b", null, null, null, null])"), - ArrayFromJSON(utf8(), - R"(["x", "y", "z", "z", "y", null, null, null])"), + ArrayFromJSON(utf8(), R"(["x", "y", "z", "z", "y", null, null, null])"), r_key_dict ? DictArrayFromJSON(r_key_dict_type, R"([null, null, 0, 0, null, null, null, 1])", R"(["b", "c"])") - : ArrayFromJSON( - utf8(), R"([null, null, "b", "b", null, null, null, "c"])"), - ArrayFromJSON(utf8(), - R"([null, null, "p", "q", null, "r", "p", "s"])"), - 3, swap_sides); + : ArrayFromJSON(utf8(), + R"([null, null, "b", "b", null, null, null, "c"])"), + ArrayFromJSON(utf8(), R"([null, null, "p", "q", null, "r", "p", "s"])"), 3, + swap_sides); } } } @@ -1636,11 +1631,10 @@ TEST(HashJoin, Scalars) { // Input use_scalar_dict ? DictScalarFromJSON(int8_utf8, "1", R"(["b", "a", "c"])") : ScalarFromJSON(utf8(), "\"a\""), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"([])"), ArrayFromJSON(utf8(), R"([])"), + ArrayFromJSON(utf8(), R"(["x", "y"])"), ArrayFromJSON(utf8(), R"([])"), + ArrayFromJSON(utf8(), R"([])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a"])"), - ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["x", "y"])"), ArrayFromJSON(utf8(), R"([null, null])"), ArrayFromJSON(utf8(), R"([null, null])"), 0, swap_sides); } @@ -1677,10 +1671,9 @@ TEST(HashJoin, Scalars) { ArrayFromJSON(utf8(), R"(["a", null, "b"])"), ArrayFromJSON(utf8(), R"(["p", "q", "r"])"), // Expected output - ArrayFromJSON(utf8(), R"(["a", "a"])"), - ArrayFromJSON(utf8(), R"(["x", "y"])"), - ArrayFromJSON(utf8(), R"(["a", "a"])"), - ArrayFromJSON(utf8(), R"(["p", "p"])"), 2, swap_sides); + ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["x", "y"])"), + ArrayFromJSON(utf8(), R"(["a", "a"])"), ArrayFromJSON(utf8(), R"(["p", "p"])"), + 2, swap_sides); } } } @@ -1692,12 +1685,12 @@ TEST(HashJoin, DictNegative) { const auto dictA = ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); const auto dictB = ArrayFromJSON(utf8(), R"(["different", "dictionary"])"); - Datum datumFirst = Datum(*DictionaryArray::FromArrays( - ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); - Datum datumSecondA = Datum(*DictionaryArray::FromArrays( - ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); - Datum datumSecondB = Datum(*DictionaryArray::FromArrays( - ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); + Datum datumFirst = Datum( + *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([0, 1, 2, 3])"), dictA)); + Datum datumSecondA = Datum( + *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([3, 2, 2, 3])"), dictA)); + Datum datumSecondB = Datum( + *DictionaryArray::FromArrays(ArrayFromJSON(int32(), R"([0, 1, 1, 0])"), dictB)); for (int i = 0; i < 4; ++i) { BatchesWithSchema l, r; From 8b38e857f4bf48e07e735338fb0456b485ad84b8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 15:29:36 -0700 Subject: [PATCH 08/33] Improve API docs section --- docs/source/cpp/api/array.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/cpp/api/array.rst 
b/docs/source/cpp/api/array.rst index 8a393516371..a49f6ecc293 100644 --- a/docs/source/cpp/api/array.rst +++ b/docs/source/cpp/api/array.rst @@ -113,6 +113,9 @@ Utilities .. _api-array-from-json: +FromJSON Helpers +---------------- + .. doxygengroup:: array-from-json - :project: arrow_cpp + :content-only: :members: From f3d37dc363db3d03a605cda37d583eba5dfc54ab Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 15:36:54 -0700 Subject: [PATCH 09/33] Point JSON link to RFC 8259 --- docs/source/cpp/arrays.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index dff8a0fe68d..cd63ca5e376 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -236,7 +236,7 @@ A set of helper functions is provided for concisely creating Arrays and Scalars from JSON_ text. These helpers are intended to be used in examples, tests, or for quick prototyping and are not intended to be used where performance matters. -.. _JSON: https://en.wikipedia.org/wiki/JSON +.. _JSON: https://datatracker.ietf.org/doc/html/rfc8259 Examples for ``ArrayFromJSON``, ``ChunkedArrayFromJSON``, ``DictArrayFromJSON`` are shown below:: From 09c6e827469715353884a288ad1ebf741c91b70f Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 15:41:15 -0700 Subject: [PATCH 10/33] Prefer using in bridge_test.cc --- cpp/src/arrow/c/bridge_test.cc | 487 ++++++++++++++++----------------- 1 file changed, 230 insertions(+), 257 deletions(-) diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 5e2e6ea313f..77e560042c6 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -55,6 +55,7 @@ namespace arrow { +using ::arrow::ArrayFromJSON; using internal::ArrayDeviceExportTraits; using internal::ArrayDeviceStreamExportTraits; using internal::ArrayExportGuard; @@ -923,9 +924,7 @@ TEST_F(TestArrayExport, Primitive) { } TEST_F(TestArrayExport, PrimitiveSliced) { - auto factory = []() { - return arrow::ArrayFromJSON(int16(), "[1, 2, null, -3]")->Slice(1, 2); - }; + auto factory = []() { return ArrayFromJSON(int16(), "[1, 2, null, -3]")->Slice(1, 2); }; TestPrimitive(factory); } @@ -1006,16 +1005,15 @@ TEST_F(TestArrayExport, List) { TEST_F(TestArrayExport, ListSliced) { { auto factory = []() { - return arrow::ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") + return ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") ->Slice(1, 2); }; TestNested(factory); } { auto factory = []() { - auto values = - arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); - auto offsets = arrow::ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->Slice(2, 4); + auto values = ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); + auto offsets = ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->Slice(2, 4); return ListArray::FromArrays(*offsets, *values); }; TestNested(factory); @@ -1033,18 +1031,16 @@ TEST_F(TestArrayExport, ListView) { TEST_F(TestArrayExport, ListViewSliced) { { auto factory = []() { - return arrow::ArrayFromJSON(list_view(int8()), - "[[1, 2], [3, null], [4, 5, 6], null]") + return ArrayFromJSON(list_view(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") ->Slice(1, 2); }; TestNested(factory); } { auto factory = []() { - auto values = - arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); - auto offsets = arrow::ArrayFromJSON(int32(), "[5, 2, 0, 3]")->Slice(1, 2); - auto sizes = arrow::ArrayFromJSON(int32(), 
"[2, 3, 6, 1]")->Slice(1, 2); + auto values = ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->Slice(1, 6); + auto offsets = ArrayFromJSON(int32(), "[5, 2, 0, 3]")->Slice(1, 2); + auto sizes = ArrayFromJSON(int32(), "[2, 3, 6, 1]")->Slice(1, 2); return ListViewArray::FromArrays(*offsets, *sizes, *values); }; TestNested(factory); @@ -1081,7 +1077,7 @@ TEST_F(TestArrayExport, Union) { Result> REEFromJSON(const std::shared_ptr& ree_type, const std::string& json) { auto ree_type_ptr = checked_cast(ree_type.get()); - auto array = arrow::ArrayFromJSON(ree_type_ptr->value_type(), json); + auto array = ArrayFromJSON(ree_type_ptr->value_type(), json); ARROW_ASSIGN_OR_RAISE( auto datum, RunEndEncode(array, compute::RunEndEncodeOptions{ree_type_ptr->run_end_type()})); @@ -1110,8 +1106,8 @@ TEST_F(TestArrayExport, RunEndEncodedSliced) { TEST_F(TestArrayExport, Dictionary) { { auto factory = []() { - auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = arrow::ArrayFromJSON(uint16(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = ArrayFromJSON(uint16(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -1119,9 +1115,8 @@ TEST_F(TestArrayExport, Dictionary) { } { auto factory = []() { - auto values = - arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays( dictionary(indices->type(), values->type(), /*ordered=*/true), indices, values); }; @@ -1129,14 +1124,13 @@ TEST_F(TestArrayExport, Dictionary) { } { auto factory = []() -> Result> { - auto values = - arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1167,8 +1161,8 @@ TEST_F(TestArrayExport, MoveNested) { TEST_F(TestArrayExport, MoveDictionary) { { auto factory = []() { - auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -1176,14 +1170,13 @@ TEST_F(TestArrayExport, MoveDictionary) { } { auto factory = []() -> Result> { - auto values = - arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, 
DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1204,14 +1197,13 @@ TEST_F(TestArrayExport, MoveChild) { /*child_id=*/1); { auto factory = []() -> Result> { - auto values = - arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); ARROW_ASSIGN_OR_RAISE( auto dict_array, DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values)); - auto offsets = arrow::ArrayFromJSON(int64(), "[0, 2, 5]"); + auto offsets = ArrayFromJSON(int64(), "[0, 2, 5]"); ARROW_ASSIGN_OR_RAISE(auto arr, LargeListArray::FromArrays(*offsets, *dict_array)); RETURN_NOT_OK(arr->ValidateFull()); return arr; @@ -1232,7 +1224,7 @@ TEST_F(TestArrayExport, ExportArrayAndType) { SchemaExportGuard schema_guard(&c_schema); ArrayExportGuard array_guard(&c_array); - auto array = arrow::ArrayFromJSON(int8(), "[1, 2, 3]"); + auto array = ArrayFromJSON(int8(), "[1, 2, 3]"); ASSERT_OK(ExportArray(*array, &c_array, &c_schema)); const ArrayData& data = *array->data(); array.reset(); @@ -1251,8 +1243,8 @@ TEST_F(TestArrayExport, ExportRecordBatch) { auto schema = ::arrow::schema( {field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); schema = schema->WithMetadata(key_value_metadata(kMetadataKeys2, kMetadataValues2)); - auto arr0 = arrow::ArrayFromJSON(int16(), "[1, 2, null]"); - auto arr1 = arrow::ArrayFromJSON(boolean(), "[false, true, false]"); + auto arr0 = ArrayFromJSON(int16(), "[1, 2, null]"); + auto arr1 = ArrayFromJSON(boolean(), "[false, true, false]"); auto batch_factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; @@ -1430,7 +1422,7 @@ class TestDeviceArrayExport : public ::testing::Test { static std::function>()> JSONArrayFactory( const std::shared_ptr& mm, std::shared_ptr type, const char* json) { - return [=]() { return ToDevice(mm, *arrow::ArrayFromJSON(type, json)->data()); }; + return [=]() { return ToDevice(mm, *ArrayFromJSON(type, json)->data()); }; } #ifdef ARROW_COMPUTE @@ -1533,7 +1525,7 @@ TEST_F(TestDeviceArrayExport, PrimitiveSliced) { auto mm = device->default_memory_manager(); auto factory = [=]() { - return (*ToDevice(mm, *arrow::ArrayFromJSON(int16(), "[1, 2, null, -3]")->data())) + return (*ToDevice(mm, *ArrayFromJSON(int16(), "[1, 2, null, -3]")->data())) ->Slice(1, 2); }; TestPrimitive(factory); @@ -1586,9 +1578,9 @@ TEST_F(TestDeviceArrayExport, ListSliced) { { auto factory = [=]() { - return (*ToDevice(mm, *arrow::ArrayFromJSON(list(int8()), - "[[1, 2], [3, null], [4, 5, 6], null]") - ->data())) + return (*ToDevice( + mm, *ArrayFromJSON(list(int8()), "[[1, 2], [3, null], [4, 5, 6], null]") + ->data())) ->Slice(1, 2); }; TestNested(factory); @@ -1596,13 +1588,11 @@ TEST_F(TestDeviceArrayExport, ListSliced) { { auto factory = [=]() { auto values = - (*ToDevice( - mm, - *arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) + (*ToDevice(mm, + *ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) ->Slice(1, 6); - auto offsets = - (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[0, 2, 3, 5, 
6]")->data())) - ->Slice(2, 4); + auto offsets = (*ToDevice(mm, *ArrayFromJSON(int32(), "[0, 2, 3, 5, 6]")->data())) + ->Slice(2, 4); return ListArray::FromArrays(*offsets, *values); }; TestNested(factory); @@ -1625,8 +1615,8 @@ TEST_F(TestDeviceArrayExport, ListViewSliced) { { auto factory = [=]() { - return (*ToDevice(mm, *arrow::ArrayFromJSON(list_view(int8()), - "[[1, 2], [3, null], [4, 5, 6], null]") + return (*ToDevice(mm, *ArrayFromJSON(list_view(int8()), + "[[1, 2], [3, null], [4, 5, 6], null]") ->data())) ->Slice(1, 2); }; @@ -1635,15 +1625,13 @@ TEST_F(TestDeviceArrayExport, ListViewSliced) { { auto factory = [=]() { auto values = - (*ToDevice( - mm, - *arrow::ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) + (*ToDevice(mm, + *ArrayFromJSON(int16(), "[1, 2, 3, 4, null, 5, 6, 7, 8]")->data())) ->Slice(1, 6); auto offsets = - (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[5, 2, 0, 3]")->data())) - ->Slice(1, 2); - auto sizes = (*ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[2, 3, 6, 1]")->data())) - ->Slice(1, 2); + (*ToDevice(mm, *ArrayFromJSON(int32(), "[5, 2, 0, 3]")->data()))->Slice(1, 2); + auto sizes = + (*ToDevice(mm, *ArrayFromJSON(int32(), "[2, 3, 6, 1]")->data()))->Slice(1, 2); return ListViewArray::FromArrays(*offsets, *sizes, *values); }; TestNested(factory); @@ -1714,8 +1702,7 @@ TEST_F(TestDeviceArrayExport, ExportArrayAndType) { SchemaExportGuard schema_guard(&c_schema); ArrayExportGuard array_guard(&c_array.array); - auto array = - ToDevice(mm, *arrow::ArrayFromJSON(int8(), "[1, 2, 3]")->data()).ValueOrDie(); + auto array = ToDevice(mm, *ArrayFromJSON(int8(), "[1, 2, 3]")->data()).ValueOrDie(); auto sync = mm->MakeDeviceSyncEvent().ValueOrDie(); ASSERT_OK(ExportDeviceArray(*array, sync, &c_array, &c_schema)); const ArrayData& data = *array->data(); @@ -1738,11 +1725,9 @@ TEST_F(TestDeviceArrayExport, ExportRecordBatch) { auto schema = ::arrow::schema( {field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); schema = schema->WithMetadata(key_value_metadata(kMetadataKeys2, kMetadataValues2)); - auto arr0 = - ToDevice(mm, *arrow::ArrayFromJSON(int16(), "[1, 2, null]")->data()).ValueOrDie(); - auto arr1 = - ToDevice(mm, *arrow::ArrayFromJSON(boolean(), "[false, true, false]")->data()) - .ValueOrDie(); + auto arr0 = ToDevice(mm, *ArrayFromJSON(int16(), "[1, 2, null]")->data()).ValueOrDie(); + auto arr1 = ToDevice(mm, *ArrayFromJSON(boolean(), "[false, true, false]")->data()) + .ValueOrDie(); auto batch_factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; auto sync = mm->MakeDeviceSyncEvent().ValueOrDie(); @@ -2816,104 +2801,103 @@ class TestArrayImport : public ::testing::Test { TEST_F(TestArrayImport, Primitive) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(arrow::ArrayFromJSON(int8(), "[1, 2, 3]")); + CheckImport(ArrayFromJSON(int8(), "[1, 2, 3]")); FillPrimitive(5, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(arrow::ArrayFromJSON(uint8(), "[1, 2, 3, 4, 5]")); + CheckImport(ArrayFromJSON(uint8(), "[1, 2, 3, 4, 5]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - CheckImport(arrow::ArrayFromJSON(int16(), "[513, 1027, 1541]")); + CheckImport(ArrayFromJSON(int16(), "[513, 1027, 1541]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - CheckImport(arrow::ArrayFromJSON(uint16(), "[513, 1027, 1541]")); + CheckImport(ArrayFromJSON(uint16(), "[513, 1027, 1541]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_32); - CheckImport(arrow::ArrayFromJSON(int32(), 
"[67305985, 134678021]")); + CheckImport(ArrayFromJSON(int32(), "[67305985, 134678021]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_32); - CheckImport(arrow::ArrayFromJSON(uint32(), "[67305985, 134678021]")); + CheckImport(ArrayFromJSON(uint32(), "[67305985, 134678021]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_64); - CheckImport(arrow::ArrayFromJSON(int64(), "[578437695752307201, 1157159078456920585]")); + CheckImport(ArrayFromJSON(int64(), "[578437695752307201, 1157159078456920585]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls1_64); - CheckImport( - arrow::ArrayFromJSON(uint64(), "[578437695752307201, 1157159078456920585]")); + CheckImport(ArrayFromJSON(uint64(), "[578437695752307201, 1157159078456920585]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_8); - CheckImport(arrow::ArrayFromJSON(boolean(), "[true, false, false]")); + CheckImport(ArrayFromJSON(boolean(), "[true, false, false]")); FillPrimitive(6, 0, 0, primitive_buffers_no_nulls5); - CheckImport(arrow::ArrayFromJSON(float32(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); + CheckImport(ArrayFromJSON(float32(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); FillPrimitive(6, 0, 0, primitive_buffers_no_nulls6); - CheckImport(arrow::ArrayFromJSON(float64(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); + CheckImport(ArrayFromJSON(float64(), "[0.0, 1.5, -2.0, 3.0, 4.0, 5.0]")); // With nulls FillPrimitive(9, -1, 0, primitive_buffers_nulls1_8); - CheckImport(arrow::ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); + CheckImport(ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); FillPrimitive(9, 2, 0, primitive_buffers_nulls1_8); - CheckImport(arrow::ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); + CheckImport(ArrayFromJSON(int8(), "[1, null, 3, 4, null, 6, 7, 8, 9]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls1_16); - CheckImport(arrow::ArrayFromJSON(int16(), "[513, null, 1541]")); + CheckImport(ArrayFromJSON(int16(), "[513, null, 1541]")); FillPrimitive(3, 1, 0, primitive_buffers_nulls1_16); - CheckImport(arrow::ArrayFromJSON(int16(), "[513, null, 1541]")); + CheckImport(ArrayFromJSON(int16(), "[513, null, 1541]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls1_8); - CheckImport(arrow::ArrayFromJSON(boolean(), "[true, null, false]")); + CheckImport(ArrayFromJSON(boolean(), "[true, null, false]")); FillPrimitive(3, 1, 0, primitive_buffers_nulls1_8); - CheckImport(arrow::ArrayFromJSON(boolean(), "[true, null, false]")); + CheckImport(ArrayFromJSON(boolean(), "[true, null, false]")); // Empty array with null data pointers FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(int32(), "[]")); + CheckImport(ArrayFromJSON(int32(), "[]")); } TEST_F(TestArrayImport, Temporal) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls7); - CheckImport(arrow::ArrayFromJSON(date32(), "[1234, 5678, 9012]")); + CheckImport(ArrayFromJSON(date32(), "[1234, 5678, 9012]")); FillPrimitive(3, 0, 0, date64_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(date64(), "[86400000, 172800000, -86400000]")); + CheckImport(ArrayFromJSON(date64(), "[86400000, 172800000, -86400000]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7); - CheckImport(arrow::ArrayFromJSON(time32(TimeUnit::SECOND), "[1234, 5678]")); + CheckImport(ArrayFromJSON(time32(TimeUnit::SECOND), "[1234, 5678]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7); - CheckImport(arrow::ArrayFromJSON(time32(TimeUnit::MILLI), "[1234, 5678]")); + CheckImport(ArrayFromJSON(time32(TimeUnit::MILLI), "[1234, 5678]")); 
FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::MICRO), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(time64(TimeUnit::MICRO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::SECOND), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(duration(TimeUnit::SECOND), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::MILLI), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(duration(TimeUnit::MILLI), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::MICRO), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(duration(TimeUnit::MICRO), "[123456789, 987654321]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls8); - CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, 987654321]")); + CheckImport(ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, 987654321]")); FillPrimitive(3, 0, 0, primitive_buffers_no_nulls7); - CheckImport(arrow::ArrayFromJSON(month_interval(), "[1234, 5678, 9012]")); + CheckImport(ArrayFromJSON(month_interval(), "[1234, 5678, 9012]")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls7); - CheckImport(arrow::ArrayFromJSON(day_time_interval(), "[[1234, 5678], [9012, 3456]]")); + CheckImport(ArrayFromJSON(day_time_interval(), "[[1234, 5678], [9012, 3456]]")); const char* json = R"(["1970-01-01","2000-02-29","1900-02-28"])"; FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::SECOND), json)); + CheckImport(ArrayFromJSON(timestamp(TimeUnit::SECOND), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls2); - CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::MILLI), json)); + CheckImport(ArrayFromJSON(timestamp(TimeUnit::MILLI), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls3); - CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::MICRO), json)); + CheckImport(ArrayFromJSON(timestamp(TimeUnit::MICRO), json)); FillPrimitive(3, 0, 0, timestamp_buffers_no_nulls4); - CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::NANO), json)); + CheckImport(ArrayFromJSON(timestamp(TimeUnit::NANO), json)); // With nulls FillPrimitive(3, -1, 0, primitive_buffers_nulls7); - CheckImport(arrow::ArrayFromJSON(date32(), "[1234, null, 9012]")); + CheckImport(ArrayFromJSON(date32(), "[1234, null, 9012]")); FillPrimitive(3, -1, 0, date64_buffers_nulls8); - CheckImport(arrow::ArrayFromJSON(date64(), "[86400000, null, -86400000]")); + CheckImport(ArrayFromJSON(date64(), "[86400000, null, -86400000]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls8); - CheckImport(arrow::ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, null]")); + CheckImport(ArrayFromJSON(time64(TimeUnit::NANO), "[123456789, null]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls8); - CheckImport(arrow::ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, null]")); + CheckImport(ArrayFromJSON(duration(TimeUnit::NANO), "[123456789, null]")); FillPrimitive(3, -1, 0, primitive_buffers_nulls7); - CheckImport(arrow::ArrayFromJSON(month_interval(), "[1234, null, 9012]")); + 
CheckImport(ArrayFromJSON(month_interval(), "[1234, null, 9012]")); FillPrimitive(2, -1, 0, primitive_buffers_nulls7); - CheckImport(arrow::ArrayFromJSON(day_time_interval(), "[[1234, 5678], null]")); + CheckImport(ArrayFromJSON(day_time_interval(), "[[1234, 5678], null]")); FillPrimitive(3, -1, 0, timestamp_buffers_nulls1); - CheckImport(arrow::ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC+2"), - R"(["1970-01-01",null,"1900-02-28"])")); + CheckImport(ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC+2"), + R"(["1970-01-01",null,"1900-02-28"])")); } TEST_F(TestArrayImport, Null) { @@ -2925,24 +2909,24 @@ TEST_F(TestArrayImport, Null) { c_struct_.offset = 0; c_struct_.buffers = buffers; c_struct_.n_buffers = n_buffers; - CheckImport(arrow::ArrayFromJSON(null(), "[null, null, null]")); + CheckImport(ArrayFromJSON(null(), "[null, null, null]")); } } TEST_F(TestArrayImport, PrimitiveWithOffset) { FillPrimitive(3, 0, 2, primitive_buffers_no_nulls1_8); - CheckImport(arrow::ArrayFromJSON(int8(), "[3, 4, 5]")); + CheckImport(ArrayFromJSON(int8(), "[3, 4, 5]")); FillPrimitive(3, 0, 1, primitive_buffers_no_nulls1_16); - CheckImport(arrow::ArrayFromJSON(uint16(), "[1027, 1541, 2055]")); + CheckImport(ArrayFromJSON(uint16(), "[1027, 1541, 2055]")); FillPrimitive(4, 0, 7, primitive_buffers_no_nulls1_8); - CheckImport(arrow::ArrayFromJSON(boolean(), "[false, false, true, false]")); + CheckImport(ArrayFromJSON(boolean(), "[false, false, true, false]")); // Empty array with null data pointers FillPrimitive(0, 0, 2, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(int32(), "[]")); + CheckImport(ArrayFromJSON(int32(), "[]")); FillPrimitive(0, 0, 3, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(boolean(), "[]")); + CheckImport(ArrayFromJSON(boolean(), "[]")); } TEST_F(TestArrayImport, NullWithOffset) { @@ -2952,18 +2936,18 @@ TEST_F(TestArrayImport, NullWithOffset) { c_struct_.offset = 5; c_struct_.n_buffers = 1; c_struct_.buffers = buffers; - CheckImport(arrow::ArrayFromJSON(null(), "[null, null, null]")); + CheckImport(ArrayFromJSON(null(), "[null, null, null]")); } TEST_F(TestArrayImport, String) { FillStringLike(4, 0, 0, string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])")); + CheckImport(ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(binary(), R"(["foo", "", "bar", "quux"])")); + CheckImport(ArrayFromJSON(binary(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, large_string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_utf8(), R"(["foo", "", "bar", "quux"])")); + CheckImport(ArrayFromJSON(large_utf8(), R"(["foo", "", "bar", "quux"])")); FillStringLike(4, 0, 0, large_string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])")); + CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])")); auto length = static_cast(std::size(binary_view_buffer1)); FillStringViewLike(length, 0, 0, binary_view_buffers_no_nulls1, 2); @@ -2971,191 +2955,186 @@ TEST_F(TestArrayImport, String) { // Empty array with null data pointers FillStringLike(0, 0, 0, string_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(utf8(), "[]")); + CheckImport(ArrayFromJSON(utf8(), "[]")); FillStringLike(0, 0, 0, large_string_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(large_binary(), "[]")); + CheckImport(ArrayFromJSON(large_binary(), "[]")); } TEST_F(TestArrayImport, 
StringWithOffset) { FillStringLike(3, 0, 1, string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])")); + CheckImport(ArrayFromJSON(utf8(), R"(["", "bar", "quux"])")); FillStringLike(2, 0, 2, large_string_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_utf8(), R"(["bar", "quux"])")); + CheckImport(ArrayFromJSON(large_utf8(), R"(["bar", "quux"])")); // Empty array with null data pointers FillStringLike(0, 0, 1, string_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(utf8(), "[]")); + CheckImport(ArrayFromJSON(utf8(), "[]")); } TEST_F(TestArrayImport, FixedSizeBinary) { FillPrimitive(2, 0, 0, primitive_buffers_no_nulls2); - CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])")); + CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])")); FillPrimitive(2, 0, 0, primitive_buffers_no_nulls3); - CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), R"(["12345.6789", "98765.4321"])")); + CheckImport(ArrayFromJSON(decimal128(15, 4), R"(["12345.6789", "98765.4321"])")); // Empty array with null data pointers FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]")); FillPrimitive(0, 0, 0, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), "[]")); + CheckImport(ArrayFromJSON(decimal128(15, 4), "[]")); } TEST_F(TestArrayImport, FixedSizeBinaryWithOffset) { FillPrimitive(1, 0, 1, primitive_buffers_no_nulls2); - CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), R"(["def"])")); + CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["def"])")); FillPrimitive(1, 0, 1, primitive_buffers_no_nulls3); - CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), R"(["98765.4321"])")); + CheckImport(ArrayFromJSON(decimal128(15, 4), R"(["98765.4321"])")); // Empty array with null data pointers FillPrimitive(0, 0, 1, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]")); FillPrimitive(0, 0, 1, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(decimal128(15, 4), "[]")); + CheckImport(ArrayFromJSON(decimal128(15, 4), "[]")); } TEST_F(TestArrayImport, List) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(5, 0, 0, list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); + CheckImport(ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListLike(3, 1, 0, list_buffers_nulls1); - CheckImport( - arrow::ArrayFromJSON(list(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); + CheckImport(ArrayFromJSON(list(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); // Large list FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListLike(3, 0, 0, large_list_buffers_no_nulls1); CheckImport( - arrow::ArrayFromJSON(large_list(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); + ArrayFromJSON(large_list(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); // Fixed-size list FillPrimitive(AddChild(), 9, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data); - CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), - "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]")); + CheckImport( + ArrayFromJSON(fixed_size_list(int8(), 3), "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]")); // Empty child array 
with null data pointers FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillFixedSizeListLike(0, 0, 0, buffers_no_nulls_no_data); - CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), "[]")); + CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), "[]")); } TEST_F(TestArrayImport, NestedList) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(AddChild(), 5, 0, 0, list_buffers_no_nulls1); FillListLike(3, 0, 0, large_list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_list(list(int8())), - "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); + CheckImport(ArrayFromJSON(large_list(list(int8())), + "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); FillPrimitive(AddChild(), 6, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(AddChild(), 2, 0, 0, buffers_no_nulls_no_data); FillListLike(2, 0, 0, list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list(fixed_size_list(int8(), 3)), - "[[[1, 2, 3], [4, 5, 6]], []]")); + CheckImport( + ArrayFromJSON(list(fixed_size_list(int8(), 3)), "[[[1, 2, 3], [4, 5, 6]], []]")); } TEST_F(TestArrayImport, ListWithOffset) { // Offset in child FillPrimitive(AddChild(), 8, 0, 1, primitive_buffers_no_nulls1_8); FillListLike(5, 0, 0, list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); + CheckImport(ArrayFromJSON(list(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); FillPrimitive(AddChild(), 9, 0, 1, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data); - CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), - "[[2, 3, 4], [5, 6, 7], [8, 9, 10]]")); + CheckImport( + ArrayFromJSON(fixed_size_list(int8(), 3), "[[2, 3, 4], [5, 6, 7], [8, 9, 10]]")); // Offset in parent FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListLike(4, 0, 1, list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); + CheckImport(ArrayFromJSON(list(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 9, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 1, buffers_no_nulls_no_data); - CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), - "[[4, 5, 6], [7, 8, 9], [10, 11, 12]]")); + CheckImport( + ArrayFromJSON(fixed_size_list(int8(), 3), "[[4, 5, 6], [7, 8, 9], [10, 11, 12]]")); // Both FillPrimitive(AddChild(), 8, 0, 2, primitive_buffers_no_nulls1_8); FillListLike(4, 0, 1, list_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); + CheckImport(ArrayFromJSON(list(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); FillPrimitive(AddChild(), 9, 0, 2, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(3, 0, 1, buffers_no_nulls_no_data); - CheckImport(arrow::ArrayFromJSON(fixed_size_list(int8(), 3), - "[[6, 7, 8], [9, 10, 11], [12, 13, 14]]")); + CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), + "[[6, 7, 8], [9, 10, 11], [12, 13, 14]]")); } TEST_F(TestArrayImport, ListView) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(5, 0, 0, list_view_buffers_no_nulls1); - CheckImport( - arrow::ArrayFromJSON(list_view(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); + CheckImport(ArrayFromJSON(list_view(int8()), "[[1, 2], [], [3, 4, 5], [6], [7, 8]]")); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListView(3, 1, 0, list_view_buffers_nulls1); - CheckImport(arrow::ArrayFromJSON(list_view(int16()), - "[[513, 
1027], null, [1541, 2055, 2569]]")); + CheckImport( + ArrayFromJSON(list_view(int16()), "[[513, 1027], null, [1541, 2055, 2569]]")); // Large list-view FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_16); FillListView(3, 0, 0, large_list_view_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_list_view(int16()), - "[[513, 1027], [], [1541, 2055, 2569]]")); + CheckImport( + ArrayFromJSON(large_list_view(int16()), "[[513, 1027], [], [1541, 2055, 2569]]")); } TEST_F(TestArrayImport, NestedListView) { FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(AddChild(), 5, 0, 0, list_view_buffers_no_nulls1); FillListView(3, 0, 0, large_list_view_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(large_list_view(list_view(int8())), - "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); + CheckImport(ArrayFromJSON(large_list_view(list_view(int8())), + "[[[1, 2], []], [], [[3, 4, 5], [6], [7, 8]]]")); FillPrimitive(AddChild(), 6, 0, 0, primitive_buffers_no_nulls1_8); FillFixedSizeListLike(AddChild(), 2, 0, 0, buffers_no_nulls_no_data); FillListView(2, 0, 0, list_view_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list_view(fixed_size_list(int8(), 3)), - "[[[1, 2, 3], [4, 5, 6]], []]")); + CheckImport(ArrayFromJSON(list_view(fixed_size_list(int8(), 3)), + "[[[1, 2, 3], [4, 5, 6]], []]")); } TEST_F(TestArrayImport, ListViewWithOffset) { // Offset in child FillPrimitive(AddChild(), 8, 0, 1, primitive_buffers_no_nulls1_8); FillListView(5, 0, 0, list_view_buffers_no_nulls1); - CheckImport( - arrow::ArrayFromJSON(list_view(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); + CheckImport(ArrayFromJSON(list_view(int8()), "[[2, 3], [], [4, 5, 6], [7], [8, 9]]")); // Offset in parent FillPrimitive(AddChild(), 8, 0, 0, primitive_buffers_no_nulls1_8); FillListView(4, 0, 1, list_view_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); + CheckImport(ArrayFromJSON(list_view(int8()), "[[], [3, 4, 5], [6], [7, 8]]")); // Both FillPrimitive(AddChild(), 8, 0, 2, primitive_buffers_no_nulls1_8); FillListView(4, 0, 1, list_view_buffers_no_nulls1); - CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); + CheckImport(ArrayFromJSON(list_view(int8()), "[[], [5, 6, 7], [8], [9, 10]]")); } TEST_F(TestArrayImport, Struct) { FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_16); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - auto expected = - arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), - R"([["foo", 513], ["", null], ["bar", 1541]])"); + auto expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), + R"([["foo", 513], ["", null], ["bar", 1541]])"); CheckImport(expected); FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 0, primitive_buffers_no_nulls1_16); FillStructLike(3, -1, 0, 2, buffers_nulls_no_data1); - expected = - arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), + expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", uint16())}), R"([["foo", 513], null, ["bar", 1541]])"); CheckImport(expected); FillStringLike(AddChild(), 3, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 0, primitive_buffers_no_nulls1_16); FillStructLike(3, -1, 0, 2, buffers_nulls_no_data1); - expected = arrow::ArrayFromJSON( + expected = ArrayFromJSON( 
struct_({field("strs", utf8(), /*nullable=*/false), field("ints", uint16())}), R"([["foo", 513], null, ["bar", 1541]])"); CheckImport(expected); @@ -3209,7 +3188,7 @@ TEST_F(TestArrayImport, RunEndEncodedWithOffset) { TEST_F(TestArrayImport, SparseUnion) { auto type = sparse_union({field("strs", utf8()), field("ints", int8())}, {43, 42}); auto expected = - arrow::ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "bar"], [43, "quux"]])"); + ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "bar"], [43, "quux"]])"); FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 4, -1, 0, primitive_buffers_nulls1_8); @@ -3224,7 +3203,7 @@ TEST_F(TestArrayImport, SparseUnion) { CheckImport(expected); // Empty array with null data pointers - expected = arrow::ArrayFromJSON(type, "[]"); + expected = ArrayFromJSON(type, "[]"); FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted); FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillUnionLike(UnionMode::SPARSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false); @@ -3235,8 +3214,8 @@ TEST_F(TestArrayImport, SparseUnion) { TEST_F(TestArrayImport, DenseUnion) { auto type = dense_union({field("strs", utf8()), field("ints", int8())}, {43, 42}); - auto expected = arrow::ArrayFromJSON( - type, R"([[42, 1], [42, null], [43, "foo"], [43, ""], [42, 3]])"); + auto expected = + ArrayFromJSON(type, R"([[42, 1], [42, null], [43, "foo"], [43, ""], [42, 3]])"); FillStringLike(AddChild(), 2, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_8); @@ -3251,7 +3230,7 @@ TEST_F(TestArrayImport, DenseUnion) { CheckImport(expected); // Empty array with null data pointers - expected = arrow::ArrayFromJSON(type, "[]"); + expected = ArrayFromJSON(type, "[]"); FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted); FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted); FillUnionLike(UnionMode::DENSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false); @@ -3265,17 +3244,16 @@ TEST_F(TestArrayImport, StructWithOffset) { FillStringLike(AddChild(), 3, 0, 1, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, 0, 2, primitive_buffers_no_nulls1_8); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - auto expected = - arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), - R"([["", 3], ["bar", 4], ["quux", 5]])"); + auto expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), + R"([["", 3], ["bar", 4], ["quux", 5]])"); CheckImport(expected); // Parent and child FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(AddChild(), 4, 0, 2, primitive_buffers_no_nulls1_8); FillStructLike(3, 0, 1, 2, buffers_no_nulls_no_data); - expected = arrow::ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), - R"([["", 4], ["bar", 5], ["quux", 6]])"); + expected = ArrayFromJSON(struct_({field("strs", utf8()), field("ints", int8())}), + R"([["", 4], ["bar", 5], ["quux", 6]])"); CheckImport(expected); } @@ -3284,7 +3262,7 @@ TEST_F(TestArrayImport, Map) { FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls1_8); FillStructLike(AddChild(), 5, 0, 0, 2, buffers_no_nulls_no_data); FillListLike(3, 1, 0, list_buffers_nulls1); - auto expected = arrow::ArrayFromJSON( + auto expected = ArrayFromJSON( map(utf8(), uint8()), R"([[["foo", 1], ["", 2]], null, [["bar", 3], ["quux", 4], ["xyzzy", 5]]])"); CheckImport(expected); @@ -3295,8 +3273,8 @@ TEST_F(TestArrayImport, Dictionary) { FillPrimitive(6, 0, 0, 
primitive_buffers_no_nulls4); FillDictionary(); - auto dict_values = arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); - auto indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + auto dict_values = ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); + auto indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN( auto expected, DictionaryArray::FromArrays(dictionary(int8(), utf8()), indices, dict_values)); @@ -3318,8 +3296,8 @@ TEST_F(TestArrayImport, NestedDictionary) { FillPrimitive(6, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto dict_values = arrow::ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6]]"); - auto indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + auto dict_values = ArrayFromJSON(list(int8()), "[[1, 2], [], [3, 4, 5], [6]]"); + auto indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN(auto expected, DictionaryArray::FromArrays(dictionary(int8(), list(int8())), indices, dict_values)); @@ -3330,12 +3308,12 @@ TEST_F(TestArrayImport, NestedDictionary) { FillDictionary(LastChild()); FillListLike(3, 0, 0, list_buffers_no_nulls1); - dict_values = arrow::ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); - indices = arrow::ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); + dict_values = ArrayFromJSON(utf8(), R"(["foo", "", "bar", "quux"])"); + indices = ArrayFromJSON(int8(), "[1, 2, 0, 1, 3, 0]"); ASSERT_OK_AND_ASSIGN( auto dict_array, DictionaryArray::FromArrays(dictionary(int8(), utf8()), indices, dict_values)); - auto offsets = arrow::ArrayFromJSON(int32(), "[0, 2, 2, 5]"); + auto offsets = ArrayFromJSON(int32(), "[0, 2, 2, 5]"); ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *dict_array)); CheckImport(expected); } @@ -3345,16 +3323,16 @@ TEST_F(TestArrayImport, DictionaryWithOffset) { FillPrimitive(3, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto expected = arrow::DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0]", - R"(["", "bar", "quux"])"); + auto expected = DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0]", + R"(["", "bar", "quux"])"); CheckImport(expected); FillStringLike(AddChild(), 4, 0, 0, string_buffers_no_nulls1); FillPrimitive(4, 0, 2, primitive_buffers_no_nulls4); FillDictionary(); - expected = arrow::DictArrayFromJSON(dictionary(int8(), utf8()), "[0, 1, 3, 0]", - R"(["foo", "", "bar", "quux"])"); + expected = DictArrayFromJSON(dictionary(int8(), utf8()), "[0, 1, 3, 0]", + R"(["foo", "", "bar", "quux"])"); CheckImport(expected); } @@ -3363,8 +3341,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { // smallint FillPrimitive(3, 0, 0, primitive_buffers_no_nulls1_16); - auto expected = ExtensionType::WrapArray( - smallint(), arrow::ArrayFromJSON(int16(), "[513, 1027, 1541]")); + auto expected = + ExtensionType::WrapArray(smallint(), ArrayFromJSON(int16(), "[513, 1027, 1541]")); CheckImport(expected); // dict_extension_type @@ -3372,8 +3350,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { FillPrimitive(6, 0, 0, primitive_buffers_no_nulls4); FillDictionary(); - auto storage = arrow::DictArrayFromJSON( - dictionary(int8(), utf8()), "[1, 2, 0, 1, 3, 0]", R"(["foo", "", "bar", "quux"])"); + auto storage = DictArrayFromJSON(dictionary(int8(), utf8()), "[1, 2, 0, 1, 3, 0]", + R"(["foo", "", "bar", "quux"])"); expected = ExtensionType::WrapArray(dict_extension_type(), storage); CheckImport(expected); @@ -3381,8 +3359,8 @@ TEST_F(TestArrayImport, RegisteredExtension) { FillPrimitive(AddChild(), 3, 0, /*offset=*/0, 
primitive_buffers_no_nulls6); FillPrimitive(AddChild(), 3, 0, /*offset=*/3, primitive_buffers_no_nulls6); FillStructLike(3, 0, 0, 2, buffers_no_nulls_no_data); - expected = MakeComplex128(arrow::ArrayFromJSON(float64(), "[0.0, 1.5, -2.0]"), - arrow::ArrayFromJSON(float64(), "[3.0, 4.0, 5.0]")); + expected = MakeComplex128(ArrayFromJSON(float64(), "[0.0, 1.5, -2.0]"), + ArrayFromJSON(float64(), "[3.0, 4.0, 5.0]")); CheckImport(expected); } @@ -3443,7 +3421,7 @@ TEST_F(TestArrayImport, ListViewNoError) { // Null offsets pointer FillPrimitive(AddChild(), 0, 0, 0, primitive_buffers_no_nulls1_8); FillListView(0, 0, 0, all_buffers_omitted); - CheckImport(arrow::ArrayFromJSON(list_view(int8()), "[]")); + CheckImport(ArrayFromJSON(list_view(int8()), "[]")); } TEST_F(TestArrayImport, MapError) { @@ -3493,8 +3471,8 @@ TEST_F(TestArrayImport, RecursionError) { TEST_F(TestArrayImport, ImportRecordBatch) { auto schema = ::arrow::schema( {field("strs", utf8(), /*nullable=*/false), field("ints", uint16())}); - auto expected_strs = arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); - auto expected_ints = arrow::ArrayFromJSON(uint16(), "[513, null, 1541]"); + auto expected_strs = ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); + auto expected_ints = ArrayFromJSON(uint16(), "[513, null, 1541]"); FillStringLike(AddChild(), 3, 0, 1, string_buffers_no_nulls1); FillPrimitive(AddChild(), 3, -1, 0, primitive_buffers_nulls1_16); @@ -3529,7 +3507,7 @@ TEST_F(TestArrayImport, ImportArrayAndType) { ArrayReleaseCallback array_cb(&c_struct_); ASSERT_OK_AND_ASSIGN(auto array, ImportArray(&c_struct_, &schema_builder.c_struct_)); - AssertArraysEqual(*array, *arrow::ArrayFromJSON(int8(), "[1, 2, 3]")); + AssertArraysEqual(*array, *ArrayFromJSON(int8(), "[1, 2, 3]")); schema_cb.AssertCalled(); // was released array_cb.AssertNotCalled(); ASSERT_TRUE(ArrowArrayIsReleased(&c_struct_)); // was moved @@ -3554,8 +3532,8 @@ TEST_F(TestArrayImport, ImportArrayAndTypeError) { TEST_F(TestArrayImport, ImportRecordBatchAndSchema) { // Test importing both record batch and its schema at the same time auto schema = ::arrow::schema({field("strs", utf8()), field("ints", uint16())}); - auto expected_strs = arrow::ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); - auto expected_ints = arrow::ArrayFromJSON(uint16(), "[513, null, 1541]"); + auto expected_strs = ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"); + auto expected_ints = ArrayFromJSON(uint16(), "[513, null, 1541]"); SchemaStructBuilder schema_builder; schema_builder.FillPrimitive(schema_builder.AddChild(), "u", "strs"); @@ -3832,7 +3810,7 @@ class TestArrayRoundtrip : public ::testing::Test { void SetUp() override { pool_ = default_memory_pool(); } static ArrayFactory JSONArrayFactory(std::shared_ptr type, const char* json) { - return [=]() { return arrow::ArrayFromJSON(type, json); }; + return [=]() { return ArrayFromJSON(type, json); }; } static ArrayFactory SlicedArrayFactory(ArrayFactory factory) { @@ -3985,7 +3963,7 @@ TEST_F(TestArrayRoundtrip, BinaryViewMultipleBuffers) { TEST_F(TestArrayRoundtrip, UnknownNullCount) { TestWithArrayFactory([]() -> Result> { - auto arr = arrow::ArrayFromJSON(int32(), "[0, 1, 2]"); + auto arr = ArrayFromJSON(int32(), "[0, 1, 2]"); if (arr->null_bitmap()) { return Status::Invalid( "Failed precondition: " @@ -4021,7 +3999,7 @@ TEST_F(TestArrayRoundtrip, ListView) { std::shared_ptr sizes; ArrayFromVector(std::vector{2, 2, 3, 1, 2, 0}, &sizes); - auto values = arrow::ArrayFromJSON(int8(), "[4, 5, 6, null, 8, null]"); + auto values = 
ArrayFromJSON(int8(), "[4, 5, 6, null, 8, null]"); auto result = ListViewArray::FromArrays(*offsets, *sizes, *values, pool_); if (result.ok()) { RETURN_NOT_OK((*result)->ValidateFull()); @@ -4108,10 +4086,9 @@ TEST_F(TestArrayRoundtrip, RunEndEncoded) { auto ree_array, RunEndEncodedArray::Make( run_end_encoded(int64(), list(utf8())), 8, - arrow::ArrayFromJSON(int64(), "[1, 3, 4, 7, 8]"), - arrow::ArrayFromJSON( - list(utf8()), - R"([["abc", "def"], ["efg"], [], null, ["efg", "hij"]])"))); + ArrayFromJSON(int64(), "[1, 3, 4, 7, 8]"), + ArrayFromJSON(list(utf8()), + R"([["abc", "def"], ["efg"], [], null, ["efg", "hij"]])"))); RETURN_NOT_OK(ree_array->ValidateFull()); return ree_array; }; @@ -4123,8 +4100,8 @@ TEST_F(TestArrayRoundtrip, RunEndEncoded) { TEST_F(TestArrayRoundtrip, Dictionary) { { auto factory = []() { - auto values = arrow::ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays(dictionary(indices->type(), values->type()), indices, values); }; @@ -4133,9 +4110,8 @@ TEST_F(TestArrayRoundtrip, Dictionary) { } { auto factory = []() { - auto values = - arrow::ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); - auto indices = arrow::ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); + auto values = ArrayFromJSON(list(utf8()), R"([["abc", "def"], ["efg"], []])"); + auto indices = ArrayFromJSON(int32(), "[0, 2, 1, null, 1]"); return DictionaryArray::FromArrays( dictionary(indices->type(), values->type(), /*ordered=*/true), indices, values); }; @@ -4199,8 +4175,8 @@ TEST_F(TestArrayRoundtrip, UnregisteredExtension) { TEST_F(TestArrayRoundtrip, RecordBatch) { auto schema = ::arrow::schema( {field("ints", int16()), field("bools", boolean(), /*nullable=*/false)}); - auto arr0 = arrow::ArrayFromJSON(int16(), "[1, 2, null]"); - auto arr1 = arrow::ArrayFromJSON(boolean(), "[false, true, false]"); + auto arr0 = ArrayFromJSON(int16(), "[1, 2, null]"); + auto arr1 = ArrayFromJSON(boolean(), "[false, true, false]"); { auto factory = [&]() { return RecordBatch::Make(schema, 3, {arr0, arr1}); }; @@ -4274,7 +4250,7 @@ class TestDeviceArrayRoundtrip : public ::testing::Test { static ArrayFactory JSONArrayFactory(const std::shared_ptr& mm, std::shared_ptr type, const char* json) { - return [=]() { return ToDevice(mm, *arrow::ArrayFromJSON(type, json)->data()); }; + return [=]() { return ToDevice(mm, *ArrayFromJSON(type, json)->data()); }; } static ArrayFactory SlicedArrayFactory(ArrayFactory factory) { @@ -4515,8 +4491,8 @@ TEST_F(TestArrayStreamExport, Empty) { TEST_F(TestArrayStreamExport, Simple) { auto schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches(schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches( + schema, {ArrayFromJSON(int32(), "[1, 2]"), ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, schema)); struct ArrowArrayStream c_stream; @@ -4534,8 +4510,8 @@ TEST_F(TestArrayStreamExport, Simple) { TEST_F(TestArrayStreamExport, ArrayLifetime) { auto schema = arrow::schema({field("ints", int32())}); - auto batches = MakeBatches(schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches( + schema, {ArrayFromJSON(int32(), "[1, 
2]"), ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, schema)); struct ArrowArrayStream c_stream; @@ -4615,10 +4591,9 @@ TEST_F(TestArrayStreamExport, ChunkedArrayExportEmpty) { } TEST_F(TestArrayStreamExport, ChunkedArrayExport) { - ASSERT_OK_AND_ASSIGN( - auto chunked_array, - ChunkedArray::Make({arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")})); + ASSERT_OK_AND_ASSIGN(auto chunked_array, + ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")})); struct ArrowArrayStream c_stream; struct ArrowSchema c_schema; @@ -4739,9 +4714,8 @@ class TestArrayStreamRoundtrip : public BaseArrayStreamTest { TEST_F(TestArrayStreamRoundtrip, Simple) { auto orig_schema = arrow::schema({field("ints", int32())}); - auto batches = - MakeBatches(orig_schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches(orig_schema, {ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema)); @@ -4757,9 +4731,8 @@ TEST_F(TestArrayStreamRoundtrip, Simple) { TEST_F(TestArrayStreamRoundtrip, CloseEarly) { auto orig_schema = arrow::schema({field("ints", int32())}); - auto batches = - MakeBatches(orig_schema, {arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")}); + auto batches = MakeBatches(orig_schema, {ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")}); ASSERT_OK_AND_ASSIGN(auto reader, RecordBatchReader::Make(batches, orig_schema)); @@ -4814,9 +4787,9 @@ TEST_F(TestArrayStreamRoundtrip, SchemaError) { } TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtrip) { - ASSERT_OK_AND_ASSIGN( - auto src, ChunkedArray::Make({arrow::ArrayFromJSON(int32(), "[1, 2]"), - arrow::ArrayFromJSON(int32(), "[4, 5, null]")})); + ASSERT_OK_AND_ASSIGN(auto src, + ChunkedArray::Make({ArrayFromJSON(int32(), "[1, 2]"), + ArrayFromJSON(int32(), "[4, 5, null]")})); Roundtrip(src, [&](const std::shared_ptr& dst) { AssertTypeEqual(*dst->type(), *src->type()); @@ -4937,10 +4910,10 @@ TEST_F(TestArrayDeviceStreamExport, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(schema, {arr1, arr2}); @@ -4966,10 +4939,10 @@ TEST_F(TestArrayDeviceStreamExport, ArrayLifetime) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto schema = arrow::schema({field("ints", 
int32())}); auto batches = MakeBatches(schema, {arr1, arr2}); @@ -5069,10 +5042,10 @@ TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExport) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({arr1, arr2})); @@ -5245,10 +5218,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); @@ -5270,10 +5243,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, CloseEarly) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); @@ -5322,10 +5295,10 @@ TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtrip) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({arr1, arr2})); @@ -5388,10 +5361,10 @@ TEST_F(TestAsyncDeviceArrayStreamRoundTrip, Simple) { auto mm = device->default_memory_manager(); ASSERT_OK_AND_ASSIGN(auto arr1, - ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[1, 2]")->data())); + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); ASSERT_EQ(device->device_type(), arr1->device_type()); - ASSERT_OK_AND_ASSIGN( - auto arr2, ToDevice(mm, *arrow::ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); ASSERT_EQ(device->device_type(), arr2->device_type()); auto orig_schema = arrow::schema({field("ints", int32())}); auto batches = MakeBatches(orig_schema, {arr1, arr2}); 
From 0a342e3eee73378a9020704002c63b677ef313f6 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 15:42:54 -0700 Subject: [PATCH 11/33] Update cpp/src/arrow/util/from_json_test.cc Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/util/from_json_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/from_json_test.cc b/cpp/src/arrow/util/from_json_test.cc index a36f1586f09..15fc920fa0b 100644 --- a/cpp/src/arrow/util/from_json_test.cc +++ b/cpp/src/arrow/util/from_json_test.cc @@ -915,7 +915,7 @@ TEST(TestMap, IntegerMapToStringList) { auto& key_item_builder = checked_cast(*key_builder.item_builder()); auto& item_builder = checked_cast(*map_builder.item_builder()); auto& item_value_builder = - checked_cast(*item_builder.value_builder()); + checked_cast(*item_builder.value_builder()); ASSERT_OK(map_builder.Append()); ASSERT_OK(key_builder.Append()); From 3254825682030b1ad63fc7a5b43ccf3530f96bbb Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 16:04:24 -0700 Subject: [PATCH 12/33] Remove from_json.h from api.h --- cpp/src/arrow/acero/hash_join_node_test.cc | 1 - cpp/src/arrow/api.h | 1 - cpp/src/arrow/c/bridge_test.cc | 2 -- 3 files changed, 4 deletions(-) diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index f6d91acca56..654fd59c45d 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -42,7 +42,6 @@ using testing::UnorderedElementsAreArray; namespace arrow { -using arrow::ArrayFromJSON; using arrow::gen::Constant; using arrow::random::kSeedMax; using arrow::random::RandomArrayGenerator; diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 2a0bc345211..ac568a00eed 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -38,7 +38,6 @@ #include "arrow/table_builder.h" // IWYU pragma: export #include "arrow/tensor.h" // IWYU pragma: export #include "arrow/type.h" // IWYU pragma: export -#include "arrow/util/from_json.h" // IWYU pragma: export #include "arrow/util/key_value_metadata.h" // IWYU pragma: export #include "arrow/visit_array_inline.h" // IWYU pragma: export #include "arrow/visit_scalar_inline.h" // IWYU pragma: export diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 77e560042c6..75d5d1f428b 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -41,7 +41,6 @@ #include "arrow/util/binary_view_util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/endian.h" -#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging_internal.h" #include "arrow/util/macros.h" @@ -55,7 +54,6 @@ namespace arrow { -using ::arrow::ArrayFromJSON; using internal::ArrayDeviceExportTraits; using internal::ArrayDeviceStreamExportTraits; using internal::ArrayExportGuard; From b4a9ef4cc75ff6eca671dca60f456b52e05da0b8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 16:21:51 -0700 Subject: [PATCH 13/33] Add `class` keyword back in to disambiguate We need the class keyword here because the namespace change causes a collision with StringBuilder the class and StringBuilder the function (in string_builder.h). 
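
For illustration only, here is a minimal standalone sketch of the name lookup
involved. The namespaces and bodies below are made up; only the collision
pattern mirrors the real situation (the arrow::StringBuilder class vs. the
StringBuilder() helper function in util/string_builder.h), and the real code
uses checked_cast rather than static_cast:

    #include <string>

    namespace outer {

    // Stands in for the StringBuilder *class*.
    class StringBuilder {
     public:
      void Append(const std::string&) {}
    };

    namespace inner {

    // Stands in for the same-named StringBuilder *function*; inside this
    // namespace it hides the outer class name from ordinary lookup.
    template <typename... Args>
    std::string StringBuilder(Args&&...) {
      return {};
    }

    void Demo(outer::StringBuilder& raw) {
      // auto& b = static_cast<StringBuilder&>(raw);       // error: the bare name
      //                                                   // finds the function here
      auto& b1 = static_cast<class StringBuilder&>(raw);   // OK: elaborated-type-
                                                           // specifier skips non-types
      auto& b2 = static_cast<outer::StringBuilder&>(raw);  // OK: explicit namespace
      b1.Append("x");
      b2.Append("y");
    }

    }  // namespace inner
    }  // namespace outer

Both spellings resolve the ambiguity; qualifying the namespace explicitly is
the alternative that a later commit in this series switches to.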
--- cpp/src/arrow/util/from_json_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/from_json_test.cc b/cpp/src/arrow/util/from_json_test.cc index 15fc920fa0b..a36f1586f09 100644 --- a/cpp/src/arrow/util/from_json_test.cc +++ b/cpp/src/arrow/util/from_json_test.cc @@ -915,7 +915,7 @@ TEST(TestMap, IntegerMapToStringList) { auto& key_item_builder = checked_cast(*key_builder.item_builder()); auto& item_builder = checked_cast(*map_builder.item_builder()); auto& item_value_builder = - checked_cast(*item_builder.value_builder()); + checked_cast(*item_builder.value_builder()); ASSERT_OK(map_builder.Append()); ASSERT_OK(key_builder.Append()); From 2691fe79c7667c2597017fa5c334762e9f13aa9d Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 21 Apr 2025 18:19:07 -0700 Subject: [PATCH 14/33] Minor edits to user guide code sample --- docs/source/cpp/arrays.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index cd63ca5e376..2d933ac6814 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -247,7 +247,7 @@ are shown below:: auto bool_array = ArrayFromJSON(boolean(), "[true, false, true]"); auto string_array = ArrayFromJSON(utf8(), R"(["Hello", "World", null])"); - // Timestamps can be used from string representations + // Timestamps can be created from string representations auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND), R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])"); @@ -257,7 +257,7 @@ are shown below:: "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]" ); auto map_array = ArrayFromJSON( - map(boolean(), int32()), + map(utf8(), int32()), R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])" ); auto struct_array = ArrayFromJSON( From 3c06d36f44aca10d4e5769f2df5050986ae437ab Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 22 Apr 2025 08:16:37 -0700 Subject: [PATCH 15/33] Update cpp/src/arrow/util/from_json.h Co-authored-by: Enrico Minack --- cpp/src/arrow/util/from_json.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/from_json.h b/cpp/src/arrow/util/from_json.h index c3ff4b81f65..2e0b7de63b0 100644 --- a/cpp/src/arrow/util/from_json.h +++ b/cpp/src/arrow/util/from_json.h @@ -72,7 +72,7 @@ ARROW_EXPORT Status ScalarFromJSON(const std::shared_ptr&, std::string_view json, std::shared_ptr* out); -/// \brief Create an DictScalar from a JSON string +/// \brief Create an DictionaryScalar from a JSON string ARROW_EXPORT Status DictScalarFromJSON(const std::shared_ptr&, std::string_view index_json, std::string_view dictionary_json, std::shared_ptr* out); From 1bb08ebe57be18c2519d59f093b89a7a83b144e5 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 22 Apr 2025 16:29:15 +0000 Subject: [PATCH 16/33] Update user guide docs per review comment --- docs/source/cpp/arrays.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index 2d933ac6814..4cd682d1f0a 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -235,6 +235,9 @@ FromJSON Helpers A set of helper functions is provided for concisely creating Arrays and Scalars from JSON_ text. These helpers are intended to be used in examples, tests, or for quick prototyping and are not intended to be used where performance matters. 
+Most users will want to use the API described in :doc:`json` which provides a +performant way to create :class:`arrow::Table` and :class:`arrow::RecordBatch` +objects from line-separated JSON files. .. _JSON: https://datatracker.ietf.org/doc/html/rfc8259 From 11da4d40f05e97ad744df58c3d4b0db267e6cbb0 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 22 Apr 2025 09:32:29 -0700 Subject: [PATCH 17/33] Use explicit namespace on StringBuilder --- cpp/src/arrow/util/from_json_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/from_json_test.cc b/cpp/src/arrow/util/from_json_test.cc index a36f1586f09..26011cfacf6 100644 --- a/cpp/src/arrow/util/from_json_test.cc +++ b/cpp/src/arrow/util/from_json_test.cc @@ -915,7 +915,7 @@ TEST(TestMap, IntegerMapToStringList) { auto& key_item_builder = checked_cast(*key_builder.item_builder()); auto& item_builder = checked_cast(*map_builder.item_builder()); auto& item_value_builder = - checked_cast(*item_builder.value_builder()); + checked_cast(*item_builder.value_builder()); ASSERT_OK(map_builder.Append()); ASSERT_OK(key_builder.Append()); From 28e2c332bfd5d530caac10374cc261de6ae1af92 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 1 May 2025 17:33:07 -0700 Subject: [PATCH 18/33] Rename util::*FromJSON to json::*FromJSONString --- cpp/src/arrow/CMakeLists.txt | 5 +- cpp/src/arrow/c/bridge_benchmark.cc | 2 +- cpp/src/arrow/ipc/api.h | 2 +- cpp/src/arrow/ipc/generate_fuzz_corpus.cc | 2 +- cpp/src/arrow/json/CMakeLists.txt | 1 + .../from_json.cc => json/from_string.cc} | 63 +-- .../{util/from_json.h => json/from_string.h} | 38 +- .../from_string_test.cc} | 409 ++++++++++-------- cpp/src/arrow/meson.build | 4 +- cpp/src/arrow/testing/gtest_util.cc | 12 +- cpp/src/arrow/util/CMakeLists.txt | 4 - python/pyarrow/src/arrow/python/gdb.cc | 2 +- python/pyarrow/tests/extensions.pyx | 2 +- 13 files changed, 286 insertions(+), 260 deletions(-) rename cpp/src/arrow/{util/from_json.cc => json/from_string.cc} (94%) rename cpp/src/arrow/{util/from_json.h => json/from_string.h} (56%) rename cpp/src/arrow/{util/from_json_test.cc => json/from_string_test.cc} (77%) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index e096b2f4cbd..3c1dbc46a24 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -551,10 +551,6 @@ if(ARROW_HAVE_NEON) list(APPEND ARROW_UTIL_SRCS util/bpacking_neon.cc) endif() -if(ARROW_JSON) - list(APPEND ARROW_UTIL_SRCS util/from_json.cc) -endif() - if(ARROW_WITH_BROTLI) list(APPEND ARROW_UTIL_SRCS util/compression_brotli.cc) endif() @@ -940,6 +936,7 @@ if(ARROW_JSON) json/chunked_builder.cc json/chunker.cc json/converter.cc + json/from_string.cc json/object_parser.cc json/object_writer.cc json/parser.cc diff --git a/cpp/src/arrow/c/bridge_benchmark.cc b/cpp/src/arrow/c/bridge_benchmark.cc index 0a4858b65fc..2df31318ab6 100644 --- a/cpp/src/arrow/c/bridge_benchmark.cc +++ b/cpp/src/arrow/c/bridge_benchmark.cc @@ -22,10 +22,10 @@ #include "arrow/array.h" #include "arrow/c/bridge.h" #include "arrow/c/helpers.h" +#include "arrow/json/from_string.h" #include "arrow/record_batch.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" -#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" namespace arrow::benchmarks { diff --git a/cpp/src/arrow/ipc/api.h b/cpp/src/arrow/ipc/api.h index 0828730d905..84784b8a135 100644 --- a/cpp/src/arrow/ipc/api.h +++ b/cpp/src/arrow/ipc/api.h @@ -22,4 +22,4 @@ #include "arrow/ipc/message.h" #include 
"arrow/ipc/reader.h" #include "arrow/ipc/writer.h" -#include "arrow/util/from_json.h" +#include "arrow/json/from_string.h" diff --git a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc index 2f6d97ec989..24291ee10af 100644 --- a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc +++ b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc @@ -29,11 +29,11 @@ #include "arrow/io/memory.h" #include "arrow/ipc/test_common.h" #include "arrow/ipc/writer.h" +#include "arrow/json/from_string.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/testing/extension_type.h" #include "arrow/util/compression.h" -#include "arrow/util/from_json.h" #include "arrow/util/io_util.h" #include "arrow/util/key_value_metadata.h" diff --git a/cpp/src/arrow/json/CMakeLists.txt b/cpp/src/arrow/json/CMakeLists.txt index 95b299d8f0c..fa7d0607848 100644 --- a/cpp/src/arrow/json/CMakeLists.txt +++ b/cpp/src/arrow/json/CMakeLists.txt @@ -20,6 +20,7 @@ add_arrow_test(test chunked_builder_test.cc chunker_test.cc converter_test.cc + from_string_test.cc parser_test.cc reader_test.cc PREFIX diff --git a/cpp/src/arrow/util/from_json.cc b/cpp/src/arrow/json/from_string.cc similarity index 94% rename from cpp/src/arrow/util/from_json.cc rename to cpp/src/arrow/json/from_string.cc index 8eb27ce989e..23c25c9bad1 100644 --- a/cpp/src/arrow/util/from_json.cc +++ b/cpp/src/arrow/json/from_string.cc @@ -31,12 +31,12 @@ #include "arrow/array/builder_time.h" #include "arrow/array/builder_union.h" #include "arrow/chunked_array.h" +#include "arrow/json/from_string.h" #include "arrow/scalar.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/float16.h" -#include "arrow/util/from_json.h" #include "arrow/util/logging_internal.h" #include "arrow/util/value_parsing.h" @@ -55,7 +55,7 @@ namespace arrow { using internal::ParseValue; using util::Float16; -namespace util { +namespace json { using ::arrow::internal::checked_cast; using ::arrow::internal::checked_pointer_cast; @@ -974,8 +974,8 @@ Status GetConverter(const std::shared_ptr& type, } // namespace -Result> ArrayFromJSON(const std::shared_ptr& type, - std::string_view json_string) { +Result> ArrayFromJSONString(const std::shared_ptr& type, + std::string_view json_string) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); @@ -993,32 +993,33 @@ Result> ArrayFromJSON(const std::shared_ptr& ty return out; } -Result> ArrayFromJSON(const std::shared_ptr& type, - const std::string& json_string) { - return ArrayFromJSON(type, std::string_view(json_string)); +Result> ArrayFromJSONString(const std::shared_ptr& type, + const std::string& json_string) { + return ArrayFromJSONString(type, std::string_view(json_string)); } -Result> ArrayFromJSON(const std::shared_ptr& type, - const char* json_string) { - return ArrayFromJSON(type, std::string_view(json_string)); +Result> ArrayFromJSONString(const std::shared_ptr& type, + const char* json_string) { + return ArrayFromJSONString(type, std::string_view(json_string)); } -Status ChunkedArrayFromJSON(const std::shared_ptr& type, - const std::vector& json_strings, - std::shared_ptr* out) { +Status ChunkedArrayFromJSONString(const std::shared_ptr& type, + const std::vector& json_strings, + std::shared_ptr* out) { ArrayVector out_chunks; out_chunks.reserve(json_strings.size()); for (const std::string& chunk_json : json_strings) { out_chunks.emplace_back(); - ARROW_ASSIGN_OR_RAISE(out_chunks.back(), ArrayFromJSON(type, 
chunk_json)); + ARROW_ASSIGN_OR_RAISE(out_chunks.back(), ArrayFromJSONString(type, chunk_json)); } *out = std::make_shared(std::move(out_chunks), type); return Status::OK(); } -Status DictArrayFromJSON(const std::shared_ptr& type, - std::string_view indices_json, std::string_view dictionary_json, - std::shared_ptr* out) { +Status DictArrayFromJSONString(const std::shared_ptr& type, + std::string_view indices_json, + std::string_view dictionary_json, + std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type); } @@ -1026,16 +1027,16 @@ Status DictArrayFromJSON(const std::shared_ptr& type, const auto& dictionary_type = checked_cast(*type); ARROW_ASSIGN_OR_RAISE(auto indices, - ArrayFromJSON(dictionary_type.index_type(), indices_json)); - ARROW_ASSIGN_OR_RAISE(auto dictionary, - ArrayFromJSON(dictionary_type.value_type(), dictionary_json)); + ArrayFromJSONString(dictionary_type.index_type(), indices_json)); + ARROW_ASSIGN_OR_RAISE(auto dictionary, ArrayFromJSONString(dictionary_type.value_type(), + dictionary_json)); return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary)) .Value(out); } -Status ScalarFromJSON(const std::shared_ptr& type, std::string_view json_string, - std::shared_ptr* out) { +Status ScalarFromJSONString(const std::shared_ptr& type, + std::string_view json_string, std::shared_ptr* out) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); @@ -1053,24 +1054,26 @@ Status ScalarFromJSON(const std::shared_ptr& type, std::string_view js return array->GetScalar(0).Value(out); } -Status DictScalarFromJSON(const std::shared_ptr& type, - std::string_view index_json, std::string_view dictionary_json, - std::shared_ptr* out) { +Status DictScalarFromJSONString(const std::shared_ptr& type, + std::string_view index_json, + std::string_view dictionary_json, + std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { - return Status::TypeError("DictScalarFromJSON requires dictionary type, got ", *type); + return Status::TypeError("DictScalarFromJSONString requires dictionary type, got ", + *type); } const auto& dictionary_type = checked_cast(*type); std::shared_ptr index; std::shared_ptr dictionary; - RETURN_NOT_OK(ScalarFromJSON(dictionary_type.index_type(), index_json, &index)); - ARROW_ASSIGN_OR_RAISE(dictionary, - ArrayFromJSON(dictionary_type.value_type(), dictionary_json)); + RETURN_NOT_OK(ScalarFromJSONString(dictionary_type.index_type(), index_json, &index)); + ARROW_ASSIGN_OR_RAISE( + dictionary, ArrayFromJSONString(dictionary_type.value_type(), dictionary_json)); *out = DictionaryScalar::Make(std::move(index), std::move(dictionary)); return Status::OK(); } -} // namespace util +} // namespace json } // namespace arrow diff --git a/cpp/src/arrow/util/from_json.h b/cpp/src/arrow/json/from_string.h similarity index 56% rename from cpp/src/arrow/util/from_json.h rename to cpp/src/arrow/json/from_string.h index 2e0b7de63b0..5493b393148 100644 --- a/cpp/src/arrow/util/from_json.h +++ b/cpp/src/arrow/json/from_string.h @@ -32,7 +32,7 @@ namespace arrow { class Array; class DataType; -namespace util { +namespace json { /// \defgroup array-from-json Helpers for constructing Arrays from JSON text /// @@ -43,41 +43,45 @@ namespace util { /// \brief Create an Array from a JSON string ARROW_EXPORT -Result> ArrayFromJSON(const std::shared_ptr&, - const std::string& json); +Result> ArrayFromJSONString(const std::shared_ptr&, + const std::string& json); /// 
\brief Create an Array from a JSON string ARROW_EXPORT -Result> ArrayFromJSON(const std::shared_ptr&, - std::string_view json); +Result> ArrayFromJSONString(const std::shared_ptr&, + std::string_view json); /// \brief Create an Array from a JSON string ARROW_EXPORT -Result> ArrayFromJSON(const std::shared_ptr&, - const char* json); +Result> ArrayFromJSONString(const std::shared_ptr&, + const char* json); /// \brief Create an ChunkedArray from a JSON string ARROW_EXPORT -Status ChunkedArrayFromJSON(const std::shared_ptr& type, - const std::vector& json_strings, - std::shared_ptr* out); +Status ChunkedArrayFromJSONString(const std::shared_ptr& type, + const std::vector& json_strings, + std::shared_ptr* out); /// \brief Create an DictionaryArray from a JSON string ARROW_EXPORT -Status DictArrayFromJSON(const std::shared_ptr&, std::string_view indices_json, - std::string_view dictionary_json, std::shared_ptr* out); +Status DictArrayFromJSONString(const std::shared_ptr&, + std::string_view indices_json, + std::string_view dictionary_json, + std::shared_ptr* out); /// \brief Create an Scalar from a JSON string ARROW_EXPORT -Status ScalarFromJSON(const std::shared_ptr&, std::string_view json, - std::shared_ptr* out); +Status ScalarFromJSONString(const std::shared_ptr&, std::string_view json, + std::shared_ptr* out); /// \brief Create an DictionaryScalar from a JSON string ARROW_EXPORT -Status DictScalarFromJSON(const std::shared_ptr&, std::string_view index_json, - std::string_view dictionary_json, std::shared_ptr* out); +Status DictScalarFromJSONString(const std::shared_ptr&, + std::string_view index_json, + std::string_view dictionary_json, + std::shared_ptr* out); /// @} -} // namespace util +} // namespace json } // namespace arrow diff --git a/cpp/src/arrow/util/from_json_test.cc b/cpp/src/arrow/json/from_string_test.cc similarity index 77% rename from cpp/src/arrow/util/from_json_test.cc rename to cpp/src/arrow/json/from_string_test.cc index 26011cfacf6..d9fa53f68cb 100644 --- a/cpp/src/arrow/util/from_json_test.cc +++ b/cpp/src/arrow/json/from_string_test.cc @@ -35,6 +35,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" #include "arrow/chunked_array.h" +#include "arrow/json/from_string.h" #include "arrow/scalar.h" #include "arrow/testing/builder.h" #include "arrow/testing/gtest_util.h" @@ -44,7 +45,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" #include "arrow/util/float16.h" -#include "arrow/util/from_json.h" #if defined(_MSC_VER) // "warning C4307: '+': integral constant overflow" @@ -55,7 +55,7 @@ namespace arrow { using util::Float16; -namespace util { +namespace json { using ::arrow::internal::BytesToBits; using ::arrow::internal::checked_cast; @@ -107,7 +107,7 @@ void AssertJSONArray(const std::shared_ptr& type, const std::string& j const std::vector& values) { std::shared_ptr expected; - ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSON(type, json)); + ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSONString(type, json)); ASSERT_OK(actual->ValidateFull()); ArrayFromVector(type, values, &expected); AssertArraysEqual(*expected, *actual); @@ -119,7 +119,7 @@ void AssertJSONArray(const std::shared_ptr& type, const std::string& j const std::vector& values) { std::shared_ptr expected; - ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSON(type, json)); + ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSONString(type, json)); ASSERT_OK(actual->ValidateFull()); ArrayFromVector(type, is_valid, values, &expected); AssertArraysEqual(*expected, 
*actual); @@ -133,11 +133,11 @@ void AssertJSONDictArray(const std::shared_ptr& index_type, auto type = dictionary(index_type, value_type); ASSERT_OK_AND_ASSIGN(auto expected_indices, - ArrayFromJSON(index_type, expected_indices_json)); + ArrayFromJSONString(index_type, expected_indices_json)); ASSERT_OK_AND_ASSIGN(auto expected_values, - ArrayFromJSON(value_type, expected_values_json)); + ArrayFromJSONString(value_type, expected_values_json)); - ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSON(type, json)); + ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSONString(type, json)); ASSERT_OK(actual->ValidateFull()); const auto& dict_array = checked_cast(*actual); @@ -151,7 +151,7 @@ void AssertJSONScalar(const std::shared_ptr& type, const std::string& SCOPED_TRACE(json); std::shared_ptr actual, expected; - ASSERT_OK(ScalarFromJSON(type, json, &actual)); + ASSERT_OK(ScalarFromJSONString(type, json, &actual)); if (is_valid) { ASSERT_OK_AND_ASSIGN(expected, MakeScalar(type, value)); } else { @@ -210,13 +210,13 @@ TYPED_TEST_P(TestIntegers, Basics) { TYPED_TEST_P(TestIntegers, Errors) { std::shared_ptr array; auto type = this->type(); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "0")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "{}")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0.0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"0\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "0")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "{}")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0.0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"0\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); } TYPED_TEST_P(TestIntegers, OutOfBounds) { @@ -227,23 +227,23 @@ TYPED_TEST_P(TestIntegers, OutOfBounds) { auto type = this->type(); if (type->id() == Type::UINT64) { - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[18446744073709551616]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[18446744073709551616]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[-1]")); } else if (type->id() == Type::INT64) { - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[9223372036854775808]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-9223372036854775809]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[9223372036854775808]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[-9223372036854775809]")); } else if (std::is_signed::value) { const auto lower = SafeSignedAdd(std::numeric_limits::min(), -1); const auto upper = SafeSignedAdd(std::numeric_limits::max(), +1); auto json_string = JSONArray(lower); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string)); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, json_string)); json_string = JSONArray(upper); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string)); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, json_string)); } else { const auto upper = static_cast(std::numeric_limits::max()) + 1; auto json_string = JSONArray(upper); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, json_string)); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[-1]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, json_string)); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, 
"[-1]")); } } @@ -319,8 +319,8 @@ TYPED_TEST_P(TestStrings, Errors) { auto type = this->type(); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[]]")); } TYPED_TEST_P(TestStrings, Dictionary) { @@ -352,9 +352,9 @@ TEST(TestNull, Errors) { std::shared_ptr type = null(); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[NaN]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[NaN]")); } TEST(TestBoolean, Basics) { @@ -374,8 +374,8 @@ TEST(TestBoolean, Errors) { std::shared_ptr type = boolean(); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0.0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"true\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0.0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"true\"]")); } TEST(TestFloat, Basics) { @@ -389,7 +389,7 @@ TEST(TestFloat, Basics) { // Check NaN separately as AssertArraysEqual simply memcmp's array contents // and NaNs can have many bit representations. - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[NaN]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[NaN]")); ASSERT_OK(actual->ValidateFull()); float value = checked_cast(*actual).Value(0); ASSERT_TRUE(std::isnan(value)); @@ -399,7 +399,7 @@ TEST(TestFloat, Errors) { std::shared_ptr type = float32(); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[true]")); } TEST(TestDouble, Basics) { @@ -411,7 +411,7 @@ TEST(TestDouble, Basics) { AssertJSONArray(type, "[-0.0, Inf, -Inf, null]", {true, true, true, false}, {-0.0, INFINITY, -INFINITY, 0.0}); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[NaN]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[NaN]")); ASSERT_OK(actual->ValidateFull()); double value = checked_cast(*actual).Value(0); ASSERT_TRUE(std::isnan(value)); @@ -421,7 +421,7 @@ TEST(TestDouble, Errors) { std::shared_ptr type = float64(); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[true]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[true]")); } TEST(TestTimestamp, Basics) { @@ -514,11 +514,11 @@ TEST(TestFixedSizeBinary, Errors) { std::shared_ptr type = fixed_size_binary(3); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[]]")); // Invalid length - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"abcd\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"abcd\"]")); } TEST(TestFixedSizeBinary, Dictionary) { @@ -530,14 +530,14 @@ TEST(TestFixedSizeBinary, Dictionary) { // Invalid length std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(dictionary(int8(), type), R"(["x"])")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(dictionary(int8(), type), R"(["x"])")); } template void TestDecimalBasic(std::shared_ptr type) 
{ std::shared_ptr expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); { DecimalBuilder builder(type); @@ -545,7 +545,7 @@ void TestDecimalBasic(std::shared_ptr type) { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[\"123.4567\", \"-78.9000\"]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[\"123.4567\", \"-78.9000\"]")); ASSERT_OK(actual->ValidateFull()); { DecimalBuilder builder(type); @@ -555,7 +555,7 @@ void TestDecimalBasic(std::shared_ptr type) { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[\"123.4567\", null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[\"123.4567\", null]")); ASSERT_OK(actual->ValidateFull()); { DecimalBuilder builder(type); @@ -587,11 +587,11 @@ TEST(TestDecimal, Errors) { {decimal32(8, 4), decimal64(10, 4), decimal128(10, 4), decimal256(10, 4)}) { std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[12.3456]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[12.3456]")); // Bad scale - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.345\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"12.34560\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"12.345\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"12.34560\"]")); } } @@ -621,7 +621,7 @@ class TestVarLengthListArray : public ::testing::Test { std::shared_ptr type = std::make_shared(int64()); std::shared_ptr offsets, sizes, values, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0}, &offsets); ArrayFromVector({}, &values); @@ -634,7 +634,7 @@ class TestVarLengthListArray : public ::testing::Test { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[4, 5], [], [6]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[4, 5], [], [6]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0, 2, 2, 3}, &offsets); ArrayFromVector({4, 5, 6}, &values); @@ -647,7 +647,7 @@ class TestVarLengthListArray : public ::testing::Test { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[], [null], [6, null]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[], [null], [6, null]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0, 0, 1, 3}, &offsets); auto is_valid = std::vector{false, true, false}; @@ -661,7 +661,7 @@ class TestVarLengthListArray : public ::testing::Test { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[null, [], null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[null, [], null]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -679,9 +679,9 @@ class TestVarLengthListArray : public ::testing::Test { std::shared_ptr type = std::make_shared(int64()); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0.0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[9223372036854775808]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, 
ArrayFromJSONString(type, "[[0.0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[9223372036854775808]]")); } void TestNullList() { @@ -689,7 +689,7 @@ class TestVarLengthListArray : public ::testing::Test { std::shared_ptr type = std::make_shared(null()); std::shared_ptr offsets, sizes, values, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0}, &offsets); values = std::make_shared(0); @@ -702,7 +702,7 @@ class TestVarLengthListArray : public ::testing::Test { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[], [null], [null, null]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[], [null], [null, null]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0, 0, 1, 3}, &offsets); values = std::make_shared(3); @@ -715,7 +715,7 @@ class TestVarLengthListArray : public ::testing::Test { } AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[null, [], null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[null, [], null]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -735,7 +735,8 @@ class TestVarLengthListArray : public ::testing::Test { std::make_shared(std::make_shared(uint8())); std::shared_ptr offsets, sizes, values, nested, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[[4], [5, 6]], [[7, 8, 9]]]")); + ASSERT_OK_AND_ASSIGN(actual, + ArrayFromJSONString(type, "[[[4], [5, 6]], [[7, 8, 9]]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0, 1, 3, 6}, &offsets); ArrayFromVector({4, 5, 6, 7, 8, 9}, &values); @@ -758,7 +759,7 @@ class TestVarLengthListArray : public ::testing::Test { AssertArraysEqual(*expected, *actual); ASSERT_OK_AND_ASSIGN( - actual, ArrayFromJSON(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]")); + actual, ArrayFromJSONString(type, "[[], [[]], [[4], [], [5, 6]], [[7, 8, 9]]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({0, 0, 1, 1, 3, 6}, &offsets); ArrayFromVector({4, 5, 6, 7, 8, 9}, &values); @@ -780,7 +781,7 @@ class TestVarLengthListArray : public ::testing::Test { ASSERT_EQ(actual->length(), 4); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[null, [null], [[null]]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[null, [null], [[null]]]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -819,7 +820,7 @@ TEST(TestMap, IntegerToInteger) { [] ] )"; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, input)); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, input)); std::unique_ptr builder; ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder)); @@ -850,11 +851,12 @@ TEST(TestMap, StringToInteger) { [] ] )"; - ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSON(type, input)); + ASSERT_OK_AND_ASSIGN(auto actual, ArrayFromJSONString(type, input)); std::vector offsets = {0, 2, 2, 3, 3}; ASSERT_OK_AND_ASSIGN(auto expected_keys, - ArrayFromJSON(utf8(), R"(["joe", "mark", "cap"])")); - ASSERT_OK_AND_ASSIGN(auto expected_values, ArrayFromJSON(int32(), "[0, null, 8]")); + ArrayFromJSONString(utf8(), R"(["joe", "mark", "cap"])")); + ASSERT_OK_AND_ASSIGN(auto expected_values, + ArrayFromJSONString(int32(), "[0, null, 8]")); ASSERT_OK_AND_ASSIGN(auto expected_null_bitmap, BytesToBits(std::vector({1, 0, 1, 1}))); auto expected = @@ -868,18 +870,18 @@ 
TEST(TestMap, Errors) { std::shared_ptr array; // list of pairs isn't an array - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); // pair isn't an array - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[null]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[null]]")); // pair with length != 2 - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[[0]]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[[0, 1, 2]]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[[0]]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[[0, 1, 2]]]")); // null key - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[[null, 0]]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[[null, 0]]]")); // key or value fails to convert - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[[0.0, 0]]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[[0, 0.0]]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[[0.0, 0]]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[[0, 0.0]]]")); } TEST(TestMap, IntegerMapToStringList) { @@ -905,7 +907,7 @@ TEST(TestMap, IntegerMapToStringList) { null ] )"; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, input)); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, input)); std::unique_ptr builder; ASSERT_OK(MakeBuilder(default_memory_pool(), type, &builder)); @@ -948,27 +950,27 @@ TEST(TestFixedSizeList, IntegerList) { auto type = fixed_size_list(int64(), 2); std::shared_ptr values, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({}, &values); expected = std::make_shared(type, 0, values); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[4, 5], [0, 0], [6, 7]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[4, 5], [0, 0], [6, 7]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({4, 5, 0, 0, 6, 7}, &values); expected = std::make_shared(type, 3, values); AssertArraysEqual(*expected, *actual); ASSERT_OK_AND_ASSIGN(actual, - ArrayFromJSON(type, "[[null, null], [0, null], [6, null]]")); + ArrayFromJSONString(type, "[[null, null], [0, null], [6, null]]")); ASSERT_OK(actual->ValidateFull()); auto is_valid = std::vector{false, false, true, false, true, false}; ArrayFromVector(is_valid, {0, 0, 0, 0, 6, 0}, &values); expected = std::make_shared(type, 3, values); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[null, [null, null], null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[null, [null, null], null]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -989,10 +991,10 @@ TEST(TestFixedSizeList, IntegerListErrors) { std::shared_ptr type = fixed_size_list(int64(), 2); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0.0, 1.0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[9223372036854775808, 0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0.0, 1.0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, 
"[[9223372036854775808, 0]]")); } TEST(TestFixedSizeList, NullList) { @@ -1000,20 +1002,20 @@ TEST(TestFixedSizeList, NullList) { std::shared_ptr type = fixed_size_list(null(), 2); std::shared_ptr values, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); values = std::make_shared(0); expected = std::make_shared(type, 0, values); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, - ArrayFromJSON(type, "[[null, null], [null, null], [null, null]]")); + ASSERT_OK_AND_ASSIGN( + actual, ArrayFromJSONString(type, "[[null, null], [null, null], [null, null]]")); ASSERT_OK(actual->ValidateFull()); values = std::make_shared(6); expected = std::make_shared(type, 3, values); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[null, [null, null], null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[null, [null, null], null]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -1036,14 +1038,15 @@ TEST(TestFixedSizeList, IntegerListList) { std::shared_ptr type = fixed_size_list(nested_type, 1); std::shared_ptr values, nested, expected, actual; - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[[1, 4]], [[2, 5]], [[3, 6]]]")); + ASSERT_OK_AND_ASSIGN(actual, + ArrayFromJSONString(type, "[[[1, 4]], [[2, 5]], [[3, 6]]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({1, 4, 2, 5, 3, 6}, &values); nested = std::make_shared(nested_type, 3, values); expected = std::make_shared(type, 3, nested); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[[1, null]], [null], null]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[[1, null]], [null], null]")); ASSERT_OK(actual->ValidateFull()); { std::unique_ptr builder; @@ -1078,7 +1081,7 @@ TEST(TestStruct, SimpleStruct) { std::vector> children; // Trivial - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({}, &a); ArrayFromVector({}, &b); @@ -1092,11 +1095,12 @@ TEST(TestStruct, SimpleStruct) { children.assign({a, b}); expected = std::make_shared(type, 2, children); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[[5, true], [6, false]]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[[5, true], [6, false]]")); ASSERT_OK(actual->ValidateFull()); AssertArraysEqual(*expected, *actual); ASSERT_OK_AND_ASSIGN( - actual, ArrayFromJSON(type, "[{\"a\": 5, \"b\": true}, {\"b\": false, \"a\": 6}]")); + actual, + ArrayFromJSONString(type, "[{\"a\": 5, \"b\": true}, {\"b\": false, \"a\": 6}]")); ASSERT_OK(actual->ValidateFull()); AssertArraysEqual(*expected, *actual); @@ -1110,12 +1114,14 @@ TEST(TestStruct, SimpleStruct) { expected = std::make_shared(type, 4, children, null_bitmap, 1); ASSERT_OK_AND_ASSIGN( - actual, ArrayFromJSON(type, "[null, [5, null], [null, false], [null, null]]")); + actual, + ArrayFromJSONString(type, "[null, [5, null], [null, false], [null, null]]")); ASSERT_OK(actual->ValidateFull()); AssertArraysEqual(*expected, *actual); // When using object notation, null members can be omitted ASSERT_OK_AND_ASSIGN( - actual, ArrayFromJSON(type, "[null, {\"a\": 5, \"b\": null}, {\"b\": false}, {}]")); + actual, + ArrayFromJSONString(type, "[null, {\"a\": 5, \"b\": null}, {\"b\": false}, {}]")); ASSERT_OK(actual->ValidateFull()); 
AssertArraysEqual(*expected, *actual); } @@ -1132,7 +1138,7 @@ TEST(TestStruct, NestedStruct) { std::vector is_valid; std::vector> children(2); - ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSON(type, "[]")); + ASSERT_OK_AND_ASSIGN(actual, ArrayFromJSONString(type, "[]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({}, &children[0]); ArrayFromVector({}, &children[1]); @@ -1141,8 +1147,8 @@ TEST(TestStruct, NestedStruct) { expected = std::make_shared(type, 0, children); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, - ArrayFromJSON(type, "[[[5, true], 1.5], [[6, false], -3e2]]")); + ASSERT_OK_AND_ASSIGN( + actual, ArrayFromJSONString(type, "[[[5, true], 1.5], [[6, false], -3e2]]")); ASSERT_OK(actual->ValidateFull()); ArrayFromVector({5, 6}, &children[0]); ArrayFromVector({true, false}, &children[1]); @@ -1151,8 +1157,8 @@ TEST(TestStruct, NestedStruct) { expected = std::make_shared(type, 2, children); AssertArraysEqual(*expected, *actual); - ASSERT_OK_AND_ASSIGN(actual, - ArrayFromJSON(type, "[null, [[5, null], null], [null, -3e2]]")); + ASSERT_OK_AND_ASSIGN( + actual, ArrayFromJSONString(type, "[null, [[5, null], null], [null, -3e2]]")); ASSERT_OK(actual->ValidateFull()); is_valid = {false, true, false}; ArrayFromVector(is_valid, {0, 5, 0}, &children[0]); @@ -1173,12 +1179,12 @@ TEST(TestStruct, Errors) { std::shared_ptr type = struct_({field_a, field_b}); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[0, true]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, true, 1]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[true, 0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"b\": 0, \"a\": true}]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[{\"c\": 0}]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[0, true]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0, true, 1]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[true, 0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[{\"b\": 0, \"a\": true}]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[{\"c\": 0}]")); } TEST(TestDenseUnion, Basics) { @@ -1188,14 +1194,17 @@ TEST(TestDenseUnion, Basics) { auto type = dense_union({field_a, field_b}, {4, 8}); ASSERT_OK_AND_ASSIGN( auto array_parsed, - ArrayFromJSON(type, "[null, [4, 122], [8, true], [4, null], null, [8, false]]")); + ArrayFromJSONString(type, + "[null, [4, 122], [8, true], [4, null], null, [8, false]]")); auto array = checked_pointer_cast(array_parsed); - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[4, 4, 8, 4, 4, 8]")); + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[4, 4, 8, 4, 4, 8]")); ASSERT_OK_AND_ASSIGN(auto expected_offsets, - ArrayFromJSON(int32(), "[0, 1, 0, 2, 3, 1]")); - ASSERT_OK_AND_ASSIGN(auto expected_a, ArrayFromJSON(int8(), "[null, 122, null, null]")); - ASSERT_OK_AND_ASSIGN(auto expected_b, ArrayFromJSON(boolean(), "[true, false]")); + ArrayFromJSONString(int32(), "[0, 1, 0, 2, 3, 1]")); + ASSERT_OK_AND_ASSIGN(auto expected_a, + ArrayFromJSONString(int8(), "[null, 122, null, null]")); + ASSERT_OK_AND_ASSIGN(auto expected_b, ArrayFromJSONString(boolean(), "[true, false]")); ASSERT_OK_AND_ASSIGN( auto expected, DenseUnionArray::Make(*expected_types, *expected_offsets, @@ -1216,13 +1225,14 @@ TEST(TestSparseUnion, Basics) { auto type = sparse_union({field_a, field_b}, {4, 8}); 
ASSERT_OK_AND_ASSIGN( auto array, - ArrayFromJSON(type, "[[4, 122], [8, true], [4, null], null, [8, false]]")); + ArrayFromJSONString(type, "[[4, 122], [8, true], [4, null], null, [8, false]]")); - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]")); + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[4, 8, 4, 4, 8]")); ASSERT_OK_AND_ASSIGN(auto expected_a, - ArrayFromJSON(int8(), "[122, null, null, null, null]")); + ArrayFromJSONString(int8(), "[122, null, null, null, null]")); ASSERT_OK_AND_ASSIGN(auto expected_b, - ArrayFromJSON(boolean(), "[null, true, null, null, false]")); + ArrayFromJSONString(boolean(), "[null, true, null, null, false]")); ASSERT_OK_AND_ASSIGN(auto expected, SparseUnionArray::Make(*expected_types, {expected_a, expected_b}, @@ -1236,23 +1246,27 @@ TEST(TestDenseUnion, ListOfUnion) { auto field_b = field("b", boolean()); auto union_type = dense_union({field_a, field_b}, {4, 8}); auto list_type = list(union_type); - ASSERT_OK_AND_ASSIGN(auto parsed_array, ArrayFromJSON(list_type, - "[" - "[[4, 122], [8, true]]," - "[[4, null], null, [8, false]]" - "]")); + ASSERT_OK_AND_ASSIGN(auto parsed_array, + ArrayFromJSONString(list_type, + "[" + "[[4, 122], [8, true]]," + "[[4, null], null, [8, false]]" + "]")); auto array = checked_pointer_cast(parsed_array); - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]")); - ASSERT_OK_AND_ASSIGN(auto expected_offsets, ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]")); - ASSERT_OK_AND_ASSIGN(auto expected_a, ArrayFromJSON(int8(), "[122, null, null]")); - ASSERT_OK_AND_ASSIGN(auto expected_b, ArrayFromJSON(boolean(), "[true, false]")); + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[4, 8, 4, 4, 8]")); + ASSERT_OK_AND_ASSIGN(auto expected_offsets, + ArrayFromJSONString(int32(), "[0, 0, 1, 2, 1]")); + ASSERT_OK_AND_ASSIGN(auto expected_a, ArrayFromJSONString(int8(), "[122, null, null]")); + ASSERT_OK_AND_ASSIGN(auto expected_b, ArrayFromJSONString(boolean(), "[true, false]")); ASSERT_OK_AND_ASSIGN( auto expected_values, DenseUnionArray::Make(*expected_types, *expected_offsets, {expected_a, expected_b}, {"a", "b"}, {4, 8})); - ASSERT_OK_AND_ASSIGN(auto expected_list_offsets, ArrayFromJSON(int32(), "[0, 2, 5]")); + ASSERT_OK_AND_ASSIGN(auto expected_list_offsets, + ArrayFromJSONString(int32(), "[0, 2, 5]")); ASSERT_OK_AND_ASSIGN(auto expected, ListArray::FromArrays(*expected_list_offsets, *expected_values)); @@ -1271,22 +1285,24 @@ TEST(TestSparseUnion, ListOfUnion) { auto field_b = field("b", boolean()); auto union_type = sparse_union({field_a, field_b}, {4, 8}); auto list_type = list(union_type); - ASSERT_OK_AND_ASSIGN(auto array, ArrayFromJSON(list_type, - "[" - "[[4, 122], [8, true]]," - "[[4, null], null, [8, false]]" - "]")); - - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]")); + ASSERT_OK_AND_ASSIGN(auto array, ArrayFromJSONString(list_type, + "[" + "[[4, 122], [8, true]]," + "[[4, null], null, [8, false]]" + "]")); + + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[4, 8, 4, 4, 8]")); ASSERT_OK_AND_ASSIGN(auto expected_a, - ArrayFromJSON(int8(), "[122, null, null, null, null]")); + ArrayFromJSONString(int8(), "[122, null, null, null, null]")); ASSERT_OK_AND_ASSIGN(auto expected_b, - ArrayFromJSON(boolean(), "[null, true, null, null, false]")); + ArrayFromJSONString(boolean(), "[null, true, null, null, false]")); ASSERT_OK_AND_ASSIGN(auto expected_values, 
SparseUnionArray::Make(*expected_types, {expected_a, expected_b}, {"a", "b"}, {4, 8})); - ASSERT_OK_AND_ASSIGN(auto expected_list_offsets, ArrayFromJSON(int32(), "[0, 2, 5]")); + ASSERT_OK_AND_ASSIGN(auto expected_list_offsets, + ArrayFromJSONString(int32(), "[0, 2, 5]")); ASSERT_OK_AND_ASSIGN(auto expected, ListArray::FromArrays(*expected_list_offsets, *expected_values)); @@ -1300,8 +1316,9 @@ TEST(TestDenseUnion, UnionOfStructs) { field("foxtrot", list(int8()))})), field("q", struct_({field("quebec", utf8())}))}; auto type = dense_union(fields, {0, 23, 47}); - ASSERT_OK_AND_ASSIGN(auto array_parsed, - ArrayFromJSON(type, R"([[0, {"alpha": 0.0, "bravo": "charlie"}], + ASSERT_OK_AND_ASSIGN( + auto array_parsed, + ArrayFromJSONString(type, R"([[0, {"alpha": 0.0, "bravo": "charlie"}], [23, {"whiskey": 99}], [0, {"bravo": "mike"}], null, @@ -1309,18 +1326,21 @@ TEST(TestDenseUnion, UnionOfStructs) { ])")); auto array = checked_pointer_cast(array_parsed); - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[0, 23, 0, 0, 23]")); - ASSERT_OK_AND_ASSIGN(auto expected_offsets, ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_0, ArrayFromJSON(fields[0]->type(), R"([ + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[0, 23, 0, 0, 23]")); + ASSERT_OK_AND_ASSIGN(auto expected_offsets, + ArrayFromJSONString(int32(), "[0, 0, 1, 2, 1]")); + ASSERT_OK_AND_ASSIGN(auto expected_fields_0, ArrayFromJSONString(fields[0]->type(), R"([ {"alpha": 0.0, "bravo": "charlie"}, {"bravo": "mike"}, null ])")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_1, ArrayFromJSON(fields[1]->type(), R"([ + ASSERT_OK_AND_ASSIGN(auto expected_fields_1, ArrayFromJSONString(fields[1]->type(), R"([ {"whiskey": 99}, {"tango": 8.25, "foxtrot": [0, 2, 3]} ])")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_2, ArrayFromJSON(fields[2]->type(), "[]")); + ASSERT_OK_AND_ASSIGN(auto expected_fields_2, + ArrayFromJSONString(fields[2]->type(), "[]")); ArrayVector expected_fields = {expected_fields_0, expected_fields_1, expected_fields_2}; ASSERT_OK_AND_ASSIGN( @@ -1345,7 +1365,7 @@ TEST(TestSparseUnion, UnionOfStructs) { field("foxtrot", list(int8()))})), field("q", struct_({field("quebec", utf8())}))}; auto type = sparse_union(fields, {0, 23, 47}); - ASSERT_OK_AND_ASSIGN(auto array, ArrayFromJSON(type, R"([ + ASSERT_OK_AND_ASSIGN(auto array, ArrayFromJSONString(type, R"([ [0, {"alpha": 0.0, "bravo": "charlie"}], [23, {"whiskey": 99}], [0, {"bravo": "mike"}], @@ -1353,23 +1373,25 @@ TEST(TestSparseUnion, UnionOfStructs) { [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}] ])")); - ASSERT_OK_AND_ASSIGN(auto expected_types, ArrayFromJSON(int8(), "[0, 23, 0, 0, 23]")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_0, ArrayFromJSON(fields[0]->type(), R"([ + ASSERT_OK_AND_ASSIGN(auto expected_types, + ArrayFromJSONString(int8(), "[0, 23, 0, 0, 23]")); + ASSERT_OK_AND_ASSIGN(auto expected_fields_0, ArrayFromJSONString(fields[0]->type(), R"([ {"alpha": 0.0, "bravo": "charlie"}, null, {"bravo": "mike"}, null, null ])")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_1, ArrayFromJSON(fields[1]->type(), R"([ + ASSERT_OK_AND_ASSIGN(auto expected_fields_1, ArrayFromJSONString(fields[1]->type(), R"([ null, {"whiskey": 99}, null, null, {"tango": 8.25, "foxtrot": [0, 2, 3]} ])")); - ASSERT_OK_AND_ASSIGN(auto expected_fields_2, - ArrayFromJSON(fields[2]->type(), "[null, null, null, null, null]")) + ASSERT_OK_AND_ASSIGN( + auto expected_fields_2, + ArrayFromJSONString(fields[2]->type(), "[null, 
null, null, null, null]")) ArrayVector expected_fields = {expected_fields_0, expected_fields_1, expected_fields_2}; ASSERT_OK_AND_ASSIGN(auto expected, @@ -1385,13 +1407,15 @@ TEST(TestDenseUnion, Errors) { std::shared_ptr type = dense_union({field_a, field_b}, {4, 8}); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a valid type_id\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 99]]")); // 0 is not one of {4, 8} - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]")); // "" is not a valid int8() + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"not a valid type_id\"]")); + ASSERT_RAISES(Invalid, + ArrayFromJSONString(type, "[[0, 99]]")); // 0 is not one of {4, 8} + ASSERT_RAISES(Invalid, + ArrayFromJSONString(type, "[[4, \"\"]]")); // "" is not a valid int8() - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a pair\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"not a pair\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[8, true, 1]]")); } TEST(TestSparseUnion, Errors) { @@ -1400,13 +1424,13 @@ TEST(TestSparseUnion, Errors) { std::shared_ptr type = sparse_union({field_a, field_b}, {4, 8}); std::shared_ptr array; - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a valid type_id\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0, 99]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[4, \"\"]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"not a valid type_id\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0, 99]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[4, \"\"]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[\"not a pair\"]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[0]]")); - ASSERT_RAISES(Invalid, ArrayFromJSON(type, "[[8, true, 1]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[\"not a pair\"]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[0]]")); + ASSERT_RAISES(Invalid, ArrayFromJSONString(type, "[[8, true, 1]]")); } TEST(TestNestedDictionary, ListOfDict) { @@ -1418,14 +1442,14 @@ TEST(TestNestedDictionary, ListOfDict) { std::shared_ptr array, expected, indices, values, dicts, offsets; ASSERT_OK_AND_ASSIGN( - array, ArrayFromJSON(type, R"([["ab", "cd", null], null, ["cd", "cd"]])")); + array, ArrayFromJSONString(type, R"([["ab", "cd", null], null, ["cd", "cd"]])")); ASSERT_OK(array->ValidateFull()); // Build expected array - ASSERT_OK_AND_ASSIGN(indices, ArrayFromJSON(index_type, "[0, 1, null, 1, 1]")); - ASSERT_OK_AND_ASSIGN(values, ArrayFromJSON(value_type, R"(["ab", "cd"])")); + ASSERT_OK_AND_ASSIGN(indices, ArrayFromJSONString(index_type, "[0, 1, null, 1, 1]")); + ASSERT_OK_AND_ASSIGN(values, ArrayFromJSONString(value_type, R"(["ab", "cd"])")); ASSERT_OK_AND_ASSIGN(dicts, DictionaryArray::FromArrays(dict_type, indices, values)); - ASSERT_OK_AND_ASSIGN(offsets, ArrayFromJSON(int32(), "[0, null, 3, 5]")); + ASSERT_OK_AND_ASSIGN(offsets, ArrayFromJSONString(int32(), "[0, null, 3, 5]")); ASSERT_OK_AND_ASSIGN(expected, ListArray::FromArrays(*offsets, *dicts)); AssertArraysEqual(*expected, *array, /*verbose=*/true); @@ -1436,9 +1460,10 @@ TEST(TestDictArrayFromJSON, Basics) { auto array = DictArrayFromJSON(type, "[null, 2, 1, 0]", R"(["whiskey", "tango", "foxtrot"])"); - ASSERT_OK_AND_ASSIGN(auto expected_indices, ArrayFromJSON(int32(), "[null, 
2, 1, 0]")); + ASSERT_OK_AND_ASSIGN(auto expected_indices, + ArrayFromJSONString(int32(), "[null, 2, 1, 0]")); ASSERT_OK_AND_ASSIGN(auto expected_dictionary, - ArrayFromJSON(utf8(), R"(["whiskey", "tango", "foxtrot"])")); + ArrayFromJSONString(utf8(), R"(["whiskey", "tango", "foxtrot"])")); ASSERT_ARRAYS_EQUAL(DictionaryArray(type, expected_indices, expected_dictionary), *array); @@ -1448,27 +1473,27 @@ TEST(TestDictArrayFromJSON, Errors) { auto type = dictionary(int32(), utf8()); std::shared_ptr array; - ASSERT_RAISES(Invalid, - DictArrayFromJSON(type, "[\"not a valid index\"]", "[\"\"]", &array)); - ASSERT_RAISES(Invalid, DictArrayFromJSON(type, "[0, 1]", "[1]", - &array)); // dict value isn't string + ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[\"not a valid index\"]", + "[\"\"]", &array)); + ASSERT_RAISES(Invalid, DictArrayFromJSONString(type, "[0, 1]", "[1]", + &array)); // dict value isn't string } TEST(TestChunkedArrayFromJSON, Basics) { auto type = int32(); std::shared_ptr chunked_array; - ASSERT_OK(ChunkedArrayFromJSON(type, {}, &chunked_array)); + ASSERT_OK(ChunkedArrayFromJSONString(type, {}, &chunked_array)); ASSERT_OK(chunked_array->ValidateFull()); ASSERT_EQ(chunked_array->num_chunks(), 0); AssertTypeEqual(type, chunked_array->type()); - ASSERT_OK(ChunkedArrayFromJSON(type, {"[1, 2]", "[3, null, 4]"}, &chunked_array)); + ASSERT_OK(ChunkedArrayFromJSONString(type, {"[1, 2]", "[3, null, 4]"}, &chunked_array)); ASSERT_OK(chunked_array->ValidateFull()); ASSERT_EQ(chunked_array->num_chunks(), 2); std::shared_ptr expected_chunk; - ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSON(type, "[1, 2]")); + ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[1, 2]")); AssertArraysEqual(*expected_chunk, *chunked_array->chunk(0), /*verbose=*/true); - ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSON(type, "[3, null, 4]")); + ASSERT_OK_AND_ASSIGN(expected_chunk, ArrayFromJSONString(type, "[3, null, 4]")); AssertArraysEqual(*expected_chunk, *chunked_array->chunk(1), /*verbose=*/true); } @@ -1491,29 +1516,31 @@ TEST(TestScalarFromJSON, Basics) { AssertJSONScalar(boolean(), "1", true, true); AssertJSONScalar(float64(), "1.0", true, 1.0); AssertJSONScalar(float64(), "-0.0", true, -0.0); - ASSERT_OK(ScalarFromJSON(float64(), "NaN", &scalar)); + ASSERT_OK(ScalarFromJSONString(float64(), "NaN", &scalar)); ASSERT_TRUE(std::isnan(checked_cast(*scalar).value)); - ASSERT_OK(ScalarFromJSON(float64(), "Inf", &scalar)); + ASSERT_OK(ScalarFromJSONString(float64(), "Inf", &scalar)); ASSERT_TRUE(std::isinf(checked_cast(*scalar).value)); } TEST(TestScalarFromJSON, Errors) { std::shared_ptr scalar; - ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[0]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[9223372036854775808]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(int64(), "[-9223372036854775809]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[18446744073709551616]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(uint64(), "[-1]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "0", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(binary(), "[]", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "0.0", &scalar)); - ASSERT_RAISES(Invalid, ScalarFromJSON(boolean(), "\"true\"", &scalar)); -} - -TEST(TestDictScalarFromJSON, Basics) { + ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[0]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(int64(), "[9223372036854775808]", &scalar)); + ASSERT_RAISES(Invalid, 
+ ScalarFromJSONString(int64(), "[-9223372036854775809]", &scalar)); + ASSERT_RAISES(Invalid, + ScalarFromJSONString(uint64(), "[18446744073709551616]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(uint64(), "[-1]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "0", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(binary(), "[]", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "0.0", &scalar)); + ASSERT_RAISES(Invalid, ScalarFromJSONString(boolean(), "\"true\"", &scalar)); +} + +TEST(TestDictScalarFromJSONString, Basics) { auto type = dictionary(int32(), utf8()); auto dict = R"(["whiskey", "tango", "foxtrot"])"; - ASSERT_OK_AND_ASSIGN(auto expected_dictionary, ArrayFromJSON(utf8(), dict)); + ASSERT_OK_AND_ASSIGN(auto expected_dictionary, ArrayFromJSONString(utf8(), dict)); for (auto index : {"null", "2", "1", "0"}) { auto scalar = DictScalarFromJSON(type, index, dict); @@ -1524,15 +1551,15 @@ TEST(TestDictScalarFromJSON, Basics) { } } -TEST(TestDictScalarFromJSON, Errors) { +TEST(TestDictScalarFromJSONString, Errors) { auto type = dictionary(int32(), utf8()); std::shared_ptr scalar; - ASSERT_RAISES(Invalid, - DictScalarFromJSON(type, "\"not a valid index\"", "[\"\"]", &scalar)); - ASSERT_RAISES(Invalid, DictScalarFromJSON(type, "0", "[1]", - &scalar)); // dict value isn't string + ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "\"not a valid index\"", "[\"\"]", + &scalar)); + ASSERT_RAISES(Invalid, DictScalarFromJSONString(type, "0", "[1]", + &scalar)); // dict value isn't string } -} // namespace util +} // namespace json } // namespace arrow diff --git a/cpp/src/arrow/meson.build b/cpp/src/arrow/meson.build index 285cb6dba0c..ac5099d3906 100644 --- a/cpp/src/arrow/meson.build +++ b/cpp/src/arrow/meson.build @@ -358,6 +358,7 @@ if needs_json 'json/chunked_builder.cc', 'json/chunker.cc', 'json/converter.cc', + 'json/from_string.cc', 'json/object_parser.cc', 'json/object_writer.cc', 'json/parser.cc', @@ -366,9 +367,6 @@ if needs_json 'dependencies': [rapidjson_dep], }, } - - arrow_util_srcs += 'util/from_json.cc' - arrow_util_deps += rapidjson_dep endif arrow_srcs = [] diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index a49e7ed74a2..9ba59385960 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -53,6 +53,7 @@ #include "arrow/io/memory.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" +#include "arrow/json/from_string.h" #include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep #include "arrow/pretty_print.h" #include "arrow/record_batch.h" @@ -62,7 +63,6 @@ #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/config.h" -#include "arrow/util/from_json.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" #include "arrow/util/logging_internal.h" @@ -381,7 +381,7 @@ void AssertDatumsApproxEqual(const Datum& expected, const Datum& actual, bool ve std::shared_ptr ArrayFromJSON(const std::shared_ptr& type, std::string_view json) { - EXPECT_OK_AND_ASSIGN(auto out, util::ArrayFromJSON(type, json)); + EXPECT_OK_AND_ASSIGN(auto out, json::ArrayFromJSONString(type, json)); return out; } @@ -389,14 +389,14 @@ std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, std::string_view indices_json, std::string_view dictionary_json) { std::shared_ptr out; - ABORT_NOT_OK(util::DictArrayFromJSON(type, indices_json, dictionary_json, &out)); + ABORT_NOT_OK(json::DictArrayFromJSONString(type, 
indices_json, dictionary_json, &out)); return out; } std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr& type, const std::vector& json) { std::shared_ptr out; - ABORT_NOT_OK(util::ChunkedArrayFromJSON(type, json, &out)); + ABORT_NOT_OK(json::ChunkedArrayFromJSONString(type, json, &out)); return out; } @@ -413,7 +413,7 @@ std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& std::shared_ptr ScalarFromJSON(const std::shared_ptr& type, std::string_view json) { std::shared_ptr out; - ABORT_NOT_OK(util::ScalarFromJSON(type, json, &out)); + ABORT_NOT_OK(json::ScalarFromJSONString(type, json, &out)); return out; } @@ -421,7 +421,7 @@ std::shared_ptr DictScalarFromJSON(const std::shared_ptr& type std::string_view index_json, std::string_view dictionary_json) { std::shared_ptr out; - ABORT_NOT_OK(util::DictScalarFromJSON(type, index_json, dictionary_json, &out)); + ABORT_NOT_OK(json::DictScalarFromJSONString(type, index_json, dictionary_json, &out)); return out; } diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index fe683ed7c56..17eea5532cc 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -114,10 +114,6 @@ add_arrow_test(crc32-test EXTRA_LINK_LIBS Boost::headers) -if(ARROW_JSON) - add_arrow_test(from_json_test SOURCES from_json_test.cc) -endif() - add_arrow_benchmark(bit_block_counter_benchmark) add_arrow_benchmark(bit_util_benchmark) add_arrow_benchmark(bitmap_reader_benchmark) diff --git a/python/pyarrow/src/arrow/python/gdb.cc b/python/pyarrow/src/arrow/python/gdb.cc index f6753b20da5..c88358a71b0 100644 --- a/python/pyarrow/src/arrow/python/gdb.cc +++ b/python/pyarrow/src/arrow/python/gdb.cc @@ -23,6 +23,7 @@ #include "arrow/chunked_array.h" #include "arrow/datum.h" #include "arrow/extension/uuid.h" +#include "arrow/json/from_string.h" #include "arrow/python/gdb.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" @@ -30,7 +31,6 @@ #include "arrow/type.h" #include "arrow/util/debug.h" #include "arrow/util/decimal.h" -#include "arrow/util/from_json.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index 8d903891382..47fb26e0c69 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -24,7 +24,7 @@ cdef extern from * namespace "arrow::py" nogil: """ #include "arrow/status.h" #include "arrow/extension_type.h" - #include "arrow/util/from_json.h" + #include "arrow/json/from_string.h" namespace arrow { namespace py { From b1f984c3349ee0cc54a6432715dc577f2b14c378 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:11:37 -0700 Subject: [PATCH 19/33] Update generate_fuzz_corpus.cc --- cpp/src/arrow/ipc/generate_fuzz_corpus.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc index 24291ee10af..123b6981b28 100644 --- a/cpp/src/arrow/ipc/generate_fuzz_corpus.cc +++ b/cpp/src/arrow/ipc/generate_fuzz_corpus.cc @@ -41,7 +41,7 @@ namespace arrow::ipc { using ::arrow::internal::CreateDir; using ::arrow::internal::PlatformFilename; -using ::arrow::util::ArrayFromJSON; +using ::arrow::json::ArrayFromJSONString; Result> MakeExtensionBatch() { auto array = ExampleUuid(); @@ -60,7 +60,7 @@ Result> MakeMapBatch() { [] ] )"; - ARROW_ASSIGN_OR_RAISE(array, ArrayFromJSON(map(int16(), int32()), json_input)); + 
ARROW_ASSIGN_OR_RAISE(array, ArrayFromJSONString(map(int16(), int32()), json_input)); auto schema = ::arrow::schema({field("f0", array->type())}); return RecordBatch::Make(schema, array->length(), {array}); } From 24e757b26c87c64c4ef8b4e18fcfdfb1cc639bc8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:35:26 -0700 Subject: [PATCH 20/33] Move anonymous part of from_string.cc into an internal name namespace Before this change, Converter was ambiguous in unity builds. I opted for this instead of renaming. --- cpp/src/arrow/json/from_string.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/json/from_string.cc b/cpp/src/arrow/json/from_string.cc index 23c25c9bad1..006a7299feb 100644 --- a/cpp/src/arrow/json/from_string.cc +++ b/cpp/src/arrow/json/from_string.cc @@ -60,7 +60,7 @@ namespace json { using ::arrow::internal::checked_cast; using ::arrow::internal::checked_pointer_cast; -namespace { +namespace internal { constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; @@ -972,15 +972,15 @@ Status GetConverter(const std::shared_ptr& type, return Status::OK(); } -} // namespace +} // namespace internal Result> ArrayFromJSONString(const std::shared_ptr& type, std::string_view json_string) { - std::shared_ptr converter; + std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); rj::Document json_doc; - json_doc.Parse(json_string.data(), json_string.length()); + json_doc.Parse(json_string.data(), json_string.length()); if (json_doc.HasParseError()) { return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", GetParseError_En(json_doc.GetParseError())); @@ -1037,11 +1037,11 @@ Status DictArrayFromJSONString(const std::shared_ptr& type, Status ScalarFromJSONString(const std::shared_ptr& type, std::string_view json_string, std::shared_ptr* out) { - std::shared_ptr converter; + std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); rj::Document json_doc; - json_doc.Parse(json_string.data(), json_string.length()); + json_doc.Parse(json_string.data(), json_string.length()); if (json_doc.HasParseError()) { return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", GetParseError_En(json_doc.GetParseError())); From 38a944a7bdbf37ac52a8b06e963288cb13832b6d Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:36:14 -0700 Subject: [PATCH 21/33] Remove from_string from ipc/api.h CI caught this, I think I should've caught it earlier. 
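In a unity build, several .cc files are compiled as a single translation unit, so an unqualified name like Converter can resolve to more than one declaration once the sources are merged; the PATCH 20/33 commit above sidesteps this by nesting from_string.cc's helpers in arrow::json::internal rather than an anonymous namespace. A hypothetical sketch of that pattern (the class body is invented, not taken from the patch)::

    // Hypothetical sketch, not taken from the patch: the named-internal-namespace
    // pattern applied to from_string.cc. In a unity build an anonymous-namespace
    // Converter could be ambiguous with another Converter visible in arrow::json;
    // a named nested namespace keeps name lookup unambiguous.
    namespace arrow {
    namespace json {
    namespace internal {

    class Converter {
     public:
      // Illustrative member only.
      void Convert() {}
    };

    }  // namespace internal
    }  // namespace json
    }  // namespace arrow
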
--- cpp/src/arrow/ipc/api.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/ipc/api.h b/cpp/src/arrow/ipc/api.h index 84784b8a135..3047180fb1a 100644 --- a/cpp/src/arrow/ipc/api.h +++ b/cpp/src/arrow/ipc/api.h @@ -22,4 +22,3 @@ #include "arrow/ipc/message.h" #include "arrow/ipc/reader.h" #include "arrow/ipc/writer.h" -#include "arrow/json/from_string.h" From a585a1317c79b26a629203842cb6e0ab98f21f97 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:40:23 -0700 Subject: [PATCH 22/33] Update gdb.cc --- python/pyarrow/src/arrow/python/gdb.cc | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/python/pyarrow/src/arrow/python/gdb.cc b/python/pyarrow/src/arrow/python/gdb.cc index c88358a71b0..38383b86f49 100644 --- a/python/pyarrow/src/arrow/python/gdb.cc +++ b/python/pyarrow/src/arrow/python/gdb.cc @@ -39,9 +39,9 @@ namespace arrow { using extension::uuid; using extension::UuidType; -using util::ArrayFromJSON; -using util::ChunkedArrayFromJSON; -using util::ScalarFromJSON; +using json::ArrayFromJSONString; +using json::ChunkedArrayFromJSONString; +using json::ScalarFromJSONString; namespace gdb { @@ -61,7 +61,7 @@ class CustomStatusDetail : public StatusDetail { std::shared_ptr SliceArrayFromJSON(const std::shared_ptr& ty, std::string_view json, int64_t offset = 0, int64_t length = -1) { - auto array = *ArrayFromJSON(ty, json); + auto array = *ArrayFromJSONString(ty, json); if (length != -1) { return array->Slice(offset, length); } else { @@ -320,13 +320,13 @@ void TestSession() { Buffer::FromString(" "), fixed_size_binary(3), /*is_valid=*/false}; std::shared_ptr dict_array; - dict_array = *ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + dict_array = *ArrayFromJSONString(utf8(), R"(["foo", "bar", "quux"])"); DictionaryScalar dict_scalar{{std::make_shared(42), dict_array}, dictionary(int8(), utf8())}; DictionaryScalar dict_scalar_null{dictionary(int8(), utf8())}; - std::shared_ptr list_value_array = *ArrayFromJSON(int32(), R"([4, 5, 6])"); - std::shared_ptr list_zero_length = *ArrayFromJSON(int32(), R"([])"); + std::shared_ptr list_value_array = *ArrayFromJSONString(int32(), R"([4, 5, 6])"); + std::shared_ptr list_zero_length = *ArrayFromJSONString(int32(), R"([])"); ListScalar list_scalar{list_value_array}; ListScalar list_scalar_null{list_zero_length, list(int32()), /*is_valid=*/false}; LargeListScalar large_list_scalar{list_value_array}; @@ -364,8 +364,8 @@ void TestSession() { /*is_valid=*/false}; std::shared_ptr heap_map_scalar; - ARROW_CHECK_OK( - ScalarFromJSON(map(utf8(), int32()), R"([["a", 5], ["b", 6]])", &heap_map_scalar)); + ARROW_CHECK_OK(ScalarFromJSONString(map(utf8(), int32()), R"([["a", 5], ["b", 6]])", + &heap_map_scalar)); auto heap_map_scalar_null = MakeNullScalar(heap_map_scalar->type); // Array and ArrayData @@ -465,15 +465,15 @@ void TestSession() { // ChunkedArray ArrayVector array_chunks(2); - array_chunks[0] = *ArrayFromJSON(int32(), "[1, 2]"); - array_chunks[1] = *ArrayFromJSON(int32(), "[3, null, 4]"); + array_chunks[0] = *ArrayFromJSONString(int32(), "[1, 2]"); + array_chunks[1] = *ArrayFromJSONString(int32(), "[3, null, 4]"); ChunkedArray chunked_array{array_chunks}; // RecordBatch auto batch_schema = schema({field("ints", int32()), field("strs", utf8())}); ArrayVector batch_columns{2}; - batch_columns[0] = *ArrayFromJSON(int32(), "[1, 2, 3]"); - batch_columns[1] = *ArrayFromJSON(utf8(), R"(["abc", null, "def"])"); + batch_columns[0] = *ArrayFromJSONString(int32(), "[1, 2, 3]"); + 
batch_columns[1] = *ArrayFromJSONString(utf8(), R"(["abc", null, "def"])"); auto batch = RecordBatch::Make(batch_schema, /*num_rows=*/3, batch_columns); auto batch_with_metadata = batch->ReplaceSchemaMetadata( key_value_metadata({"key1", "key2", "key3"}, {"value1", "value2", "value3"})); @@ -481,8 +481,8 @@ void TestSession() { // Table ChunkedArrayVector table_columns{2}; ARROW_CHECK_OK( - ChunkedArrayFromJSON(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0])); - ARROW_CHECK_OK(ChunkedArrayFromJSON( + ChunkedArrayFromJSONString(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0])); + ARROW_CHECK_OK(ChunkedArrayFromJSONString( utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"}, &table_columns[1])); auto table = Table::Make(batch_schema, table_columns); From 22555c87c017157c36b51731ea56f329963931f2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:50:32 -0700 Subject: [PATCH 23/33] Update extensions.pyx --- python/pyarrow/tests/extensions.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index 47fb26e0c69..edcb337c2af 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -64,7 +64,7 @@ cdef extern from * namespace "arrow::py" nogil: std::shared_ptr MakeUuidArray() { auto uuid_type = MakeUuidType(); auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; - auto result = util::ArrayFromJSON(fixed_size_binary(16), json); + auto result = json::ArrayFromJSON(fixed_size_binary(16), json); return ExtensionType::WrapArray(uuid_type, result.ValueOrDie()); } From a965af5645995caae690e5f117df0446c902f5fa Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 13:54:33 -0700 Subject: [PATCH 24/33] Sync docs changes with code changes --- cpp/src/arrow/json/from_string.h | 3 ++- docs/source/cpp/api/array.rst | 8 +++---- docs/source/cpp/arrays.rst | 38 +++++++++++++++----------------- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/json/from_string.h b/cpp/src/arrow/json/from_string.h index 5493b393148..f9c2da44733 100644 --- a/cpp/src/arrow/json/from_string.h +++ b/cpp/src/arrow/json/from_string.h @@ -34,7 +34,8 @@ class DataType; namespace json { -/// \defgroup array-from-json Helpers for constructing Arrays from JSON text +/// \defgroup array-from-json-string Helpers for constructing Arrays from JSON +/// text /// /// These helpers are intended to be used in examples, tests, or for quick /// prototyping and are not intended to be used where performance matters. diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst index a49f6ecc293..b2163450b53 100644 --- a/docs/source/cpp/api/array.rst +++ b/docs/source/cpp/api/array.rst @@ -111,11 +111,11 @@ Utilities :members: :undoc-members: -.. _api-array-from-json: +.. _api-array-from-json-string: -FromJSON Helpers ----------------- +FromJSONString Helpers +---------------------- -.. doxygengroup:: array-from-json +.. doxygengroup:: array-from-json-string :content-only: :members: diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index 4cd682d1f0a..38222f84fe0 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -227,10 +227,8 @@ to some logical subsequence of the data. This is done by calling the :func:`arrow::Array::Slice` and :func:`arrow::ChunkedArray::Slice` methods, respectively. -.. 
_fromjson_helpers: - -FromJSON Helpers -================ +FromJSONString Helpers +====================== A set of helper functions is provided for concisely creating Arrays and Scalars from JSON_ text. These helpers are intended to be used in examples, tests, or @@ -241,42 +239,42 @@ objects from line-separated JSON files. .. _JSON: https://datatracker.ietf.org/doc/html/rfc8259 -Examples for ``ArrayFromJSON``, ``ChunkedArrayFromJSON``, ``DictArrayFromJSON`` -are shown below:: +Examples for ``ArrayFromJSONString``, ``ChunkedArrayFromJSONString``, +``DictArrayFromJSONString`` are shown below:: // Simple types - auto int32_array = ArrayFromJSON(int32(), "[1, 2, 3]"); - auto float64_array = ArrayFromJSON(float64(), "[4.0, 5.0, 6.0]") - auto bool_array = ArrayFromJSON(boolean(), "[true, false, true]"); - auto string_array = ArrayFromJSON(utf8(), R"(["Hello", "World", null])"); + auto int32_array = ArrayFromJSONString(int32(), "[1, 2, 3]"); + auto float64_array = ArrayFromJSONString(float64(), "[4.0, 5.0, 6.0]") + auto bool_array = ArrayFromJSONString(boolean(), "[true, false, true]"); + auto string_array = ArrayFromJSONString(utf8(), R"(["Hello", "World", null])"); // Timestamps can be created from string representations - auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND), + auto arr = ArrayFromJSONString(timestamp(TimeUnit::SECOND), R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])"); // List, Map, Struct - auto list_array = ArrayFromJSON( + auto list_array = ArrayFromJSONString( list(int64()), "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]" ); - auto map_array = ArrayFromJSON( + auto map_array = ArrayFromJSONString( map(utf8(), int32()), R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])" ); - auto struct_array = ArrayFromJSON( + auto struct_array = ArrayFromJSONString( struct_({field("one", int32()), field("two", int32())}), "[[11, 22], null, [null, 33]]" ); - // ChunkedArrayFromJSON - ChunkedArrayFromJSON(int32(), {R"([5, 10])", R"([null])", R"([16])"}); + // ChunkedArrayFromJSONString + ChunkedArrayFromJSONString(int32(), {R"([5, 10])", R"([null])", R"([16])"}); - // DictArrayFromJSON - auto key_array = DictArrayFromJSON( + // DictArrayFromJSONString + auto key_array = DictArrayFromJSONString( dictionary(int32(), utf8()), "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])" ); -Please see the :ref:`FromJSON API listing ` for the -complete set of helpers. +Please see the :ref:`FromJSONString API listing ` for +the complete set of helpers. 
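The arrays.rst snippets above assign the return value of ArrayFromJSONString directly; since the helper is declared in cpp/src/arrow/json/from_string.h as returning an arrow::Result, a caller still has to unwrap it. A minimal standalone sketch of that unwrapping (not part of the patch)::

    // Minimal sketch, not part of the patch: consuming the Result returned by
    // arrow::json::ArrayFromJSONString as declared in arrow/json/from_string.h.
    #include <iostream>
    #include <memory>

    #include "arrow/array.h"
    #include "arrow/json/from_string.h"
    #include "arrow/result.h"
    #include "arrow/type.h"

    int main() {
      // Parse a JSON list into an int32 array; JSON null becomes an Arrow null.
      arrow::Result<std::shared_ptr<arrow::Array>> maybe_array =
          arrow::json::ArrayFromJSONString(arrow::int32(), "[1, null, 3]");
      if (!maybe_array.ok()) {
        std::cerr << maybe_array.status().ToString() << std::endl;
        return 1;
      }
      std::shared_ptr<arrow::Array> array = maybe_array.ValueOrDie();
      std::cout << array->ToString() << std::endl;
      return 0;
    }

Inside Status- or Result-returning functions, ARROW_ASSIGN_OR_RAISE is the usual shorthand for the same unwrapping, as the example program added later in this series does.
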
From 21d485b43a4f7dd57adf85b56a4bac3b34ae01d4 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 2 May 2025 14:16:20 -0700 Subject: [PATCH 25/33] whoops --- python/pyarrow/tests/extensions.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/extensions.pyx b/python/pyarrow/tests/extensions.pyx index edcb337c2af..b958abae4db 100644 --- a/python/pyarrow/tests/extensions.pyx +++ b/python/pyarrow/tests/extensions.pyx @@ -64,7 +64,7 @@ cdef extern from * namespace "arrow::py" nogil: std::shared_ptr MakeUuidArray() { auto uuid_type = MakeUuidType(); auto json = "[\\"abcdefghijklmno0\\", \\"0onmlkjihgfedcba\\"]"; - auto result = json::ArrayFromJSON(fixed_size_binary(16), json); + auto result = json::ArrayFromJSONString(fixed_size_binary(16), json); return ExtensionType::WrapArray(uuid_type, result.ValueOrDie()); } From fb029527fd79b65aeb00567053da645d6441ad7f Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 5 May 2025 07:54:34 -0700 Subject: [PATCH 26/33] run clang-format on c++ example code --- docs/source/cpp/arrays.rst | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index 38222f84fe0..82927a5e620 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -244,37 +244,30 @@ Examples for ``ArrayFromJSONString``, ``ChunkedArrayFromJSONString``, // Simple types auto int32_array = ArrayFromJSONString(int32(), "[1, 2, 3]"); - auto float64_array = ArrayFromJSONString(float64(), "[4.0, 5.0, 6.0]") + auto float64_array = ArrayFromJSONString(float64(), "[4.0, 5.0, 6.0]"); auto bool_array = ArrayFromJSONString(boolean(), "[true, false, true]"); auto string_array = ArrayFromJSONString(utf8(), R"(["Hello", "World", null])"); // Timestamps can be created from string representations - auto arr = ArrayFromJSONString(timestamp(TimeUnit::SECOND), - R"(["1970-01-01","2000-02-29","3989-07-14","1900-02-28"])"); + auto arr = + ArrayFromJSONString(timestamp(TimeUnit::SECOND), + R"(["1970-01-01", "2000-02-29","3989-07-14","1900-02-28"])"); // List, Map, Struct - auto list_array = ArrayFromJSONString( - list(int64()), - "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]" - ); + auto list_array = + ArrayFromJSONString(list(int64()), "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]"); auto map_array = ArrayFromJSONString( - map(utf8(), int32()), - R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])" - ); - auto struct_array = ArrayFromJSONString( - struct_({field("one", int32()), field("two", int32())}), - "[[11, 22], null, [null, 33]]" - ); - - // ChunkedArrayFromJSONString + map(utf8(), int32()), R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])"); + auto struct_array = + ArrayFromJSONString(struct_({field("one", int32()), field("two", int32())}), + "[[11, 22], null, [null, 33]]"); + + // ChunkedArrayFromJSONString ChunkedArrayFromJSONString(int32(), {R"([5, 10])", R"([null])", R"([16])"}); // DictArrayFromJSONString auto key_array = DictArrayFromJSONString( - dictionary(int32(), utf8()), - "[0, 1, 0, 2, 0, 3]", - R"(["k1", "k2", "k3", "k4"])" - ); + dictionary(int32(), utf8()), "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])"); Please see the :ref:`FromJSONString API listing ` for the complete set of helpers. 
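For the chunked-array and dictionary-array helpers shown above, cpp/src/arrow/json/from_string.h declares Status-returning signatures that write through an out pointer. A short sketch against those signatures (the BuildFromJSONStrings wrapper is invented for illustration)::

    // Sketch only: the Status-returning, out-parameter signatures declared in
    // cpp/src/arrow/json/from_string.h; BuildFromJSONStrings is illustrative.
    #include <memory>

    #include "arrow/array.h"
    #include "arrow/chunked_array.h"
    #include "arrow/json/from_string.h"
    #include "arrow/status.h"
    #include "arrow/type.h"

    arrow::Status BuildFromJSONStrings() {
      // One JSON string per chunk of the resulting ChunkedArray.
      std::shared_ptr<arrow::ChunkedArray> chunked;
      ARROW_RETURN_NOT_OK(arrow::json::ChunkedArrayFromJSONString(
          arrow::int32(), {"[5, 10]", "[null]", "[16]"}, &chunked));

      // int32 indices into a utf8 dictionary.
      std::shared_ptr<arrow::Array> dict_array;
      ARROW_RETURN_NOT_OK(arrow::json::DictArrayFromJSONString(
          arrow::dictionary(arrow::int32(), arrow::utf8()), "[0, 1, 0, 2, 0, 3]",
          R"(["k1", "k2", "k3", "k4"])", &dict_array));
      return arrow::Status::OK();
    }
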
From be2cbc118b001c9febbed337eb1db074d5c228a0 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 5 May 2025 10:41:22 -0700 Subject: [PATCH 27/33] Update docs/source/cpp/arrays.rst Co-authored-by: Benjamin Kietzman --- docs/source/cpp/arrays.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index 82927a5e620..d3c118d9fe2 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -58,7 +58,7 @@ example a ``std::vector``. Instead, several strategies can be used: deal with details of the Arrow format yourself. .. note:: For cases where performance isn't important such as examples or tests, - you may prefer to use the ``*FromJSON`` helpers which can create + you may prefer to use the ``*FromJSONString`` helpers which can create Arrays using a JSON text shorthand. See :ref:`fromjson_helpers`. Using ArrayBuilder and its subclasses From 4a9bc3e4235267e5864532ffc385be3617236001 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 7 May 2025 13:41:57 -0700 Subject: [PATCH 28/33] Update docs/source/cpp/api/array.rst Co-authored-by: Benjamin Kietzman --- docs/source/cpp/api/array.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/cpp/api/array.rst b/docs/source/cpp/api/array.rst index b2163450b53..43601fc2fff 100644 --- a/docs/source/cpp/api/array.rst +++ b/docs/source/cpp/api/array.rst @@ -117,5 +117,4 @@ FromJSONString Helpers ---------------------- .. doxygengroup:: array-from-json-string - :content-only: :members: From 279f5abfab9f085906a13924ab1d0264155b7730 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 7 May 2025 13:58:40 -0700 Subject: [PATCH 29/33] Update cpp/src/arrow/json/from_string.h Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/json/from_string.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/arrow/json/from_string.h b/cpp/src/arrow/json/from_string.h index f9c2da44733..da4a9dfdd76 100644 --- a/cpp/src/arrow/json/from_string.h +++ b/cpp/src/arrow/json/from_string.h @@ -43,6 +43,12 @@ namespace json { /// @{ /// \brief Create an Array from a JSON string +/// +/// \code {.cpp} +/// std::shared_ptr array = ArrayFromJSONString( +/// int64(), "[2, 3, 5, 7, 11]" +/// ).ValueOrDie(); +/// \endcode ARROW_EXPORT Result> ArrayFromJSONString(const std::shared_ptr&, const std::string& json); From 5cedcfec0e3f398ac5d1e4c33e986a5f7bec882a Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 7 May 2025 13:50:22 -0700 Subject: [PATCH 30/33] Fixes/improvements for docs --- cpp/examples/arrow/CMakeLists.txt | 1 + .../arrow/from_json_string_example.cc | 91 +++++++++++++++++++ cpp/src/arrow/json/from_string.h | 45 +++++++-- docs/source/cpp/arrays.rst | 35 ++----- 4 files changed, 135 insertions(+), 37 deletions(-) create mode 100644 cpp/examples/arrow/from_json_string_example.cc diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index ef4beaaca2c..bf0748f5501 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -19,6 +19,7 @@ add_arrow_example(row_wise_conversion_example) if(ARROW_WITH_RAPIDJSON) add_arrow_example(rapidjson_row_converter EXTRA_LINK_LIBS RapidJSON) + add_arrow_example(from_json_string_example EXTRA_LINK_LIBS RapidJSON) endif() if(ARROW_ACERO) diff --git a/cpp/examples/arrow/from_json_string_example.cc b/cpp/examples/arrow/from_json_string_example.cc new file mode 100644 index 00000000000..d0069af837b --- /dev/null +++ b/cpp/examples/arrow/from_json_string_example.cc @@ -0,0 +1,91 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// This example shows how to use some of the *FromJSONString helpers. + +#include +#include + +#include +#include +#include +#include + +using arrow::json::ArrayFromJSONString; +using arrow::json::ChunkedArrayFromJSONString; +using arrow::json::DictArrayFromJSONString; + +/** + * \brief Run Example + * + * ./debug/from-json-string-example + */ +arrow::Status RunExample() { + // Simple types + ARROW_ASSIGN_OR_RAISE(auto int32_array, + ArrayFromJSONString(arrow::int32(), "[1, 2, 3]")); + ARROW_ASSIGN_OR_RAISE(auto float64_array, + ArrayFromJSONString(arrow::float64(), "[4.0, 5.0, 6.0]")); + ARROW_ASSIGN_OR_RAISE(auto bool_array, + ArrayFromJSONString(arrow::boolean(), "[true, false, true]")); + ARROW_ASSIGN_OR_RAISE( + auto string_array, + ArrayFromJSONString(arrow::utf8(), R"(["Hello", "World", null])")); + + // Timestamps can be created from string representations + ARROW_ASSIGN_OR_RAISE( + auto ts_array, + ArrayFromJSONString(timestamp(arrow::TimeUnit::SECOND), + R"(["1970-01-01", "2000-02-29","3989-07-14","1900-02-28"])")); + + // List, Map, Struct + ARROW_ASSIGN_OR_RAISE( + auto list_array, + ArrayFromJSONString(list(arrow::int64()), + "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]")); + ARROW_ASSIGN_OR_RAISE( + auto map_array, + ArrayFromJSONString(map(arrow::utf8(), arrow::int32()), + R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])")); + ARROW_ASSIGN_OR_RAISE( + auto struct_array, + ArrayFromJSONString( + arrow::struct_({field("one", arrow::int32()), field("two", arrow::int32())}), + "[[11, 22], null, [null, 33]]")); + + // ChunkedArrayFromJSONString + std::shared_ptr chunked_array; + ARROW_RETURN_NOT_OK(ChunkedArrayFromJSONString( + arrow::int32(), {R"([5, 10])", R"([null])", R"([16])"}, &chunked_array)); + + // DictArrayFromJSONString + std::shared_ptr dict_array; + ARROW_RETURN_NOT_OK(DictArrayFromJSONString( + dictionary(arrow::int32(), arrow::utf8()), "[0, 1, 0, 2, 0, 3]", + R"(["k1", "k2", "k3", "k4"])", &dict_array)); + + return arrow::Status::OK(); +} + +int main(int argc, char** argv) { + auto status = RunExample(); + if (!status.ok()) { + std::cerr << status.ToString() << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/cpp/src/arrow/json/from_string.h b/cpp/src/arrow/json/from_string.h index da4a9dfdd76..03c6b1bcdf4 100644 --- a/cpp/src/arrow/json/from_string.h +++ b/cpp/src/arrow/json/from_string.h @@ -34,54 +34,81 @@ class DataType; namespace json { -/// \defgroup array-from-json-string Helpers for constructing Arrays from JSON -/// text +/// \defgroup array-from-json-string FromJSONString Helpers /// /// These helpers are intended to be used in examples, tests, or for quick /// prototyping and are not intended to be used where 
performance matters. /// +/// See the User Guide for +/// more information. +/// /// @{ /// \brief Create an Array from a JSON string /// /// \code {.cpp} /// std::shared_ptr array = ArrayFromJSONString( -/// int64(), "[2, 3, 5, 7, 11]" +/// int64(), "[2, 3, null, 7, 11]" /// ).ValueOrDie(); /// \endcode ARROW_EXPORT Result> ArrayFromJSONString(const std::shared_ptr&, const std::string& json); -/// \brief Create an Array from a JSON string +/// \copydoc ArrayFromJSONString(const std::shared_ptr&, const std::string&) ARROW_EXPORT Result> ArrayFromJSONString(const std::shared_ptr&, std::string_view json); -/// \brief Create an Array from a JSON string +/// \copydoc ArrayFromJSONString(const std::shared_ptr&, const std::string&) ARROW_EXPORT Result> ArrayFromJSONString(const std::shared_ptr&, const char* json); -/// \brief Create an ChunkedArray from a JSON string +/// \brief Create a ChunkedArray from a JSON string +/// +/// \code {.cpp} +/// std::shared_ptr chunked_array; +/// ChunkedArrayFromJSONString( +/// int64(), {R"([5, 10])", R"([null])", R"([16])"}, &chunked_array +/// ); +/// \endcode ARROW_EXPORT Status ChunkedArrayFromJSONString(const std::shared_ptr& type, const std::vector& json_strings, std::shared_ptr* out); -/// \brief Create an DictionaryArray from a JSON string +/// \brief Create a DictionaryArray from a JSON string +/// +/// \code {.cpp} +/// std::shared_ptr array; +/// DictArrayFromJSONString( +/// dictionary(int32(), utf8()), +/// "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])", +/// &array +/// ); +/// \endcode ARROW_EXPORT Status DictArrayFromJSONString(const std::shared_ptr&, std::string_view indices_json, std::string_view dictionary_json, std::shared_ptr* out); -/// \brief Create an Scalar from a JSON string +/// \brief Create a Scalar from a JSON string +/// \code {.cpp} +/// std::shared_ptr scalar; +/// ScalarFromJSONString(float64(), "42", &scalar); +/// \endcode ARROW_EXPORT Status ScalarFromJSONString(const std::shared_ptr&, std::string_view json, std::shared_ptr* out); -/// \brief Create an DictionaryScalar from a JSON string +/// \brief Create a DictionaryScalar from a JSON string +/// \code {.cpp} +/// std::shared_ptr scalar; +/// DictScalarFromJSONString(dictionary(int32(), utf8()), "3", R"(["k1", "k2", "k3", +/// "k4"])", &scalar); +/// \endcode ARROW_EXPORT Status DictScalarFromJSONString(const std::shared_ptr&, std::string_view index_json, diff --git a/docs/source/cpp/arrays.rst b/docs/source/cpp/arrays.rst index d3c118d9fe2..996472ec131 100644 --- a/docs/source/cpp/arrays.rst +++ b/docs/source/cpp/arrays.rst @@ -240,34 +240,13 @@ objects from line-separated JSON files. .. 
_JSON: https://datatracker.ietf.org/doc/html/rfc8259 Examples for ``ArrayFromJSONString``, ``ChunkedArrayFromJSONString``, -``DictArrayFromJSONString`` are shown below:: - - // Simple types - auto int32_array = ArrayFromJSONString(int32(), "[1, 2, 3]"); - auto float64_array = ArrayFromJSONString(float64(), "[4.0, 5.0, 6.0]"); - auto bool_array = ArrayFromJSONString(boolean(), "[true, false, true]"); - auto string_array = ArrayFromJSONString(utf8(), R"(["Hello", "World", null])"); - - // Timestamps can be created from string representations - auto arr = - ArrayFromJSONString(timestamp(TimeUnit::SECOND), - R"(["1970-01-01", "2000-02-29","3989-07-14","1900-02-28"])"); - - // List, Map, Struct - auto list_array = - ArrayFromJSONString(list(int64()), "[[null], [], null, [4, 5, 6, 7, 8], [2, 3]]"); - auto map_array = ArrayFromJSONString( - map(utf8(), int32()), R"([[["joe", 0], ["mark", null]], null, [["cap", 8]], []])"); - auto struct_array = - ArrayFromJSONString(struct_({field("one", int32()), field("two", int32())}), - "[[11, 22], null, [null, 33]]"); - - // ChunkedArrayFromJSONString - ChunkedArrayFromJSONString(int32(), {R"([5, 10])", R"([null])", R"([16])"}); - - // DictArrayFromJSONString - auto key_array = DictArrayFromJSONString( - dictionary(int32(), utf8()), "[0, 1, 0, 2, 0, 3]", R"(["k1", "k2", "k3", "k4"])"); +``DictArrayFromJSONString`` are shown below: + +.. literalinclude:: ../../../cpp/examples/arrow/from_json_string_example.cc + :language: cpp + :start-after: arrow::Status RunExample() { + :end-before: return arrow::Status::OK(); + :dedent: 2 Please see the :ref:`FromJSONString API listing ` for the complete set of helpers. From b5ed9b166f145263a8e4b596ff2fa97af365f159 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 8 May 2025 19:51:29 +0000 Subject: [PATCH 31/33] testing theory on ci failure --- python/pyarrow/tests/test_gdb.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 2ac2f55754f..eb85a87e737 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -633,16 +633,18 @@ def test_scalars_stack(gdb_arrow): check_stack_repr( gdb_arrow, "string_scalar_empty", 'arrow::StringScalar of size 0, value ""') - check_stack_repr( - gdb_arrow, "string_scalar_hehe", - 'arrow::StringScalar of size 6, value "héhé"') + # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 + # check_stack_repr( + # gdb_arrow, "string_scalar_hehe", + # 'arrow::StringScalar of size 6, value "héhé"') # FIXME: excessive escaping ('\\xff' vs. 
'\x00') check_stack_repr( gdb_arrow, "string_scalar_invalid_chars", r'arrow::StringScalar of size 11, value "abc\x00def\\xffghi"') - check_stack_repr( - gdb_arrow, "large_string_scalar_hehe", - 'arrow::LargeStringScalar of size 6, value "héhé"') + # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 + # check_stack_repr( + # gdb_arrow, "large_string_scalar_hehe", + # 'arrow::LargeStringScalar of size 6, value "héhé"') check_stack_repr( gdb_arrow, "fixed_size_binary_scalar", @@ -982,14 +984,16 @@ def test_arrays_heap(gdb_arrow): gdb_arrow, "heap_large_binary_array", (r'arrow::LargeBinaryArray of length 3, offset 0, null count 1 = {' r'[0] = null, [1] = "abcd", [2] = "\000\037\377"}')) - check_heap_repr( - gdb_arrow, "heap_string_array", - (r'arrow::StringArray of length 3, offset 0, null count 1 = {' - r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) - check_heap_repr( - gdb_arrow, "heap_large_string_array", - (r'arrow::LargeStringArray of length 3, offset 0, null count 1 = {' - r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) + # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 + # check_heap_repr( + # gdb_arrow, "heap_string_array", + # (r'arrow::StringArray of length 3, offset 0, null count 1 = {' + # r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) + # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 + # check_heap_repr( + # gdb_arrow, "heap_large_string_array", + # (r'arrow::LargeStringArray of length 3, offset 0, null count 1 = {' + # r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) check_heap_repr( gdb_arrow, "heap_binary_array_sliced", (r'arrow::BinaryArray of length 1, offset 1, unknown null count = ' From 96ed729375574eaff8203575716b369059b60caa Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 8 May 2025 14:02:17 -0700 Subject: [PATCH 32/33] Revert "testing theory on ci failure" This reverts commit b5ed9b166f145263a8e4b596ff2fa97af365f159. --- python/pyarrow/tests/test_gdb.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index eb85a87e737..2ac2f55754f 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -633,18 +633,16 @@ def test_scalars_stack(gdb_arrow): check_stack_repr( gdb_arrow, "string_scalar_empty", 'arrow::StringScalar of size 0, value ""') - # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 - # check_stack_repr( - # gdb_arrow, "string_scalar_hehe", - # 'arrow::StringScalar of size 6, value "héhé"') + check_stack_repr( + gdb_arrow, "string_scalar_hehe", + 'arrow::StringScalar of size 6, value "héhé"') # FIXME: excessive escaping ('\\xff' vs. 
'\x00') check_stack_repr( gdb_arrow, "string_scalar_invalid_chars", r'arrow::StringScalar of size 11, value "abc\x00def\\xffghi"') - # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 - # check_stack_repr( - # gdb_arrow, "large_string_scalar_hehe", - # 'arrow::LargeStringScalar of size 6, value "héhé"') + check_stack_repr( + gdb_arrow, "large_string_scalar_hehe", + 'arrow::LargeStringScalar of size 6, value "héhé"') check_stack_repr( gdb_arrow, "fixed_size_binary_scalar", @@ -984,16 +982,14 @@ def test_arrays_heap(gdb_arrow): gdb_arrow, "heap_large_binary_array", (r'arrow::LargeBinaryArray of length 3, offset 0, null count 1 = {' r'[0] = null, [1] = "abcd", [2] = "\000\037\377"}')) - # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 - # check_heap_repr( - # gdb_arrow, "heap_string_array", - # (r'arrow::StringArray of length 3, offset 0, null count 1 = {' - # r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) - # TODO: Uncomment before merge, see https://github.com/apache/arrow/pull/46180#issuecomment-2863924765 - # check_heap_repr( - # gdb_arrow, "heap_large_string_array", - # (r'arrow::LargeStringArray of length 3, offset 0, null count 1 = {' - # r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) + check_heap_repr( + gdb_arrow, "heap_string_array", + (r'arrow::StringArray of length 3, offset 0, null count 1 = {' + r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) + check_heap_repr( + gdb_arrow, "heap_large_string_array", + (r'arrow::LargeStringArray of length 3, offset 0, null count 1 = {' + r'[0] = null, [1] = "héhé", [2] = "invalid \\xff char"}')) check_heap_repr( gdb_arrow, "heap_binary_array_sliced", (r'arrow::BinaryArray of length 1, offset 1, unknown null count = ' From f91dd1cf5facd2abef3aff64b5d86648a58ba393 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Fri, 9 May 2025 10:53:49 -0700 Subject: [PATCH 33/33] Update cpp/examples/arrow/from_json_string_example.cc Co-authored-by: Enrico Minack --- cpp/examples/arrow/from_json_string_example.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/examples/arrow/from_json_string_example.cc b/cpp/examples/arrow/from_json_string_example.cc index d0069af837b..da13d913489 100644 --- a/cpp/examples/arrow/from_json_string_example.cc +++ b/cpp/examples/arrow/from_json_string_example.cc @@ -70,7 +70,7 @@ arrow::Status RunExample() { // ChunkedArrayFromJSONString std::shared_ptr chunked_array; ARROW_RETURN_NOT_OK(ChunkedArrayFromJSONString( - arrow::int32(), {R"([5, 10])", R"([null])", R"([16])"}, &chunked_array)); + arrow::int32(), {"[5, 10]", "[null]", "[16]"}, &chunked_array)); // DictArrayFromJSONString std::shared_ptr dict_array;
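The example file above covers the array helpers but not the scalar ones. A sketch of ScalarFromJSONString and DictScalarFromJSONString, following the signatures and doc-comment examples in cpp/src/arrow/json/from_string.h (the MakeScalars wrapper is invented for illustration)::

    // Sketch only: the scalar helpers declared in cpp/src/arrow/json/from_string.h,
    // which from_json_string_example.cc does not exercise. MakeScalars is an
    // illustrative wrapper, not part of the patch.
    #include <memory>

    #include "arrow/json/from_string.h"
    #include "arrow/scalar.h"
    #include "arrow/status.h"
    #include "arrow/type.h"

    arrow::Status MakeScalars() {
      // A float64 scalar; the tests in this series also accept "NaN" and "Inf".
      std::shared_ptr<arrow::Scalar> scalar;
      ARROW_RETURN_NOT_OK(
          arrow::json::ScalarFromJSONString(arrow::float64(), "42", &scalar));

      // A dictionary scalar: index 3 into a four-entry utf8 dictionary.
      std::shared_ptr<arrow::Scalar> dict_scalar;
      ARROW_RETURN_NOT_OK(arrow::json::DictScalarFromJSONString(
          arrow::dictionary(arrow::int32(), arrow::utf8()), "3",
          R"(["k1", "k2", "k3", "k4"])", &dict_scalar));
      return arrow::Status::OK();
    }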