Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 2 additions & 20 deletions cpp/src/arrow/ipc/json_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,24 +250,12 @@ static const char* JSON_EXAMPLE = R"example(
{
"name": "foo",
"type": {"name": "int", "isSigned": true, "bitWidth": 32},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 32}
]
}
"nullable": true, "children": []
},
{
"name": "bar",
"type": {"name": "floatingpoint", "precision": "DOUBLE"},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 64}
]
}
"nullable": true, "children": []
}
]
},
Expand Down Expand Up @@ -318,12 +306,6 @@ static const char* JSON_EXAMPLE2 = R"example(
"name": "foo",
"type": {"name": "int", "isSigned": true, "bitWidth": 32},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 32}
]
},
"metadata": [
{"key": "converted_from_time32", "value": "true"}
]
Expand Down
62 changes: 45 additions & 17 deletions cpp/src/arrow/ipc/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/formatting.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
#include "arrow/util/parsing.h"
#include "arrow/util/string.h"
#include "arrow/visitor_inline.h"

Expand Down Expand Up @@ -335,10 +337,8 @@ class SchemaWriter {
Status Visit(const TimeType& type) { return WritePrimitive("time", type); }
Status Visit(const StringType& type) { return WriteVarBytes("utf8", type); }
Status Visit(const BinaryType& type) { return WriteVarBytes("binary", type); }
Status Visit(const LargeStringType& type) { return WriteVarBytes("large_utf8", type); }
Status Visit(const LargeBinaryType& type) {
return WriteVarBytes("large_binary", type);
}
Status Visit(const LargeStringType& type) { return WriteVarBytes("largeutf8", type); }
Status Visit(const LargeBinaryType& type) { return WriteVarBytes("largebinary", type); }
Status Visit(const FixedSizeBinaryType& type) {
return WritePrimitive("fixedsizebinary", type);
}
Expand All @@ -358,7 +358,7 @@ class SchemaWriter {
}

Status Visit(const LargeListType& type) {
WriteName("large_list", type);
WriteName("largelist", type);
return Status::OK();
}

Expand Down Expand Up @@ -525,8 +525,21 @@ class ArrayWriter {
void WriteIntegerField(const char* name, const T* values, int64_t length) {
writer_->Key(name);
writer_->StartArray();
for (int i = 0; i < length; ++i) {
writer_->Int64(values[i]);
if (sizeof(T) < sizeof(int64_t)) {
for (int i = 0; i < length; ++i) {
writer_->Int64(values[i]);
}
} else {
// Represent 64-bit integers as strings, as JSON numbers cannot represent
// them exactly.
::arrow::internal::StringFormatter<typename CTypeTraits<T>::ArrowType> formatter;
auto append = [this](util::string_view v) {
writer_->String(v.data(), static_cast<rj::SizeType>(v.size()));
return Status::OK();
};
for (int i = 0; i < length; ++i) {
DCHECK_OK(formatter(values[i], append));
}
}
writer_->EndArray();
}
Expand Down Expand Up @@ -932,9 +945,9 @@ static Status GetType(const RjObject& json_type,
*type = utf8();
} else if (type_name == "binary") {
*type = binary();
} else if (type_name == "large_utf8") {
} else if (type_name == "largeutf8") {
*type = large_utf8();
} else if (type_name == "large_binary") {
} else if (type_name == "largebinary") {
*type = large_binary();
} else if (type_name == "fixedsizebinary") {
return GetFixedSizeBinary(json_type, type);
Expand All @@ -957,7 +970,7 @@ static Status GetType(const RjObject& json_type,
return Status::Invalid("List must have exactly one child");
}
*type = list(children[0]);
} else if (type_name == "large_list") {
} else if (type_name == "largelist") {
if (children.size() != 1) {
return Status::Invalid("Large list must have exactly one child");
}
Expand Down Expand Up @@ -1299,13 +1312,28 @@ class ArrayReader {
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(length * sizeof(T), pool_));

T* values = reinterpret_cast<T*>(buffer->mutable_data());
for (int i = 0; i < length; ++i) {
const rj::Value& val = json_array[i];
DCHECK(val.IsInt() || val.IsInt64());
if (val.IsInt()) {
values[i] = static_cast<T>(val.GetInt());
} else {
values[i] = static_cast<T>(val.GetInt64());
if (sizeof(T) < sizeof(int64_t)) {
for (int i = 0; i < length; ++i) {
const rj::Value& val = json_array[i];
DCHECK(val.IsInt() || val.IsInt64());
if (val.IsInt()) {
values[i] = static_cast<T>(val.GetInt());
} else {
values[i] = static_cast<T>(val.GetInt64());
}
}
} else {
// Read 64-bit integers as strings, as JSON numbers cannot represent
// them exactly.
::arrow::internal::StringConverter<typename CTypeTraits<T>::ArrowType> converter;
for (int i = 0; i < length; ++i) {
const rj::Value& val = json_array[i];
DCHECK(val.IsString());
if (!converter(val.GetString(), val.GetStringLength(), &values[i])) {
return Status::Invalid("Failed to parse integer: '",
std::string(val.GetString(), val.GetStringLength()),
"'");
}
}
}

Expand Down
16 changes: 2 additions & 14 deletions cpp/src/arrow/ipc/json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -337,24 +337,12 @@ TEST(TestJsonFileReadWrite, MinimalFormatExample) {
{
"name": "foo",
"type": {"name": "int", "isSigned": true, "bitWidth": 32},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 32}
]
}
"nullable": true, "children": []
},
{
"name": "bar",
"type": {"name": "floatingpoint", "precision": "DOUBLE"},
"nullable": true, "children": [],
"typeLayout": {
"vectors": [
{"type": "VALIDITY", "typeBitWidth": 1},
{"type": "DATA", "typeBitWidth": 64}
]
}
"nullable": true, "children": []
}
]
},
Expand Down
8 changes: 4 additions & 4 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
logger.debug(f"Running benchmark {rev_or_path}")

conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
cmake_extras=cmake_extras, **kwargs)

runner_base = BenchmarkRunner.from_rev_or_path(
src, root, rev_or_path, conf)
Expand Down Expand Up @@ -399,7 +399,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
logger.debug(f"Running benchmark {rev_or_path}")

conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
cmake_extras=cmake_extras, **kwargs)

runner_base = BenchmarkRunner.from_rev_or_path(
src, root, rev_or_path, conf,
Expand Down Expand Up @@ -497,7 +497,7 @@ def benchmark_diff(ctx, src, preserve, output, cmake_extras,
f"{baseline} (baseline)")

conf = CppBenchmarkRunner.default_configuration(
cmake_extras=cmake_extras, **kwargs)
cmake_extras=cmake_extras, **kwargs)

runner_cont = BenchmarkRunner.from_rev_or_path(
src, root, contender, conf,
Expand Down Expand Up @@ -551,7 +551,7 @@ def _set_default(opt, default):
@click.option('stop_on_error', '-x', '--stop-on-error',
is_flag=True, default=False,
help='Stop on first error')
@click.option('--gold_dirs', multiple=True,
@click.option('--gold-dirs', multiple=True,
help="gold integration test file paths")
@click.option('-k', '--match',
help=("Substring for test names to include in run, "
Expand Down
Loading