Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ if(ARROW_BUILD_BENCHMARKS
set(ARROW_TESTING ON)
endif()

# Turn the JSON component on whenever the dataset JNI bindings are built.
# NOTE(review): presumably the JNI layer relies on the arrow/json helpers --
# confirm against the JNI sources.
if(ARROW_DATASET_JNI)
set(ARROW_JSON ON)
endif()

if(ARROW_GANDIVA)
set(ARROW_WITH_RE2 ON)
endif()
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,8 @@ if(ARROW_JSON)
json/chunked_builder.cc
json/chunker.cc
json/converter.cc
json/array_parser.cc
json/array_writer.cc
json/object_parser.cc
json/object_writer.cc
json/parser.cc
Expand Down
72 changes: 72 additions & 0 deletions cpp/src/arrow/json/array_parser.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/json/array_parser.h"
#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep

#include <rapidjson/document.h>

namespace arrow {
namespace json {
namespace internal {

namespace rj = arrow::rapidjson;

/// Private implementation of ArrayParser.
///
/// Owns the rapidjson document holding the parsed text. Every accessor
/// validates the document state and the requested index and reports problems
/// through Status instead of relying on rapidjson's internal assertions.
class ArrayParser::Impl {
 public:
  /// Parse `json` and check that its top-level value is a JSON array.
  ///
  /// \param[in] json the text to parse; it does not need to be NUL-terminated
  /// \return Status::Invalid if the text is not valid JSON, Status::TypeError
  /// if it is valid JSON but not an array, Status::OK otherwise
  Status Parse(arrow::util::string_view json) {
    document_.Parse(reinterpret_cast<const rj::Document::Ch*>(json.data()),
                    static_cast<size_t>(json.size()));

    if (document_.HasParseError()) {
      return Status::Invalid("Json parse error (offset ", document_.GetErrorOffset(),
                             "): ", document_.GetParseError());
    }
    if (!document_.IsArray()) {
      return Status::TypeError("Not a json array");
    }
    return Status::OK();
  }

  /// Read the element at position `ordinal` as a 64-bit signed integer.
  ///
  /// \param[in] ordinal zero-based position inside the array
  /// \return the value, Status::Invalid if no array is loaded,
  /// Status::IndexError if `ordinal` is out of range, or Status::TypeError if
  /// the element is not representable as an int64
  Result<int64_t> GetInt64(int32_t ordinal) const {
    // Guards against use before Parse() or after Parse() rejected the input.
    if (!document_.IsArray()) {
      return Status::Invalid("No json array has been parsed");
    }
    // rapidjson only asserts on out-of-range indices, so check explicitly.
    if (ordinal < 0 || static_cast<rj::SizeType>(ordinal) >= document_.Size()) {
      return Status::IndexError("Ordinal '", ordinal,
                                "' is out of bounds for json array of length ",
                                document_.Size());
    }
    const auto& element = document_[static_cast<rj::SizeType>(ordinal)];
    if (!element.IsInt64()) {
      return Status::TypeError("Value at ordinal '", ordinal, "' is not a int64");
    }
    return element.GetInt64();
  }

  /// Number of elements in the parsed array.
  ///
  /// \return the element count, or Status::Invalid if no array is loaded
  Result<uint32_t> Length() const {
    if (!document_.IsArray()) {
      return Status::Invalid("No json array has been parsed");
    }
    return document_.Size();
  }

 private:
  rj::Document document_;
};

ArrayParser::ArrayParser() : impl_(new ArrayParser::Impl()) {}

// Defaulted in this file, where Impl is defined, rather than in the header,
// where Impl is only forward-declared and std::unique_ptr<Impl> could not
// destroy it.
ArrayParser::~ArrayParser() = default;

/// Parse `json` by delegating to the private implementation.
Status ArrayParser::Parse(arrow::util::string_view json) {
  Impl& impl = *impl_;
  return impl.Parse(json);
}

/// Report the array length by delegating to the private implementation.
Result<uint32_t> ArrayParser::Length() const {
  const Impl& impl = *impl_;
  return impl.Length();
}

/// Fetch the int64 element at `ordinal` by delegating to the private
/// implementation.
Result<int64_t> ArrayParser::GetInt64(int32_t ordinal) const {
  const Impl& impl = *impl_;
  return impl.GetInt64(ordinal);
}

} // namespace internal
} // namespace json
} // namespace arrow
50 changes: 50 additions & 0 deletions cpp/src/arrow/json/array_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <memory>

#include "arrow/result.h"
#include "arrow/util/string_view.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace json {
namespace internal {

/// \brief Helper that parses a JSON array from a string and exposes its
/// elements by position.
///
/// The implementation is based on rapidjson::Document and is hidden behind a
/// private Impl class, so this header does not pull in rapidjson.
class ARROW_EXPORT ArrayParser {
public:
ArrayParser();
~ArrayParser();

/// \brief Parse `json`, which must contain a JSON array at top level.
///
/// \param[in] json the text to parse
/// \return an error Status if the text is not valid JSON or if its
/// top-level value is not an array
Status Parse(arrow::util::string_view json);

/// \brief Number of elements in the parsed array.
///
/// Intended to be called after a successful Parse().
Result<uint32_t> Length() const;

/// \brief Read the element at `ordinal` as a 64-bit signed integer.
///
/// \param[in] ordinal zero-based position; callers should pass a value in
/// [0, Length())
/// \return the value, or a TypeError if the element is not an int64
Result<int64_t> GetInt64(int32_t ordinal) const;

private:
// Defined in array_parser.cc.
class Impl;
std::unique_ptr<Impl> impl_;
};

} // namespace internal
} // namespace json
} // namespace arrow
64 changes: 64 additions & 0 deletions cpp/src/arrow/json/array_writer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/json/array_writer.h"
#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep

#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>

namespace rj = arrow::rapidjson;

namespace arrow {
namespace json {
namespace internal {

/// Private implementation of ArrayWriter: accumulates values in a rapidjson
/// array and serializes it on demand.
class ArrayWriter::Impl {
 public:
  Impl() : root_(rj::kArrayType) {}

  /// Append `value` as the next element of the array.
  void AppendInt64(int64_t value) {
    // Storage for the new element comes from the allocator owned by document_.
    root_.PushBack(value, document_.GetAllocator());
  }

  /// Serialize the accumulated array to JSON text.
  std::string Serialize() {
    rj::StringBuffer sink;
    rj::Writer<rj::StringBuffer> json_writer(sink);
    root_.Accept(json_writer);

    std::string text = sink.GetString();
    return text;
  }

 private:
  // document_ serves only as the owner of the allocator used by root_. It is
  // declared before root_, so root_ is destroyed first.
  rj::Document document_;
  rj::Value root_;
};

ArrayWriter::ArrayWriter() : impl_(new ArrayWriter::Impl()) {}

// Defaulted in this file, where Impl is defined, rather than in the header,
// where Impl is only forward-declared and std::unique_ptr<Impl> could not
// destroy it.
ArrayWriter::~ArrayWriter() = default;

void ArrayWriter::AppendInt64(int64_t value) { impl_->AppendInt64(value); }

std::string ArrayWriter::Serialize() { return impl_->Serialize(); }

} // namespace internal
} // namespace json
} // namespace arrow
46 changes: 46 additions & 0 deletions cpp/src/arrow/json/array_writer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>
#include <memory>
#include <string>

#include "arrow/util/visibility.h"

namespace arrow {
namespace json {
namespace internal {

/// \brief Helper that builds a JSON array value by value and serializes it
/// to a string.
///
/// The implementation is based on rapidjson and is hidden behind a private
/// Impl class, so this header does not pull in rapidjson.
class ARROW_EXPORT ArrayWriter {
public:
ArrayWriter();
~ArrayWriter();

/// \brief Append `value` as the next element of the array.
void AppendInt64(int64_t value);

/// \brief Serialize the values appended so far as JSON array text.
std::string Serialize();

private:
// Defined in array_writer.cc.
class Impl;
std::unique_ptr<Impl> impl_;
};

} // namespace internal
} // namespace json
} // namespace arrow
5 changes: 5 additions & 0 deletions cpp/src/arrow/record_batch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,4 +364,9 @@ Result<std::shared_ptr<RecordBatchReader>> RecordBatchReader::Make(
return std::make_shared<SimpleRecordBatchReader>(std::move(batches), schema);
}

/// Create a RecordBatchReader that wraps an iterator of record batches.
///
/// Unlike the vector overload, nothing here can supply a missing schema, so a
/// null `schema` is rejected up front rather than being handed to the reader.
///
/// \param[in] itr iterator producing the batches; ownership moves to the reader
/// \param[in] schema schema the reader reports; must not be null
/// \return the new reader, or Status::Invalid if `schema` is null
Result<std::shared_ptr<RecordBatchReader>> RecordBatchReader::Make(
    Iterator<std::shared_ptr<RecordBatch>> itr, std::shared_ptr<Schema> schema) {
  if (schema == nullptr) {
    return Status::Invalid("Schema cannot be nullptr");
  }
  // Both arguments were taken by value; move them to avoid an extra
  // shared_ptr reference-count round trip.
  return std::make_shared<SimpleRecordBatchReader>(std::move(itr), std::move(schema));
}

} // namespace arrow
7 changes: 7 additions & 0 deletions cpp/src/arrow/record_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,13 @@ class ARROW_EXPORT RecordBatchReader {
/// element if not provided.
static Result<std::shared_ptr<RecordBatchReader>> Make(
RecordBatchVector batches, std::shared_ptr<Schema> schema = NULLPTR);

/// \brief Create a RecordBatchReader from a RecordBatchIterator.
///
/// Unlike the RecordBatchVector overload, `schema` has no default value here
/// and must be passed explicitly.
///
/// \param[in] itr the iterator of RecordBatch to read from; taken by value
/// \param[in] schema the schema the batches are expected to conform to
/// \return the new reader wrapped in a Result
static Result<std::shared_ptr<RecordBatchReader>> Make(RecordBatchIterator itr,
std::shared_ptr<Schema> schema);
};

} // namespace arrow
Loading