From 197a84b3ad4810b451de517c6662f1abe3e00aa8 Mon Sep 17 00:00:00 2001 From: Igor Peshansky Date: Fri, 30 Mar 2018 02:26:22 -0400 Subject: [PATCH 1/3] Add streaming JSON parsing. --- src/json.cc | 85 ++++++++++++++++++++++++++++++++++++++++++- src/json.h | 14 +++++++ test/json_unittest.cc | 84 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) diff --git a/src/json.cc b/src/json.cc index daa81f1d..87f9cffb 100644 --- a/src/json.cc +++ b/src/json.cc @@ -262,10 +262,23 @@ class ObjectContext : public Context { std::unique_ptr field_name_; }; +class CallbackContext : public Context { + public: + CallbackContext(std::function)> callback) + : Context(nullptr), callback_(callback) {} + void AddValue(std::unique_ptr value) override { + callback_(std::move(value)); + } + private: + std::function)> callback_; +}; + // A builder context that allows building up a JSON object. class JSONBuilder { public: JSONBuilder() : context_(new TopLevelContext()) {} + JSONBuilder(std::function)> callback) + : context_(new CallbackContext(callback)) {} ~JSONBuilder() { delete context_; } void AddValue(std::unique_ptr value) { @@ -415,7 +428,7 @@ yajl_callbacks callbacks = { .yajl_end_array = &handle_end_array, }; -} +} // namespace std::vector> Parser::AllFromStream(std::istream& stream) throw(Exception) @@ -481,4 +494,74 @@ std::unique_ptr Parser::FromString(const std::string& input) return FromStream(stream); } +class Parser::ParseState { + public: + ParseState(std::function)> callback) + : builder_(callback), + handle_(yajl_alloc(&callbacks, NULL, (void*) &builder_)) { + yajl_config(handle_, yajl_allow_comments, 1); + yajl_config(handle_, yajl_allow_multiple_values, 1); + yajl_config(handle_, yajl_allow_partial_values, 1); + //yajl_config(handle_, yajl_allow_trailing_garbage, 1); + //yajl_config(handle_, yajl_dont_validate_strings, 1); + } + + ~ParseState() { + yajl_free(handle_); + } + + yajl_handle& handle() { return handle_; } + + private: + JSONBuilder builder_; + yajl_handle handle_; +}; + +Parser::Parser(std::function)> callback) + : state_(new ParseState(callback)) {} + +Parser::~Parser() {} + +void Parser::ParseStream(std::istream& stream) throw(Exception) { + const int kMax = 65536; + unsigned char data[kMax]; + yajl_status stat; + yajl_handle& handle = state_->handle(); + + for (;;) { + if (stream.eof()) { + break; + } + stream.read(reinterpret_cast(&data[0]), kMax); + size_t count = stream.gcount(); + std::string str((const char*)data, count); + + stat = yajl_parse(handle, data, count); + if (stat != yajl_status_ok) { + std::cerr << "Error in yajl_parse" << std::endl; + unsigned char* str = yajl_get_error(handle, 1, data, kMax); + std::string error_str((const char*)str); + yajl_free_error(handle, str); + throw Exception(error_str); + } + + size_t bytes = yajl_get_bytes_consumed(handle); + std::cerr << "Consumed " << bytes << " out of chunk " << count << " from '" << str << "'" << std::endl; + } + + stat = yajl_complete_parse(handle); + + size_t bytes = yajl_get_bytes_consumed(handle); + std::string str((const char*)data, bytes); + std::cerr << "Consumed stream " << bytes << ": '" << str << "'" << std::endl; + + if (stat != yajl_status_ok) { + std::cerr << "Error in yajl_complete_parse" << std::endl; + unsigned char* str = yajl_get_error(handle, 1, data, kMax); + std::string error_str((const char*)str); + yajl_free_error(handle, str); + throw Exception(error_str); + } +} + } // json diff --git a/src/json.h b/src/json.h index f1a5050e..ce95ed67 100644 --- a/src/json.h +++ b/src/json.h @@ -16,6 +16,7 @@ #ifndef JSON_H_ #define JSON_H_ +#include #include #include #include @@ -331,6 +332,10 @@ inline std::unique_ptr object( class Parser { public: + class ParseState; + + Parser(std::function)> callback); + ~Parser(); static std::vector> AllFromStream( std::istream& stream) throw(Exception); static std::vector> AllFromString( @@ -339,6 +344,15 @@ class Parser { throw(Exception); static std::unique_ptr FromString(const std::string& input) throw(Exception); + + void ParseStream(std::istream& stream) throw(Exception); + // Used to accept inline construction of streams. + void ParseStream(std::istream&& stream) throw(Exception) { + ParseStream(stream); + } + + private: + std::unique_ptr state_; }; } diff --git a/test/json_unittest.cc b/test/json_unittest.cc index 6ddd153f..18c500a3 100644 --- a/test/json_unittest.cc +++ b/test/json_unittest.cc @@ -424,4 +424,88 @@ TEST(ParseError, ObjectNoValue) { ASSERT_THROW(json::Parser::FromString("{\"x\":}"), json::Exception); } +// Streaming parsing test. + +TEST(StreamingTest, CompleteStream) { + GuardJsonException([](){ + json::value v; + json::Parser p([&v](json::value r) { v = std::move(r); }); + p.ParseStream(std::istringstream( + "{\n" + " \"foo\": [1, 2, 3],\n" + " \"bar\": {\"x\": 0, \"y\": null},\n" + " \"baz\": true,\n" + " \"str\": \"asdfasdf\"\n" + "}\n" + )); + EXPECT_TOSTRING_EQ( + "{" + "\"bar\":{\"x\":0.0,\"y\":null}," + "\"baz\":true," + "\"foo\":[1.0,2.0,3.0]," + "\"str\":\"asdfasdf\"" + "}", + v); + }); +} + +TEST(StreamingTest, SplitStream) { + GuardJsonException([](){ + json::value v; + json::Parser p([&v](json::value r) { v = std::move(r); }); + p.ParseStream(std::istringstream( + "{\n" + " \"foo\": [1, 2, 3],\n" + )); + p.ParseStream(std::istringstream( + " \"bar\": {\"x\": 0, \"y\": null},\n" + " \"baz\": true,\n" + )); + p.ParseStream(std::istringstream( + " \"str\": \"asdfasdf\"\n" + "}\n" + )); + EXPECT_TOSTRING_EQ( + "{" + "\"bar\":{\"x\":0.0,\"y\":null}," + "\"baz\":true," + "\"foo\":[1.0,2.0,3.0]," + "\"str\":\"asdfasdf\"" + "}", + v); + }); +} + +TEST(StreamingTest, BrokenStream) { + GuardJsonException([](){ + json::value v; + json::Parser p([&v](json::value r) { v = std::move(r); }); + p.ParseStream(std::istringstream( + "{\n" + " \"foo\": [1, 2, 3],\n" + " \"ba" + )); + p.ParseStream(std::istringstream( + "r\": {\"x\": 0, \"y\": nu" + )); + p.ParseStream(std::istringstream( + "ll},\n" + " \"baz\": true,\n" + " \"str\"" + )); + p.ParseStream(std::istringstream( + ": \"asdfasdf\"\n" + "}\n" + )); + EXPECT_TOSTRING_EQ( + "{" + "\"bar\":{\"x\":0.0,\"y\":null}," + "\"baz\":true," + "\"foo\":[1.0,2.0,3.0]," + "\"str\":\"asdfasdf\"" + "}", + v); + }); +} + } // namespace From eef763875d189d934d595ba325b0a1db3fdd5e01 Mon Sep 17 00:00:00 2001 From: Igor Peshansky Date: Fri, 30 Mar 2018 03:09:56 -0400 Subject: [PATCH 2/3] Only complete parsing when the parser object goes away. Return the number of bytes processed from ParseStream. Add more tests. --- src/json.cc | 34 +++++++++++++----------------- src/json.h | 6 +++--- test/json_unittest.cc | 48 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 23 deletions(-) diff --git a/src/json.cc b/src/json.cc index 87f9cffb..65c41dfa 100644 --- a/src/json.cc +++ b/src/json.cc @@ -501,12 +501,20 @@ class Parser::ParseState { handle_(yajl_alloc(&callbacks, NULL, (void*) &builder_)) { yajl_config(handle_, yajl_allow_comments, 1); yajl_config(handle_, yajl_allow_multiple_values, 1); - yajl_config(handle_, yajl_allow_partial_values, 1); + //yajl_config(handle_, yajl_allow_partial_values, 1); //yajl_config(handle_, yajl_allow_trailing_garbage, 1); //yajl_config(handle_, yajl_dont_validate_strings, 1); } ~ParseState() { + yajl_status stat = yajl_complete_parse(handle_); + if (stat != yajl_status_ok) { + std::cerr << "Error in yajl_complete_parse" << std::endl; + unsigned char* str = yajl_get_error(handle_, 0, nullptr, 0); + std::string error_str((const char*)str); + yajl_free_error(handle_, str); + throw Exception(error_str); + } yajl_free(handle_); } @@ -522,10 +530,10 @@ Parser::Parser(std::function)> callback) Parser::~Parser() {} -void Parser::ParseStream(std::istream& stream) throw(Exception) { +std::size_t Parser::ParseStream(std::istream& stream) throw(Exception) { const int kMax = 65536; unsigned char data[kMax]; - yajl_status stat; + size_t total_bytes_consumed = 0; yajl_handle& handle = state_->handle(); for (;;) { @@ -534,9 +542,8 @@ void Parser::ParseStream(std::istream& stream) throw(Exception) { } stream.read(reinterpret_cast(&data[0]), kMax); size_t count = stream.gcount(); - std::string str((const char*)data, count); - stat = yajl_parse(handle, data, count); + yajl_status stat = yajl_parse(handle, data, count); if (stat != yajl_status_ok) { std::cerr << "Error in yajl_parse" << std::endl; unsigned char* str = yajl_get_error(handle, 1, data, kMax); @@ -545,23 +552,10 @@ void Parser::ParseStream(std::istream& stream) throw(Exception) { throw Exception(error_str); } - size_t bytes = yajl_get_bytes_consumed(handle); - std::cerr << "Consumed " << bytes << " out of chunk " << count << " from '" << str << "'" << std::endl; + total_bytes_consumed += yajl_get_bytes_consumed(handle); } - stat = yajl_complete_parse(handle); - - size_t bytes = yajl_get_bytes_consumed(handle); - std::string str((const char*)data, bytes); - std::cerr << "Consumed stream " << bytes << ": '" << str << "'" << std::endl; - - if (stat != yajl_status_ok) { - std::cerr << "Error in yajl_complete_parse" << std::endl; - unsigned char* str = yajl_get_error(handle, 1, data, kMax); - std::string error_str((const char*)str); - yajl_free_error(handle, str); - throw Exception(error_str); - } + return total_bytes_consumed; } } // json diff --git a/src/json.h b/src/json.h index ce95ed67..b55d2a51 100644 --- a/src/json.h +++ b/src/json.h @@ -345,10 +345,10 @@ class Parser { static std::unique_ptr FromString(const std::string& input) throw(Exception); - void ParseStream(std::istream& stream) throw(Exception); + size_t ParseStream(std::istream& stream) throw(Exception); // Used to accept inline construction of streams. - void ParseStream(std::istream&& stream) throw(Exception) { - ParseStream(stream); + size_t ParseStream(std::istream&& stream) throw(Exception) { + return ParseStream(stream); } private: diff --git a/test/json_unittest.cc b/test/json_unittest.cc index 18c500a3..90030f15 100644 --- a/test/json_unittest.cc +++ b/test/json_unittest.cc @@ -508,4 +508,52 @@ TEST(StreamingTest, BrokenStream) { }); } +TEST(StreamingTest, MultipleObjectsStream) { + GuardJsonException([](){ + std::vector v; + json::Parser p([&v](json::value r) { v.emplace_back(std::move(r)); }); + p.ParseStream(std::istringstream( + "{\n" + " \"foo\": [1, 2, 3],\n" + " \"bar\": {\"x\": 0, \"y\": null},\n" + " \"baz\": true,\n" + " \"str\": \"asdfasdf\"\n" + "}\n" + "{\n" + " \"foo1\": [1, 2, 3],\n" + " \"bar1\": {\"x\": 0, \"y\": null},\n" + " \"baz1\": true,\n" + " \"str1\": \"asdfasdf\"\n" + "}\n" + )); + EXPECT_EQ(2, v.size()); + EXPECT_TOSTRING_EQ( + "{" + "\"bar\":{\"x\":0.0,\"y\":null}," + "\"baz\":true," + "\"foo\":[1.0,2.0,3.0]," + "\"str\":\"asdfasdf\"" + "}", + v[0]); + EXPECT_TOSTRING_EQ( + "{" + "\"bar1\":{\"x\":0.0,\"y\":null}," + "\"baz1\":true," + "\"foo1\":[1.0,2.0,3.0]," + "\"str1\":\"asdfasdf\"" + "}", + v[1]); + }); +} + +TEST(StreamingTest, ParseStreamReturnsByteCount) { + GuardJsonException([](){ + json::value v; + json::Parser p([&v](json::value r) { v = std::move(r); }); + size_t n = p.ParseStream(std::istringstream("123")); + EXPECT_TOSTRING_EQ("123", v); + EXPECT_EQ(3, n); + }); +} + } // namespace From 044d9b465d428424795a25cf89f3b223fe33e6de Mon Sep 17 00:00:00 2001 From: Igor Peshansky Date: Fri, 30 Mar 2018 09:47:44 -0400 Subject: [PATCH 3/3] Change loops; remove extra debugging printouts. --- src/json.cc | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/json.cc b/src/json.cc index 65c41dfa..7053788e 100644 --- a/src/json.cc +++ b/src/json.cc @@ -443,10 +443,7 @@ std::vector> Parser::AllFromStream(std::istream& stream) //yajl_config(handle, yajl_allow_trailing_garbage, 1); //yajl_config(handle, yajl_dont_validate_strings, 1); - for (;;) { - if (stream.eof()) { - break; - } + while (!stream.eof()) { stream.read(reinterpret_cast(&data[0]), kMax); size_t count = stream.gcount(); yajl_parse(handle, data, count); @@ -509,7 +506,6 @@ class Parser::ParseState { ~ParseState() { yajl_status stat = yajl_complete_parse(handle_); if (stat != yajl_status_ok) { - std::cerr << "Error in yajl_complete_parse" << std::endl; unsigned char* str = yajl_get_error(handle_, 0, nullptr, 0); std::string error_str((const char*)str); yajl_free_error(handle_, str); @@ -536,16 +532,12 @@ std::size_t Parser::ParseStream(std::istream& stream) throw(Exception) { size_t total_bytes_consumed = 0; yajl_handle& handle = state_->handle(); - for (;;) { - if (stream.eof()) { - break; - } + while (!stream.eof()) { stream.read(reinterpret_cast(&data[0]), kMax); size_t count = stream.gcount(); yajl_status stat = yajl_parse(handle, data, count); if (stat != yajl_status_ok) { - std::cerr << "Error in yajl_parse" << std::endl; unsigned char* str = yajl_get_error(handle, 1, data, kMax); std::string error_str((const char*)str); yajl_free_error(handle, str);