From 795ca7783ef53354aae3322737327ccb54c25af3 Mon Sep 17 00:00:00 2001 From: chenguoping Date: Fri, 20 Mar 2020 20:37:27 +0800 Subject: [PATCH 1/3] Ignore bom at the beginning of the UTF-8 text Add a testcase about bom. --- src/lib_json/json_reader.cpp | 12 +++++++++++- src/test_lib_json/main.cpp | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 10be6d2cf..8cd3b065b 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -939,6 +940,7 @@ class OurReader { bool readToken(Token& token); void skipSpaces(); + void skipBom(); bool match(const Char* pattern, int patternLength); bool readComment(); bool readCStyleComment(bool* containsNewLineResult); @@ -1009,7 +1011,6 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, if (!features_.allowComments_) { collectComments = false; } - begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; @@ -1022,6 +1023,8 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, nodes_.pop(); nodes_.push(&root); + // skip byte order mark if it exists at the beginning of the UTF-8 text. 
+ skipBom(); bool successful = readValue(); nodes_.pop(); Token token; @@ -1268,6 +1271,13 @@ void OurReader::skipSpaces() { } } +void OurReader::skipBom() { + if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) { + begin_ += 3; + current_ = begin_; + } +} + bool OurReader::match(const Char* pattern, int patternLength) { if (end_ - current_ < patternLength) return false; diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp index 7b20e41ec..708d1d230 100644 --- a/src/test_lib_json/main.cpp +++ b/src/test_lib_json/main.cpp @@ -3578,6 +3578,19 @@ JSONTEST_FIXTURE_LOCAL(BuilderTest, settings) { } } +struct BomTest : JsonTest::TestCase {}; + +JSONTEST_FIXTURE_LOCAL(BomTest, withBom) { + const std::string with_bom = u8"\xEF\xBB\xBF{\"key\" : \"value\"}"; + Json::Value root; + JSONCPP_STRING errs; + std::istringstream iss(with_bom); + bool ok = parseFromStream(Json::CharReaderBuilder(), iss, &root, &errs); + JSONTEST_ASSERT(ok); + JSONTEST_ASSERT(errs.empty()); + JSONTEST_ASSERT_STRING_EQUAL(root["key"].asString(), "value"); +} + struct IteratorTest : JsonTest::TestCase {}; JSONTEST_FIXTURE_LOCAL(IteratorTest, convert) { From a5414b5af64c32e3ce574d451f485727f8fd9355 Mon Sep 17 00:00:00 2001 From: chenguoping Date: Sun, 26 Apr 2020 15:35:43 +0800 Subject: [PATCH 2/3] fix VS error --- src/lib_json/json_reader.cpp | 2 +- src/test_lib_json/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 8cd3b065b..4840f9d90 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -1011,6 +1010,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, if (!features_.allowComments_) { collectComments = false; } + begin_ = beginDoc; end_ = endDoc; collectComments_ = collectComments; diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp index 
708d1d230..bc6080ac1 100644 --- a/src/test_lib_json/main.cpp +++ b/src/test_lib_json/main.cpp @@ -3581,7 +3581,7 @@ JSONTEST_FIXTURE_LOCAL(BuilderTest, settings) { struct BomTest : JsonTest::TestCase {}; JSONTEST_FIXTURE_LOCAL(BomTest, withBom) { - const std::string with_bom = u8"\xEF\xBB\xBF{\"key\" : \"value\"}"; + const std::string with_bom = "\xEF\xBB\xBF{\"key\" : \"value\"}"; Json::Value root; JSONCPP_STRING errs; std::istringstream iss(with_bom); From 7092f8d88b3a508b4470d4cb11d8dc4c4550743c Mon Sep 17 00:00:00 2001 From: chenguoping Date: Tue, 28 Apr 2020 15:02:46 +0800 Subject: [PATCH 3/3] refactor skipBom() --- src/lib_json/json_reader.cpp | 21 +++++++++++++++------ src/test_lib_json/main.cpp | 15 ++++++++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index 4840f9d90..341162bcd 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -871,6 +871,7 @@ class OurFeatures { bool failIfExtra_; bool rejectDupKeys_; bool allowSpecialFloats_; + bool allowBom_; size_t stackLimit_; }; // OurFeatures @@ -939,7 +940,7 @@ class OurReader { bool readToken(Token& token); void skipSpaces(); - void skipBom(); + void skipBom(bool allowBom); bool match(const Char* pattern, int patternLength); bool readComment(); bool readCStyleComment(bool* containsNewLineResult); @@ -1024,7 +1025,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root, nodes_.push(&root); // skip byte order mark if it exists at the beginning of the UTF-8 text. - skipBom(); + skipBom(features_.allowBom_); bool successful = readValue(); nodes_.pop(); Token token; @@ -1271,10 +1272,14 @@ void OurReader::skipSpaces() { } } -void OurReader::skipBom() { - if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) { - begin_ += 3; - current_ = begin_; +void OurReader::skipBom(bool allowBom) { + // If BOM is not allowed, then skip it. 
+ // The default value is: false + if (!allowBom) { + if (strncmp(begin_, "\xEF\xBB\xBF", 3) == 0) { + begin_ += 3; + current_ = begin_; + } } } @@ -1895,6 +1900,7 @@ CharReader* CharReaderBuilder::newCharReader() const { features.failIfExtra_ = settings_["failIfExtra"].asBool(); features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool(); features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool(); + features.allowBom_ = settings_["allowBom"].asBool(); return new OurCharReader(collectComments, features); } static void getValidReaderKeys(std::set* valid_keys) { @@ -1910,6 +1916,7 @@ static void getValidReaderKeys(std::set* valid_keys) { valid_keys->insert("failIfExtra"); valid_keys->insert("rejectDupKeys"); valid_keys->insert("allowSpecialFloats"); + valid_keys->insert("allowBom"); } bool CharReaderBuilder::validate(Json::Value* invalid) const { Json::Value my_invalid; @@ -1944,6 +1951,7 @@ void CharReaderBuilder::strictMode(Json::Value* settings) { (*settings)["failIfExtra"] = true; (*settings)["rejectDupKeys"] = true; (*settings)["allowSpecialFloats"] = false; + (*settings)["allowBom"] = false; //! [CharReaderBuilderStrictMode] } // static @@ -1960,6 +1968,7 @@ void CharReaderBuilder::setDefaults(Json::Value* settings) { (*settings)["failIfExtra"] = false; (*settings)["rejectDupKeys"] = false; (*settings)["allowSpecialFloats"] = false; + (*settings)["allowBom"] = false; //! 
[CharReaderBuilderDefaults] } diff --git a/src/test_lib_json/main.cpp b/src/test_lib_json/main.cpp index bc6080ac1..b4f6f0b30 100644 --- a/src/test_lib_json/main.cpp +++ b/src/test_lib_json/main.cpp @@ -3580,16 +3580,29 @@ JSONTEST_FIXTURE_LOCAL(BuilderTest, settings) { struct BomTest : JsonTest::TestCase {}; -JSONTEST_FIXTURE_LOCAL(BomTest, withBom) { +JSONTEST_FIXTURE_LOCAL(BomTest, skipBom) { const std::string with_bom = "\xEF\xBB\xBF{\"key\" : \"value\"}"; Json::Value root; JSONCPP_STRING errs; std::istringstream iss(with_bom); bool ok = parseFromStream(Json::CharReaderBuilder(), iss, &root, &errs); + // The default behavior is to skip the BOM, so we can parse it normally. JSONTEST_ASSERT(ok); JSONTEST_ASSERT(errs.empty()); JSONTEST_ASSERT_STRING_EQUAL(root["key"].asString(), "value"); } +JSONTEST_FIXTURE_LOCAL(BomTest, allowBom) { + const std::string with_bom = "\xEF\xBB\xBF{\"key\" : \"value\"}"; + Json::Value root; + JSONCPP_STRING errs; + std::istringstream iss(with_bom); + Json::CharReaderBuilder b; + b.settings_["allowBom"] = true; + bool ok = parseFromStream(b, iss, &root, &errs); + // With allowBom enabled the BOM is not skipped, so parsing must fail on it. + JSONTEST_ASSERT(!ok); + JSONTEST_ASSERT(!errs.empty()); +} struct IteratorTest : JsonTest::TestCase {};