From 02f5bcaa6eea5097144ca34a59a8a02297f17e54 Mon Sep 17 00:00:00 2001 From: Sush Shringarputale Date: Mon, 28 Oct 2024 11:41:52 -0700 Subject: [PATCH 01/76] Add implementation to retrieve start and end positions of json during parse --- .../nlohmann/detail/input/binary_reader.hpp | 2 +- include/nlohmann/detail/input/json_sax.hpp | 46 +- include/nlohmann/detail/input/parser.hpp | 4 +- include/nlohmann/json.hpp | 53 +- single_include/nlohmann/json.hpp | 4024 +++++++++-------- tests/src/unit-cbor.cpp | 3 +- tests/src/unit-class_parser.cpp | 3 +- tests/src/unit-deserialization.cpp | 3 +- tests/src/unit-disabled_exceptions.cpp | 4 +- tests/src/unit-regression2.cpp | 4 +- tests/src/unit-ubjson.cpp | 4 +- 11 files changed, 2130 insertions(+), 2020 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index a6e100e761..d6fc97ddb6 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -62,7 +62,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 90583d671b..aac35fabd3 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -16,7 +16,7 @@ #include #include #include - +#include NLOHMANN_JSON_NAMESPACE_BEGIN /*! @@ -157,7 +157,7 @@ constructor contains the parsed value. @tparam BasicJsonType the JSON type */ -template +template class json_sax_dom_parser { public: @@ -166,14 +166,15 @@ class json_sax_dom_parser using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; /*! @param[in,out] r reference to a JSON value that is manipulated while parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, const lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer(lexer_) {} // make class move-only @@ -229,6 +230,14 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + if (m_lexer) + { + if (!ref_stack.empty()) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + } + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); @@ -249,6 +258,14 @@ class json_sax_dom_parser bool end_object() { + if (m_lexer) + { + if (!ref_stack.empty()) + { + (*ref_stack.rbegin())->end_position = m_lexer->get_position() - 1; + } + } + JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); @@ -338,9 +355,11 @@ class json_sax_dom_parser bool errored = false; /// whether to throw exceptions in case of errors const bool allow_exceptions = true; + /// the lexer to obtain the current position + const lexer_t* m_lexer = nullptr; }; -template +template class json_sax_dom_callback_parser { public: @@ -351,11 +370,13 @@ class json_sax_dom_callback_parser using binary_t = typename BasicJsonType::binary_t; using parser_callback_t = typename BasicJsonType::parser_callback_t; using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; json_sax_dom_callback_parser(BasicJsonType& r, const parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(cb), allow_exceptions(allow_exceptions_) + const bool allow_exceptions_ = true, + const lexer_t* lexer_ = nullptr) + : root(r), callback(cb), allow_exceptions(allow_exceptions_), m_lexer(lexer_) { keep_stack.push_back(true); } @@ -418,6 +439,11 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::object, true); ref_stack.push_back(val.second); + if (m_lexer && ref_stack.back()) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + // check object limit if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { @@ -455,6 +481,10 @@ class json_sax_dom_callback_parser } else { + if (m_lexer) + { + ref_stack.back()->end_position = m_lexer->get_position() - 1; + } ref_stack.back()->set_parents(); } } @@ -645,6 +675,8 @@ class json_sax_dom_callback_parser const bool allow_exceptions = true; /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer to obtain the current position + const lexer_t* m_lexer = nullptr; }; template diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index bdf85ba292..72efddcd5b 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -94,7 +94,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -122,7 +122,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 31ca64539b..8c7273791e 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -114,12 +114,15 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; + size_t start_position = std::string::npos; + size_t end_position = std::string::npos; + /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; using json_base_class_t = ::nlohmann::detail::json_base_class; @@ -894,6 +897,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } JSON_ASSERT(m_data.m_type == val.type()); set_parents(); + this->start_position = val.get_start_position(); + this->end_position = val.get_end_position(); assert_invariant(); } @@ -1205,6 +1210,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec break; } + this->start_position = other.start_position; + this->end_position = other.end_position; + set_parents(); assert_invariant(); } @@ -1213,6 +1221,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), + start_position(other.start_position), end_position(other.end_position), m_data(std::move(other.m_data)) { // check that passed value is valid @@ -1242,6 +1251,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + this->start_position = other.start_position; + this->end_position = other.end_position; json_base_class_t::operator=(std::move(other)); set_parents(); @@ -1394,6 +1405,16 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec return m_data.m_type; } + size_t get_start_position() const noexcept + { + return start_position; + } + + size_t get_end_position() const noexcept + { + return end_position; + } + /// @} private: @@ -4365,8 +4386,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -4381,8 +4402,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -4406,8 +4427,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); @@ -4422,8 +4443,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4437,8 +4458,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4460,8 +4481,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); @@ -4476,8 +4497,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4491,8 +4512,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4514,8 +4535,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); @@ -4530,8 +4551,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4545,8 +4566,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4560,8 +4581,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4575,8 +4596,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -4598,8 +4619,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a858728c4c..435a1fa617 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6655,2355 +6655,2388 @@ NLOHMANN_JSON_NAMESPACE_END // #include +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.3 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann +// SPDX-License-Identifier: MIT -NLOHMANN_JSON_NAMESPACE_BEGIN - -/*! -@brief SAX interface - -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. -*/ -template -struct json_sax -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; - - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; - - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; - /*! - @brief a floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; +#include // array +#include // localeconv +#include // size_t +#include // snprintf +#include // strtof, strtod, strtold, strtoll, strtoull +#include // initializer_list +#include // char_traits, string +#include // move +#include // vector - /*! - @brief a string value was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string value. - */ - virtual bool string(string_t& val) = 0; +// #include - /*! - @brief a binary value was read - @param[in] val binary value - @return whether parsing should proceed - @note It is safe to move the passed binary value. - */ - virtual bool binary(binary_t& val) = 0; +// #include - /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; +// #include - /*! - @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; +// #include - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_array(std::size_t elements) = 0; +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; +/////////// +// lexer // +/////////// - /*! - @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; +template +class lexer_base +{ + public: + /// token types for the parser + enum class token_type + { + uninitialized, ///< indicating the scanner is uninitialized + literal_true, ///< the `true` literal + literal_false, ///< the `false` literal + literal_null, ///< the `null` literal + value_string, ///< a string -- use get_string() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value + begin_array, ///< the character for array begin `[` + begin_object, ///< the character for object begin `{` + end_array, ///< the character for array end `]` + end_object, ///< the character for object end `}` + name_separator, ///< the name separator `:` + value_separator, ///< the value separator `,` + parse_error, ///< indicating a parse error + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) + }; - json_sax() = default; - json_sax(const json_sax&) = default; - json_sax(json_sax&&) noexcept = default; - json_sax& operator=(const json_sax&) = default; - json_sax& operator=(json_sax&&) noexcept = default; - virtual ~json_sax() = default; + /// return name of values of type token_type (only used for errors) + JSON_HEDLEY_RETURNS_NON_NULL + JSON_HEDLEY_CONST + static const char* token_type_name(const token_type t) noexcept + { + switch (t) + { + case token_type::uninitialized: + return ""; + case token_type::literal_true: + return "true literal"; + case token_type::literal_false: + return "false literal"; + case token_type::literal_null: + return "null literal"; + case token_type::value_string: + return "string literal"; + case token_type::value_unsigned: + case token_type::value_integer: + case token_type::value_float: + return "number literal"; + case token_type::begin_array: + return "'['"; + case token_type::begin_object: + return "'{'"; + case token_type::end_array: + return "']'"; + case token_type::end_object: + return "'}'"; + case token_type::name_separator: + return "':'"; + case token_type::value_separator: + return "','"; + case token_type::parse_error: + return ""; + case token_type::end_of_input: + return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; + // LCOV_EXCL_START + default: // catch non-enum values + return "unknown token"; + // LCOV_EXCL_STOP + } + } }; - -namespace detail -{ /*! -@brief SAX implementation to create a JSON value from SAX events - -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. - -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. +@brief lexical analysis -@tparam BasicJsonType the JSON type +This class organizes the lexical analysis during JSON deserialization. */ -template -class json_sax_dom_parser +template +class lexer : public lexer_base { - public: using number_integer_t = typename BasicJsonType::number_integer_t; using number_unsigned_t = typename BasicJsonType::number_unsigned_t; using number_float_t = typename BasicJsonType::number_float_t; using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; + using char_type = typename InputAdapterType::char_type; + using char_int_type = typename char_traits::int_type; - /*! - @param[in,out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions - */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) + public: + using token_type = typename lexer_base::token_type; + + explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept + : ia(std::move(adapter)) + , ignore_comments(ignore_comments_) + , decimal_point_char(static_cast(get_decimal_point())) {} - // make class move-only - json_sax_dom_parser(const json_sax_dom_parser&) = delete; - json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; - json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_parser() = default; + // delete because of pointer members + lexer(const lexer&) = delete; + lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + lexer& operator=(lexer&) = delete; + lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~lexer() = default; - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } + private: + ///////////////////// + // locales + ///////////////////// - bool number_integer(number_integer_t val) + /// return the locale-dependent decimal point + JSON_HEDLEY_PURE + static char get_decimal_point() noexcept { - handle_value(val); - return true; + const auto* loc = localeconv(); + JSON_ASSERT(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); } - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } + ///////////////////// + // scan functions + ///////////////////// - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } + /*! + @brief get codepoint from 4 hex characters following `\u` - bool string(string_t& val) - { - handle_value(val); - return true; - } + For input "\u c1 c2 c3 c4" the codepoint is: + (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 + = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } + Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' + must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The + conversion is done by subtracting the offset (0x30, 0x37, and 0x57) + between the ASCII value of the character and the desired integer value. - bool start_object(std::size_t len) + @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or + non-hex character) + */ + int get_codepoint() { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + // this function only makes sense after reading `\u` + JSON_ASSERT(current == 'u'); + int codepoint = 0; - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + const auto factors = { 12u, 8u, 4u, 0u }; + for (const auto factor : factors) { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + get(); + + if (current >= '0' && current <= '9') + { + codepoint += static_cast((static_cast(current) - 0x30u) << factor); + } + else if (current >= 'A' && current <= 'F') + { + codepoint += static_cast((static_cast(current) - 0x37u) << factor); + } + else if (current >= 'a' && current <= 'f') + { + codepoint += static_cast((static_cast(current) - 0x57u) << factor); + } + else + { + return -1; + } } - return true; + JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); + return codepoint; } - bool key(string_t& val) - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); + /*! + @brief check if the next byte(s) are inside a given range - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); - return true; - } + Adds the current byte and, for each passed range, reads a new byte and + checks if it is inside the range. If a violation was detected, set up an + error message and return false. Otherwise, return true. - bool end_object() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_object()); + @param[in] ranges list of integers; interpreted as list of pairs of + inclusive lower and upper bound, respectively - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } + @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, + 1, 2, or 3 pairs. This precondition is enforced by an assertion. - bool start_array(std::size_t len) + @return true if and only if no range violation was detected + */ + bool next_byte_in_range(std::initializer_list ranges) { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); + add(current); - if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + for (auto range = ranges.begin(); range != ranges.end(); ++range) { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + get(); + if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions) + { + add(current); + } + else + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return false; + } } return true; } - bool end_array() - { - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(ref_stack.back()->is_array()); - - ref_stack.back()->set_parents(); - ref_stack.pop_back(); - return true; - } + /*! + @brief scan a string literal - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } + This function scans a string according to Sect. 7 of RFC 8259. While + scanning, bytes are escaped and copied into buffer token_buffer. Then the + function returns successfully, token_buffer is *not* null-terminated (as it + may contain \0 bytes), and token_buffer.size() is the number of bytes in the + string. - constexpr bool is_errored() const - { - return errored; - } + @return token_type::value_string if string could be successfully scanned, + token_type::parse_error otherwise - private: - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements + @note In case of errors, variable error_message contains a textual + description. */ - template - JSON_HEDLEY_RETURNS_NON_NULL - BasicJsonType* handle_value(Value&& v) + token_type scan_string() { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward(v)); - return &root; - } + // reset token_buffer (ignore opening quote) + reset(); - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + // we entered the function by reading an open quote + JSON_ASSERT(current == '\"'); - if (ref_stack.back()->is_array()) + while (true) { - ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); - return &(ref_stack.back()->m_data.m_value.array->back()); - } - - JSON_ASSERT(ref_stack.back()->is_object()); - JSON_ASSERT(object_element); - *object_element = BasicJsonType(std::forward(v)); - return object_element; - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; - -template -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; - - json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(cb), allow_exceptions(allow_exceptions_) - { - keep_stack.push_back(true); - } + // get next character + switch (get()) + { + // end of file while parsing string + case char_traits::eof(): + { + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } - // make class move-only - json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; - json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~json_sax_dom_callback_parser() = default; + // closing quote + case '\"': + { + return token_type::value_string; + } - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } + // escapes + case '\\': + { + switch (get()) + { + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } + // unicode escapes + case 'u': + { + const int codepoint1 = get_codepoint(); + int codepoint = codepoint1; // start with codepoint1 - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } + if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } + // check if code point is a high surrogate + if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) + { + const int codepoint2 = get_codepoint(); - bool string(string_t& val) - { - handle_value(val); - return true; - } + if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } - bool binary(binary_t& val) - { - handle_value(std::move(val)); - return true; - } + // check if codepoint2 is a low surrogate + if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) + { + // overwrite codepoint + codepoint = static_cast( + // high surrogate occupies the most significant 22 bits + (static_cast(codepoint1) << 10u) + // low surrogate occupies the least significant 15 bits + + static_cast(codepoint2) + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result, so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00u); + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + } - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); + // result of the above calculation yields a proper codepoint + JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); + // translate codepoint into bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(static_cast(codepoint)); + } + else if (codepoint <= 0x7FF) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(static_cast(0xC0u | (static_cast(codepoint) >> 6u))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } + else if (codepoint <= 0xFFFF) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(static_cast(0xE0u | (static_cast(codepoint) >> 12u))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(static_cast(0xF0u | (static_cast(codepoint) >> 18u))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 12u) & 0x3Fu))); + add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); + add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); + } - // check object limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); - } + break; + } - return true; - } + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; + } - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); + break; + } - // check callback for key - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); + // invalid control characters + case 0x00: + { + error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; + return token_type::parse_error; + } - // add discarded value at given key and store the reference for later - if (keep && ref_stack.back()) - { - object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); - } + case 0x01: + { + error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; + return token_type::parse_error; + } - return true; - } + case 0x02: + { + error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; + return token_type::parse_error; + } - bool end_object() - { - if (ref_stack.back()) - { - if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - } - else - { - ref_stack.back()->set_parents(); - } - } + case 0x03: + { + error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; + return token_type::parse_error; + } - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); + case 0x04: + { + error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; + return token_type::parse_error; + } - if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) - { - // remove discarded value - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) + case 0x05: { - ref_stack.back()->erase(it); - break; + error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; + return token_type::parse_error; } - } - } - return true; - } + case 0x06: + { + error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; + return token_type::parse_error; + } - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); + case 0x07: + { + error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; + return token_type::parse_error; + } - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); + case 0x08: + { + error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; + return token_type::parse_error; + } - // check array limit - if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); - } + case 0x09: + { + error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; + return token_type::parse_error; + } - return true; - } + case 0x0A: + { + error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; + return token_type::parse_error; + } - bool end_array() - { - bool keep = true; + case 0x0B: + { + error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; + return token_type::parse_error; + } - if (ref_stack.back()) - { - keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (keep) - { - ref_stack.back()->set_parents(); - } - else - { - // discard array - *ref_stack.back() = discarded; - } - } + case 0x0C: + { + error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; + return token_type::parse_error; + } - JSON_ASSERT(!ref_stack.empty()); - JSON_ASSERT(!keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); + case 0x0D: + { + error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; + return token_type::parse_error; + } - // remove discarded value - if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->pop_back(); - } + case 0x0E: + { + error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; + return token_type::parse_error; + } - return true; - } + case 0x0F: + { + error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; + return token_type::parse_error; + } - template - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const Exception& ex) - { - errored = true; - static_cast(ex); - if (allow_exceptions) - { - JSON_THROW(ex); - } - return false; - } + case 0x10: + { + error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; + return token_type::parse_error; + } - constexpr bool is_errored() const - { - return errored; - } + case 0x11: + { + error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; + return token_type::parse_error; + } - private: - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. + case 0x12: + { + error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; + return token_type::parse_error; + } - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements + case 0x13: + { + error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; + return token_type::parse_error; + } - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template - std::pair handle_value(Value&& v, const bool skip_callback = false) - { - JSON_ASSERT(!keep_stack.empty()); + case 0x14: + { + error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; + return token_type::parse_error; + } - // do not handle this value if we know it would be added to a discarded - // container - if (!keep_stack.back()) - { - return {false, nullptr}; - } + case 0x15: + { + error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; + return token_type::parse_error; + } - // create value - auto value = BasicJsonType(std::forward(v)); + case 0x16: + { + error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; + return token_type::parse_error; + } - // check callback - const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); + case 0x17: + { + error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; + return token_type::parse_error; + } - // do not handle this value if we just learnt it shall be discarded - if (!keep) - { - return {false, nullptr}; - } + case 0x18: + { + error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; + return token_type::parse_error; + } - if (ref_stack.empty()) - { - root = std::move(value); - return {true, & root}; - } + case 0x19: + { + error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; + return token_type::parse_error; + } - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (!ref_stack.back()) - { - return {false, nullptr}; - } + case 0x1A: + { + error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; + return token_type::parse_error; + } - // we now only expect arrays and objects - JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + case 0x1B: + { + error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; + return token_type::parse_error; + } - // array - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); - return {true, & (ref_stack.back()->m_data.m_value.array->back())}; - } + case 0x1C: + { + error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; + return token_type::parse_error; + } - // object - JSON_ASSERT(ref_stack.back()->is_object()); - // check if we should store an element for the current key - JSON_ASSERT(!key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); + case 0x1D: + { + error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; + return token_type::parse_error; + } - if (!store_element) - { - return {false, nullptr}; - } + case 0x1E: + { + error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; + return token_type::parse_error; + } - JSON_ASSERT(object_element); - *object_element = std::move(value); - return {true, object_element}; - } + case 0x1F: + { + error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; + return token_type::parse_error; + } - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector ref_stack {}; - /// stack to manage which values to keep - std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) - /// stack to manage which object keys to keep - std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; -}; + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + { + if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) + { + return token_type::parse_error; + } + break; + } + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xE0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xEE: + case 0xEF: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xED: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xF0: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xF1: + case 0xF2: + case 0xF3: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xF4: + { + if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // remaining bytes (80..C1 and F5..FF) are ill-formed + default: + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + } + } + } + + /*! + * @brief scan a comment + * @return whether comment could be scanned successfully + */ + bool scan_comment() + { + switch (get()) + { + // single-line comments skip input until a newline or EOF is read + case '/': + { + while (true) + { + switch (get()) + { + case '\n': + case '\r': + case char_traits::eof(): + case '\0': + return true; + + default: + break; + } + } + } + + // multi-line comments skip input until */ is read + case '*': + { + while (true) + { + switch (get()) + { + case char_traits::eof(): + case '\0': + { + error_message = "invalid comment; missing closing '*/'"; + return false; + } + + case '*': + { + switch (get()) + { + case '/': + return true; + + default: + { + unget(); + continue; + } + } + } + + default: + continue; + } + } + } + + // unexpected character after reading '/' + default: + { + error_message = "invalid comment; expecting '/' or '*' after '/'"; + return false; + } + } + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + JSON_HEDLEY_NON_NULL(2) + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + + /*! + @brief scan a number literal + + This function scans a string according to Sect. 6 of RFC 8259. + + The function is realized with a deterministic finite state machine derived + from the grammar described in RFC 8259. Starting in state "init", the + input is read and used to determined the next state. Only state "done" + accepts the number. State "error" is a trap state to model errors. In the + table below, "anything" means any character but the ones listed before. + + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in token_buffer. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. + */ + token_type scan_number() // lgtm [cpp/use-of-goto] + { + // reset token_buffer to store the number's bytes + reset(); + + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; -template -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using binary_t = typename BasicJsonType::binary_t; + // state (init): we just found out we need to scan a number + switch (current) + { + case '-': + { + add(current); + goto scan_number_minus; + } - bool null() - { - return true; - } + case '0': + { + add(current); + goto scan_number_zero; + } - bool boolean(bool /*unused*/) - { - return true; - } + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } - bool number_integer(number_integer_t /*unused*/) - { - return true; - } + // all other characters are rejected outside scan_number() + default: // LCOV_EXCL_LINE + JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE + } - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } - bool string(string_t& /*unused*/) - { - return true; - } + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } - bool binary(binary_t& /*unused*/) - { - return true; - } +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } - bool start_object(std::size_t /*unused*/ = static_cast(-1)) - { - return true; - } + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } - bool key(string_t& /*unused*/) - { - return true; - } + default: + goto scan_number_done; + } - bool end_object() - { - return true; - } +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } - bool start_array(std::size_t /*unused*/ = static_cast(-1)) - { - return true; - } + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } - bool end_array() - { - return true; - } + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) - { - return false; - } -}; + default: + goto scan_number_done; + } -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + default: + goto scan_number_done; + } -#include // array -#include // localeconv -#include // size_t -#include // snprintf -#include // strtof, strtod, strtold, strtoll, strtoull -#include // initializer_list -#include // char_traits, string -#include // move -#include // vector +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } -// #include + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } -// #include + default: + { + error_message = + "invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } -// #include +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } -// #include + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ + default: + goto scan_number_done; + } -/////////// -// lexer // -/////////// +scan_number_done: + // unget the character after the number (we only read it to know that + // we are done scanning a number) + unget(); -template -class lexer_base -{ - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value - value_integer, ///< a signed integer -- use get_number_integer() for actual value - value_float, ///< an floating point number -- use get_number_float() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input, ///< indicating the end of the input buffer - literal_or_value ///< a literal or the begin of a value (only for diagnostics) - }; + char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + errno = 0; - /// return name of values of type token_type (only used for errors) - JSON_HEDLEY_RETURNS_NON_NULL - JSON_HEDLEY_CONST - static const char* token_type_name(const token_type t) noexcept - { - switch (t) + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) { - case token_type::uninitialized: - return ""; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case token_type::value_unsigned: - case token_type::value_integer: - case token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case token_type::parse_error: - return ""; - case token_type::end_of_input: - return "end of input"; - case token_type::literal_or_value: - return "'[', '{', or a literal"; - // LCOV_EXCL_START - default: // catch non-enum values - return "unknown token"; - // LCOV_EXCL_STOP - } - } -}; -/*! -@brief lexical analysis + const auto x = std::strtoull(token_buffer.data(), &endptr, 10); -This class organizes the lexical analysis during JSON deserialization. -*/ -template -class lexer : public lexer_base -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using char_type = typename InputAdapterType::char_type; - using char_int_type = typename char_traits::int_type; + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - public: - using token_type = typename lexer_base::token_type; + if (errno == 0) + { + value_unsigned = static_cast(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + const auto x = std::strtoll(token_buffer.data(), &endptr, 10); - explicit lexer(InputAdapterType&& adapter, bool ignore_comments_ = false) noexcept - : ia(std::move(adapter)) - , ignore_comments(ignore_comments_) - , decimal_point_char(static_cast(get_decimal_point())) - {} + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - // delete because of pointer members - lexer(const lexer&) = delete; - lexer(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - lexer& operator=(lexer&) = delete; - lexer& operator=(lexer&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) - ~lexer() = default; + if (errno == 0) + { + value_integer = static_cast(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } - private: - ///////////////////// - // locales - ///////////////////// + // this code is reached if we parse a floating-point number or if an + // integer conversion above failed + strtof(value_float, token_buffer.data(), &endptr); - /// return the locale-dependent decimal point - JSON_HEDLEY_PURE - static char get_decimal_point() noexcept - { - const auto* loc = localeconv(); - JSON_ASSERT(loc != nullptr); - return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); - } + // we checked the number format before + JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); - ///////////////////// - // scan functions - ///////////////////// + return token_type::value_float; + } /*! - @brief get codepoint from 4 hex characters following `\u` - - For input "\u c1 c2 c3 c4" the codepoint is: - (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 - = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) - - Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' - must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The - conversion is done by subtracting the offset (0x30, 0x37, and 0x57) - between the ASCII value of the character and the desired integer value. - - @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or - non-hex character) + @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success */ - int get_codepoint() + JSON_HEDLEY_NON_NULL(2) + token_type scan_literal(const char_type* literal_text, const std::size_t length, + token_type return_type) { - // this function only makes sense after reading `\u` - JSON_ASSERT(current == 'u'); - int codepoint = 0; - - const auto factors = { 12u, 8u, 4u, 0u }; - for (const auto factor : factors) + JSON_ASSERT(char_traits::to_char_type(current) == literal_text[0]); + for (std::size_t i = 1; i < length; ++i) { - get(); - - if (current >= '0' && current <= '9') - { - codepoint += static_cast((static_cast(current) - 0x30u) << factor); - } - else if (current >= 'A' && current <= 'F') - { - codepoint += static_cast((static_cast(current) - 0x37u) << factor); - } - else if (current >= 'a' && current <= 'f') - { - codepoint += static_cast((static_cast(current) - 0x57u) << factor); - } - else + if (JSON_HEDLEY_UNLIKELY(char_traits::to_char_type(get()) != literal_text[i])) { - return -1; + error_message = "invalid literal"; + return token_type::parse_error; } } - - JSON_ASSERT(0x0000 <= codepoint && codepoint <= 0xFFFF); - return codepoint; + return return_type; } - /*! - @brief check if the next byte(s) are inside a given range + ///////////////////// + // input management + ///////////////////// - Adds the current byte and, for each passed range, reads a new byte and - checks if it is inside the range. If a violation was detected, set up an - error message and return false. Otherwise, return true. + /// reset token_buffer; current character is beginning of token + void reset() noexcept + { + token_buffer.clear(); + token_string.clear(); + token_string.push_back(char_traits::to_char_type(current)); + } - @param[in] ranges list of integers; interpreted as list of pairs of - inclusive lower and upper bound, respectively + /* + @brief get next character from the input - @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, - 1, 2, or 3 pairs. This precondition is enforced by an assertion. + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a + `char_traits::eof()` in that case. Stores the scanned characters + for use in error messages. - @return true if and only if no range violation was detected + @return character read from the input */ - bool next_byte_in_range(std::initializer_list ranges) + char_int_type get() { - JSON_ASSERT(ranges.size() == 2 || ranges.size() == 4 || ranges.size() == 6); - add(current); + ++position.chars_read_total; + ++position.chars_read_current_line; - for (auto range = ranges.begin(); range != ranges.end(); ++range) + if (next_unget) { - get(); - if (JSON_HEDLEY_LIKELY(*range <= current && current <= *(++range))) // NOLINT(bugprone-inc-dec-in-conditions) - { - add(current); - } - else - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return false; - } + // just reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia.get_character(); } - return true; - } + if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) + { + token_string.push_back(char_traits::to_char_type(current)); + } - /*! - @brief scan a string literal + if (current == '\n') + { + ++position.lines_read; + position.chars_read_current_line = 0; + } - This function scans a string according to Sect. 7 of RFC 8259. While - scanning, bytes are escaped and copied into buffer token_buffer. Then the - function returns successfully, token_buffer is *not* null-terminated (as it - may contain \0 bytes), and token_buffer.size() is the number of bytes in the - string. + return current; + } - @return token_type::value_string if string could be successfully scanned, - token_type::parse_error otherwise + /*! + @brief unget current character (read it again on next get) - @note In case of errors, variable error_message contains a textual - description. + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read_total, + chars_read_current_line, and token_string. The next call to get() will + behave as if the unget character is read again. */ - token_type scan_string() + void unget() { - // reset token_buffer (ignore opening quote) - reset(); + next_unget = true; - // we entered the function by reading an open quote - JSON_ASSERT(current == '\"'); + --position.chars_read_total; - while (true) + // in case we "unget" a newline, we have to also decrement the lines_read + if (position.chars_read_current_line == 0) { - // get next character - switch (get()) + if (position.lines_read > 0) { - // end of file while parsing string - case char_traits::eof(): - { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } - - // closing quote - case '\"': - { - return token_type::value_string; - } - - // escapes - case '\\': - { - switch (get()) - { - // quotation mark - case '\"': - add('\"'); - break; - // reverse solidus - case '\\': - add('\\'); - break; - // solidus - case '/': - add('/'); - break; - // backspace - case 'b': - add('\b'); - break; - // form feed - case 'f': - add('\f'); - break; - // line feed - case 'n': - add('\n'); - break; - // carriage return - case 'r': - add('\r'); - break; - // tab - case 't': - add('\t'); - break; - - // unicode escapes - case 'u': - { - const int codepoint1 = get_codepoint(); - int codepoint = codepoint1; // start with codepoint1 + --position.lines_read; + } + } + else + { + --position.chars_read_current_line; + } - if (JSON_HEDLEY_UNLIKELY(codepoint1 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } + if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) + { + JSON_ASSERT(!token_string.empty()); + token_string.pop_back(); + } + } - // check if code point is a high surrogate - if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) - { - // expect next \uxxxx entry - if (JSON_HEDLEY_LIKELY(get() == '\\' && get() == 'u')) - { - const int codepoint2 = get_codepoint(); + /// add a character to token_buffer + void add(char_int_type c) + { + token_buffer.push_back(static_cast(c)); + } - if (JSON_HEDLEY_UNLIKELY(codepoint2 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } + public: + ///////////////////// + // value getters + ///////////////////// - // check if codepoint2 is a low surrogate - if (JSON_HEDLEY_LIKELY(0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) - { - // overwrite codepoint - codepoint = static_cast( - // high surrogate occupies the most significant 22 bits - (static_cast(codepoint1) << 10u) - // low surrogate occupies the least significant 15 bits - + static_cast(codepoint2) - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result, so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00u); - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - error_message = "invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - if (JSON_HEDLEY_UNLIKELY(0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF)) - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; - } - } + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } - // result of the above calculation yields a proper codepoint - JSON_ASSERT(0x00 <= codepoint && codepoint <= 0x10FFFF); + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } - // translate codepoint into bytes - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - add(static_cast(codepoint)); - } - else if (codepoint <= 0x7FF) - { - // 2-byte characters: 110xxxxx 10xxxxxx - add(static_cast(0xC0u | (static_cast(codepoint) >> 6u))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else if (codepoint <= 0xFFFF) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - add(static_cast(0xE0u | (static_cast(codepoint) >> 12u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } - else - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - add(static_cast(0xF0u | (static_cast(codepoint) >> 18u))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 12u) & 0x3Fu))); - add(static_cast(0x80u | ((static_cast(codepoint) >> 6u) & 0x3Fu))); - add(static_cast(0x80u | (static_cast(codepoint) & 0x3Fu))); - } + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } - break; - } + /// return current string value (implicitly resets the token; useful only once) + string_t& get_string() + { + return token_buffer; + } - // other characters after escape - default: - error_message = "invalid string: forbidden character after backslash"; - return token_type::parse_error; - } + ///////////////////// + // diagnostics + ///////////////////// - break; - } + /// return position of last read token + constexpr position_t get_position() const noexcept + { + return position; + } - // invalid control characters - case 0x00: - { - error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; - return token_type::parse_error; - } + /// return the last read token (for errors only). Will never contain EOF + /// (an arbitrary value that is not a valid char value, often -1), because + /// 255 may legitimately occur. May contain NUL, which should be escaped. + std::string get_token_string() const + { + // escape control characters + std::string result; + for (const auto c : token_string) + { + if (static_cast(c) <= '\x1F') + { + // escape control characters + std::array cs{{}}; + static_cast((std::snprintf)(cs.data(), cs.size(), "", static_cast(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + result += cs.data(); + } + else + { + // add character as is + result.push_back(static_cast(c)); + } + } - case 0x01: - { - error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; - return token_type::parse_error; - } + return result; + } - case 0x02: - { - error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; - return token_type::parse_error; - } + /// return syntax error message + JSON_HEDLEY_RETURNS_NON_NULL + constexpr const char* get_error_message() const noexcept + { + return error_message; + } - case 0x03: - { - error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; - return token_type::parse_error; - } + ///////////////////// + // actual scanner + ///////////////////// - case 0x04: - { - error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; - return token_type::parse_error; - } + /*! + @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped + */ + bool skip_bom() + { + if (get() == 0xEF) + { + // check if we completely parse the BOM + return get() == 0xBB && get() == 0xBF; + } - case 0x05: - { - error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; - return token_type::parse_error; - } + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } - case 0x06: - { - error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; - return token_type::parse_error; - } + void skip_whitespace() + { + do + { + get(); + } + while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); + } - case 0x07: - { - error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; - return token_type::parse_error; - } + token_type scan() + { + // initially, skip the BOM + if (position.chars_read_total == 0 && !skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } - case 0x08: - { - error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; - return token_type::parse_error; - } + // read next character and ignore whitespace + skip_whitespace(); - case 0x09: - { - error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; - return token_type::parse_error; - } + // ignore comments + while (ignore_comments && current == '/') + { + if (!scan_comment()) + { + return token_type::parse_error; + } - case 0x0A: - { - error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; - return token_type::parse_error; - } + // skip following whitespace + skip_whitespace(); + } - case 0x0B: - { - error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; - return token_type::parse_error; - } + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; - case 0x0C: - { - error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; - return token_type::parse_error; - } + // literals + case 't': + { + std::array true_literal = {{static_cast('t'), static_cast('r'), static_cast('u'), static_cast('e')}}; + return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); + } + case 'f': + { + std::array false_literal = {{static_cast('f'), static_cast('a'), static_cast('l'), static_cast('s'), static_cast('e')}}; + return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); + } + case 'n': + { + std::array null_literal = {{static_cast('n'), static_cast('u'), static_cast('l'), static_cast('l')}}; + return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); + } - case 0x0D: - { - error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; - return token_type::parse_error; - } + // string + case '\"': + return scan_string(); - case 0x0E: - { - error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; - return token_type::parse_error; - } + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); - case 0x0F: - { - error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; - return token_type::parse_error; - } + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case char_traits::eof(): + return token_type::end_of_input; - case 0x10: - { - error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; - return token_type::parse_error; - } + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } + } - case 0x11: - { - error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; - return token_type::parse_error; - } + private: + /// input adapter + InputAdapterType ia; - case 0x12: - { - error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; - return token_type::parse_error; - } + /// whether comments should be ignored (true) or signaled as errors (false) + const bool ignore_comments = false; - case 0x13: - { - error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; - return token_type::parse_error; - } + /// the current character + char_int_type current = char_traits::eof(); - case 0x14: - { - error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; - return token_type::parse_error; - } + /// whether the next get() call should just return current + bool next_unget = false; - case 0x15: - { - error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; - return token_type::parse_error; - } + /// the start position of the current token + position_t position {}; - case 0x16: - { - error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; - return token_type::parse_error; - } + /// raw input token string (for error messages) + std::vector token_string {}; - case 0x17: - { - error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; - return token_type::parse_error; - } + /// buffer for variable-length tokens (numbers, strings) + string_t token_buffer {}; - case 0x18: - { - error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; - return token_type::parse_error; - } + /// a description of occurred lexer errors + const char* error_message = ""; - case 0x19: - { - error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; - return token_type::parse_error; - } + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; - case 0x1A: - { - error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; - return token_type::parse_error; - } + /// the decimal point + const char_int_type decimal_point_char = '.'; +}; - case 0x1B: - { - error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; - return token_type::parse_error; - } +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END - case 0x1C: - { - error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; - return token_type::parse_error; - } +NLOHMANN_JSON_NAMESPACE_BEGIN - case 0x1D: - { - error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; - return token_type::parse_error; - } +/*! +@brief SAX interface - case 0x1E: - { - error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; - return token_type::parse_error; - } +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template +struct json_sax +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; - case 0x1F: - { - error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; - return token_type::parse_error; - } + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: - case 0x21: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - { - add(current); - break; - } + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; - // U+0080..U+07FF: bytes C2..DF 80..BF - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - { - if (JSON_HEDLEY_UNLIKELY(!next_byte_in_range({0x80, 0xBF}))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; - // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case 0xE0: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief a floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; - // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF - // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xEE: - case 0xEF: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief a string value was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string value. + */ + virtual bool string(string_t& val) = 0; - // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case 0xED: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief a binary value was read + @param[in] val binary value + @return whether parsing should proceed + @note It is safe to move the passed binary value. + */ + virtual bool binary(binary_t& val) = 0; - // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case 0xF0: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; - // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case 0xF1: - case 0xF2: - case 0xF3: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; - // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case 0xF4: - { - if (JSON_HEDLEY_UNLIKELY(!(next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; - // remaining bytes (80..C1 and F5..FF) are ill-formed - default: - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - } - } - } + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; /*! - * @brief scan a comment - * @return whether comment could be scanned successfully - */ - bool scan_comment() - { - switch (get()) - { - // single-line comments skip input until a newline or EOF is read - case '/': - { - while (true) - { - switch (get()) - { - case '\n': - case '\r': - case char_traits::eof(): - case '\0': - return true; + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; - default: - break; - } - } - } + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; - // multi-line comments skip input until */ is read - case '*': - { - while (true) - { - switch (get()) - { - case char_traits::eof(): - case '\0': - { - error_message = "invalid comment; missing closing '*/'"; - return false; - } + json_sax() = default; + json_sax(const json_sax&) = default; + json_sax(json_sax&&) noexcept = default; + json_sax& operator=(const json_sax&) = default; + json_sax& operator=(json_sax&&) noexcept = default; + virtual ~json_sax() = default; +}; - case '*': - { - switch (get()) - { - case '/': - return true; +namespace detail +{ +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ +template +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using lexer_t = lexer; - default: - { - unget(); - continue; - } - } - } + /*! + @param[in,out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, const lexer_t* lexer_ = nullptr) + : root(r), allow_exceptions(allow_exceptions_), m_lexer(lexer_) + {} - default: - continue; - } - } - } + // make class move-only + json_sax_dom_parser(const json_sax_dom_parser&) = delete; + json_sax_dom_parser(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_parser& operator=(const json_sax_dom_parser&) = delete; + json_sax_dom_parser& operator=(json_sax_dom_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_parser() = default; - // unexpected character after reading '/' - default: - { - error_message = "invalid comment; expecting '/' or '*' after '/'"; - return false; - } - } + bool null() + { + handle_value(nullptr); + return true; } - JSON_HEDLEY_NON_NULL(2) - static void strtof(float& f, const char* str, char** endptr) noexcept + bool boolean(bool val) { - f = std::strtof(str, endptr); + handle_value(val); + return true; } - JSON_HEDLEY_NON_NULL(2) - static void strtof(double& f, const char* str, char** endptr) noexcept + bool number_integer(number_integer_t val) { - f = std::strtod(str, endptr); + handle_value(val); + return true; } - JSON_HEDLEY_NON_NULL(2) - static void strtof(long double& f, const char* str, char** endptr) noexcept + bool number_unsigned(number_unsigned_t val) { - f = std::strtold(str, endptr); + handle_value(val); + return true; } - /*! - @brief scan a number literal + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } - This function scans a string according to Sect. 6 of RFC 8259. + bool string(string_t& val) + { + handle_value(val); + return true; + } - The function is realized with a deterministic finite state machine derived - from the grammar described in RFC 8259. Starting in state "init", the - input is read and used to determined the next state. Only state "done" - accepts the number. State "error" is a trap state to model errors. In the - table below, "anything" means any character but the ones listed before. + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | decimal2 | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - The state machine is realized with one label per state (prefixed with - "scan_number_") and `goto` statements between them. The state machine - contains cycles, but any cycle can be left when EOF is read. Therefore, - the function is guaranteed to terminate. + if (m_lexer) + { + if (!ref_stack.empty()) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + } - During scanning, the read bytes are stored in token_buffer. This string is - then converted to a signed integer, an unsigned integer, or a - floating-point number. + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); + } - @return token_type::value_unsigned, token_type::value_integer, or - token_type::value_float if number could be successfully scanned, - token_type::parse_error otherwise + return true; + } - @note The scanner is independent of the current locale. Internally, the - locale's decimal point is used instead of `.` to work with the - locale-dependent converters. - */ - token_type scan_number() // lgtm [cpp/use-of-goto] + bool key(string_t& val) { - // reset token_buffer to store the number's bytes - reset(); + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); - // the type of the parsed number; initially set to unsigned; will be - // changed if minus sign, decimal point or exponent is read - token_type number_type = token_type::value_unsigned; + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val)); + return true; + } - // state (init): we just found out we need to scan a number - switch (current) + bool end_object() + { + if (m_lexer) { - case '-': + if (!ref_stack.empty()) { - add(current); - goto scan_number_minus; + (*ref_stack.rbegin())->end_position = m_lexer->get_position() - 1; } + } - case '0': - { - add(current); - goto scan_number_zero; - } + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_object()); - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } - // all other characters are rejected outside scan_number() - default: // LCOV_EXCL_LINE - JSON_ASSERT(false); // NOLINT(cert-dcl03-c,hicpp-static-assert,misc-static-assert) LCOV_EXCL_LINE - } + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); -scan_number_minus: - // state: we just parsed a leading minus sign - number_type = token_type::value_integer; - switch (get()) + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { - case '0': - { - add(current); - goto scan_number_zero; - } + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } + return true; + } - default: - { - error_message = "invalid number; expected digit after '-'"; - return token_type::parse_error; - } - } + bool end_array() + { + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(ref_stack.back()->is_array()); -scan_number_zero: - // state: we just parse a zero (maybe with a leading minus sign) - switch (get()) + ref_stack.back()->set_parents(); + ref_stack.pop_back(); + return true; + } + + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) { - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } + JSON_THROW(ex); + } + return false; + } - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } + constexpr bool is_errored() const + { + return errored; + } - default: - goto scan_number_done; + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template + JSON_HEDLEY_RETURNS_NON_NULL + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward(v)); + return &root; } -scan_number_any1: - // state: we just parsed a number 0-9 (maybe with a leading minus sign) - switch (get()) + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } + ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + return &(ref_stack.back()->m_data.m_value.array->back()); + } - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } + JSON_ASSERT(ref_stack.back()->is_object()); + JSON_ASSERT(object_element); + *object_element = BasicJsonType(std::forward(v)); + return object_element; + } - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// the lexer to obtain the current position + const lexer_t* m_lexer = nullptr; +}; - default: - goto scan_number_done; - } +template +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + using lexer_t = lexer; -scan_number_decimal1: - // state: we just parsed a decimal point - number_type = token_type::value_float; - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } + json_sax_dom_callback_parser(BasicJsonType& r, + const parser_callback_t cb, + const bool allow_exceptions_ = true, + const lexer_t* lexer_ = nullptr) + : root(r), callback(cb), allow_exceptions(allow_exceptions_), m_lexer(lexer_) + { + keep_stack.push_back(true); + } - default: - { - error_message = "invalid number; expected digit after '.'"; - return token_type::parse_error; - } - } + // make class move-only + json_sax_dom_callback_parser(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + json_sax_dom_callback_parser& operator=(const json_sax_dom_callback_parser&) = delete; + json_sax_dom_callback_parser& operator=(json_sax_dom_callback_parser&&) = default; // NOLINT(hicpp-noexcept-move,performance-noexcept-move-constructor) + ~json_sax_dom_callback_parser() = default; -scan_number_decimal2: - // we just parsed at least one number after a decimal point - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } + bool null() + { + handle_value(nullptr); + return true; + } - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } + bool boolean(bool val) + { + handle_value(val); + return true; + } - default: - goto scan_number_done; - } + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } -scan_number_exponent: - // we just parsed an exponent - number_type = token_type::value_float; - switch (get()) - { - case '+': - case '-': - { - add(current); - goto scan_number_sign; - } + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } - default: - { - error_message = - "invalid number; expected '+', '-', or digit after exponent"; - return token_type::parse_error; - } - } + bool string(string_t& val) + { + handle_value(val); + return true; + } -scan_number_sign: - // we just parsed an exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } + bool binary(binary_t& val) + { + handle_value(std::move(val)); + return true; + } - default: - { - error_message = "invalid number; expected digit after exponent sign"; - return token_type::parse_error; - } - } + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); -scan_number_any2: - // we just parsed a number after the exponent or exponent sign - switch (get()) + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + if (m_lexer && ref_stack.back()) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } - default: - goto scan_number_done; + // check object limit + if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back())); } -scan_number_done: - // unget the character after the number (we only read it to know that - // we are done scanning a number) - unget(); + return true; + } - char* endptr = nullptr; // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - errno = 0; + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); - // try to parse integers first and fall back to floats - if (number_type == token_type::value_unsigned) + // check callback for key + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep && ref_stack.back()) { - const auto x = std::strtoull(token_buffer.data(), &endptr, 10); + object_element = &(ref_stack.back()->m_data.m_value.object->operator[](val) = discarded); + } - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + return true; + } - if (errno == 0) + bool end_object() + { + if (ref_stack.back()) + { + if (!callback(static_cast(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) { - value_unsigned = static_cast(x); - if (value_unsigned == x) + // discard object + *ref_stack.back() = discarded; + } + else + { + if (m_lexer) { - return token_type::value_unsigned; + ref_stack.back()->end_position = m_lexer->get_position() - 1; } + ref_stack.back()->set_parents(); } } - else if (number_type == token_type::value_integer) - { - const auto x = std::strtoll(token_buffer.data(), &endptr, 10); - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); - if (errno == 0) + if (!ref_stack.empty() && ref_stack.back() && ref_stack.back()->is_structured()) + { + // remove discarded value + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) { - value_integer = static_cast(x); - if (value_integer == x) + if (it->is_discarded()) { - return token_type::value_integer; + ref_stack.back()->erase(it); + break; } } } - // this code is reached if we parse a floating-point number or if an - // integer conversion above failed - strtof(value_float, token_buffer.data(), &endptr); + return true; + } - // we checked the number format before - JSON_ASSERT(endptr == token_buffer.data() + token_buffer.size()); + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); - return token_type::value_float; + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + // check array limit + if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); + } + + return true; } - /*! - @param[in] literal_text the literal text to expect - @param[in] length the length of the passed literal text - @param[in] return_type the token type to return on success - */ - JSON_HEDLEY_NON_NULL(2) - token_type scan_literal(const char_type* literal_text, const std::size_t length, - token_type return_type) + bool end_array() { - JSON_ASSERT(char_traits::to_char_type(current) == literal_text[0]); - for (std::size_t i = 1; i < length; ++i) + bool keep = true; + + if (ref_stack.back()) { - if (JSON_HEDLEY_UNLIKELY(char_traits::to_char_type(get()) != literal_text[i])) + keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (keep) { - error_message = "invalid literal"; - return token_type::parse_error; + ref_stack.back()->set_parents(); + } + else + { + // discard array + *ref_stack.back() = discarded; } } - return return_type; + + JSON_ASSERT(!ref_stack.empty()); + JSON_ASSERT(!keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + // remove discarded value + if (!keep && !ref_stack.empty() && ref_stack.back()->is_array()) + { + ref_stack.back()->m_data.m_value.array->pop_back(); + } + + return true; } - ///////////////////// - // input management - ///////////////////// + template + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const Exception& ex) + { + errored = true; + static_cast(ex); + if (allow_exceptions) + { + JSON_THROW(ex); + } + return false; + } - /// reset token_buffer; current character is beginning of token - void reset() noexcept + constexpr bool is_errored() const { - token_buffer.clear(); - token_string.clear(); - token_string.push_back(char_traits::to_char_type(current)); + return errored; } - /* - @brief get next character from the input + private: + /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a - `char_traits::eof()` in that case. Stores the scanned characters - for use in error messages. + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements - @return character read from the input + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) */ - char_int_type get() + template + std::pair handle_value(Value&& v, const bool skip_callback = false) { - ++position.chars_read_total; - ++position.chars_read_current_line; + JSON_ASSERT(!keep_stack.empty()); - if (next_unget) + // do not handle this value if we know it would be added to a discarded + // container + if (!keep_stack.back()) { - // just reset the next_unget variable and work with current - next_unget = false; + return {false, nullptr}; } - else + + // create value + auto value = BasicJsonType(std::forward(v)); + + // check callback + const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (!keep) { - current = ia.get_character(); + return {false, nullptr}; } - if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) + if (ref_stack.empty()) { - token_string.push_back(char_traits::to_char_type(current)); + root = std::move(value); + return {true, & root}; } - if (current == '\n') + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (!ref_stack.back()) { - ++position.lines_read; - position.chars_read_current_line = 0; + return {false, nullptr}; } - return current; - } - - /*! - @brief unget current character (read it again on next get) - - We implement unget by setting variable next_unget to true. The input is not - changed - we just simulate ungetting by modifying chars_read_total, - chars_read_current_line, and token_string. The next call to get() will - behave as if the unget character is read again. - */ - void unget() - { - next_unget = true; - - --position.chars_read_total; + // we now only expect arrays and objects + JSON_ASSERT(ref_stack.back()->is_array() || ref_stack.back()->is_object()); - // in case we "unget" a newline, we have to also decrement the lines_read - if (position.chars_read_current_line == 0) + // array + if (ref_stack.back()->is_array()) { - if (position.lines_read > 0) - { - --position.lines_read; - } + ref_stack.back()->m_data.m_value.array->emplace_back(std::move(value)); + return {true, & (ref_stack.back()->m_data.m_value.array->back())}; } - else + + // object + JSON_ASSERT(ref_stack.back()->is_object()); + // check if we should store an element for the current key + JSON_ASSERT(!key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (!store_element) { - --position.chars_read_current_line; + return {false, nullptr}; } - if (JSON_HEDLEY_LIKELY(current != char_traits::eof())) - { - JSON_ASSERT(!token_string.empty()); - token_string.pop_back(); - } - } + JSON_ASSERT(object_element); + *object_element = std::move(value); + return {true, object_element}; + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector ref_stack {}; + /// stack to manage which values to keep + std::vector keep_stack {}; // NOLINT(readability-redundant-member-init) + /// stack to manage which object keys to keep + std::vector key_keep_stack {}; // NOLINT(readability-redundant-member-init) + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; + /// the lexer to obtain the current position + const lexer_t* m_lexer = nullptr; +}; + +template +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using binary_t = typename BasicJsonType::binary_t; - /// add a character to token_buffer - void add(char_int_type c) + bool null() { - token_buffer.push_back(static_cast(c)); + return true; } - public: - ///////////////////// - // value getters - ///////////////////// - - /// return integer value - constexpr number_integer_t get_number_integer() const noexcept + bool boolean(bool /*unused*/) { - return value_integer; + return true; } - /// return unsigned integer value - constexpr number_unsigned_t get_number_unsigned() const noexcept + bool number_integer(number_integer_t /*unused*/) { - return value_unsigned; + return true; } - /// return floating-point value - constexpr number_float_t get_number_float() const noexcept + bool number_unsigned(number_unsigned_t /*unused*/) { - return value_float; + return true; } - /// return current string value (implicitly resets the token; useful only once) - string_t& get_string() + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) { - return token_buffer; + return true; } - ///////////////////// - // diagnostics - ///////////////////// - - /// return position of last read token - constexpr position_t get_position() const noexcept + bool string(string_t& /*unused*/) { - return position; + return true; } - /// return the last read token (for errors only). Will never contain EOF - /// (an arbitrary value that is not a valid char value, often -1), because - /// 255 may legitimately occur. May contain NUL, which should be escaped. - std::string get_token_string() const + bool binary(binary_t& /*unused*/) { - // escape control characters - std::string result; - for (const auto c : token_string) - { - if (static_cast(c) <= '\x1F') - { - // escape control characters - std::array cs{{}}; - static_cast((std::snprintf)(cs.data(), cs.size(), "", static_cast(c))); // NOLINT(cppcoreguidelines-pro-type-vararg,hicpp-vararg) - result += cs.data(); - } - else - { - // add character as is - result.push_back(static_cast(c)); - } - } - - return result; + return true; } - /// return syntax error message - JSON_HEDLEY_RETURNS_NON_NULL - constexpr const char* get_error_message() const noexcept + bool start_object(std::size_t /*unused*/ = static_cast(-1)) { - return error_message; + return true; } - ///////////////////// - // actual scanner - ///////////////////// - - /*! - @brief skip the UTF-8 byte order mark - @return true iff there is no BOM or the correct BOM has been skipped - */ - bool skip_bom() + bool key(string_t& /*unused*/) { - if (get() == 0xEF) - { - // check if we completely parse the BOM - return get() == 0xBB && get() == 0xBF; - } - - // the first character is not the beginning of the BOM; unget it to - // process is later - unget(); return true; } - void skip_whitespace() + bool end_object() { - do - { - get(); - } - while (current == ' ' || current == '\t' || current == '\n' || current == '\r'); + return true; } - token_type scan() + bool start_array(std::size_t /*unused*/ = static_cast(-1)) { - // initially, skip the BOM - if (position.chars_read_total == 0 && !skip_bom()) - { - error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; - } - - // read next character and ignore whitespace - skip_whitespace(); - - // ignore comments - while (ignore_comments && current == '/') - { - if (!scan_comment()) - { - return token_type::parse_error; - } - - // skip following whitespace - skip_whitespace(); - } - - switch (current) - { - // structural characters - case '[': - return token_type::begin_array; - case ']': - return token_type::end_array; - case '{': - return token_type::begin_object; - case '}': - return token_type::end_object; - case ':': - return token_type::name_separator; - case ',': - return token_type::value_separator; - - // literals - case 't': - { - std::array true_literal = {{static_cast('t'), static_cast('r'), static_cast('u'), static_cast('e')}}; - return scan_literal(true_literal.data(), true_literal.size(), token_type::literal_true); - } - case 'f': - { - std::array false_literal = {{static_cast('f'), static_cast('a'), static_cast('l'), static_cast('s'), static_cast('e')}}; - return scan_literal(false_literal.data(), false_literal.size(), token_type::literal_false); - } - case 'n': - { - std::array null_literal = {{static_cast('n'), static_cast('u'), static_cast('l'), static_cast('l')}}; - return scan_literal(null_literal.data(), null_literal.size(), token_type::literal_null); - } - - // string - case '\"': - return scan_string(); - - // number - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scan_number(); - - // end of input (the null byte is needed when parsing from - // string literals) - case '\0': - case char_traits::eof(): - return token_type::end_of_input; - - // error - default: - error_message = "invalid literal"; - return token_type::parse_error; - } + return true; } - private: - /// input adapter - InputAdapterType ia; - - /// whether comments should be ignored (true) or signaled as errors (false) - const bool ignore_comments = false; - - /// the current character - char_int_type current = char_traits::eof(); - - /// whether the next get() call should just return current - bool next_unget = false; - - /// the start position of the current token - position_t position {}; - - /// raw input token string (for error messages) - std::vector token_string {}; - - /// buffer for variable-length tokens (numbers, strings) - string_t token_buffer {}; - - /// a description of occurred lexer errors - const char* error_message = ""; - - // number values - number_integer_t value_integer = 0; - number_unsigned_t value_unsigned = 0; - number_float_t value_float = 0; + bool end_array() + { + return true; + } - /// the decimal point - const char_int_type decimal_point_char = '.'; + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } }; } // namespace detail NLOHMANN_JSON_NAMESPACE_END +// #include + // #include // #include @@ -9208,7 +9241,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -12263,7 +12296,7 @@ class parser { if (callback) { - json_sax_dom_callback_parser sdp(result, callback, allow_exceptions); + json_sax_dom_callback_parser sdp(result, callback, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -12291,7 +12324,7 @@ class parser } else { - json_sax_dom_parser sdp(result, allow_exceptions); + json_sax_dom_parser sdp(result, allow_exceptions, &m_lexer); sax_parse_internal(&sdp); // in strict mode, input must be completely read @@ -19417,12 +19450,15 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec friend class ::nlohmann::detail::binary_writer; template friend class ::nlohmann::detail::binary_reader; - template + template friend class ::nlohmann::detail::json_sax_dom_parser; - template + template friend class ::nlohmann::detail::json_sax_dom_callback_parser; friend class ::nlohmann::detail::exception; + size_t start_position = std::string::npos; + size_t end_position = std::string::npos; + /// workaround type for MSVC using basic_json_t = NLOHMANN_BASIC_JSON_TPL; using json_base_class_t = ::nlohmann::detail::json_base_class; @@ -20197,6 +20233,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec } JSON_ASSERT(m_data.m_type == val.type()); set_parents(); + this->start_position = val.get_start_position(); + this->end_position = val.get_end_position(); assert_invariant(); } @@ -20508,6 +20546,9 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec break; } + this->start_position = other.start_position; + this->end_position = other.end_position; + set_parents(); assert_invariant(); } @@ -20516,6 +20557,7 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec /// @sa https://json.nlohmann.me/api/basic_json/basic_json/ basic_json(basic_json&& other) noexcept : json_base_class_t(std::forward(other)), + start_position(other.start_position), end_position(other.end_position), m_data(std::move(other.m_data)) { // check that passed value is valid @@ -20545,6 +20587,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec using std::swap; swap(m_data.m_type, other.m_data.m_type); swap(m_data.m_value, other.m_data.m_value); + this->start_position = other.start_position; + this->end_position = other.end_position; json_base_class_t::operator=(std::move(other)); set_parents(); @@ -20697,6 +20741,16 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec return m_data.m_type; } + size_t get_start_position() const noexcept + { + return start_position; + } + + size_t get_end_position() const noexcept + { + return end_position; + } + /// @} private: @@ -23668,8 +23722,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -23684,8 +23738,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); } @@ -23709,8 +23763,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const cbor_tag_handler_t tag_handler = cbor_tag_handler_t::error) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::cbor).sax_parse(input_format_t::cbor, &sdp, strict, tag_handler); return res ? result : basic_json(value_t::discarded); @@ -23725,8 +23779,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23740,8 +23794,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23763,8 +23817,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::msgpack).sax_parse(input_format_t::msgpack, &sdp, strict); return res ? result : basic_json(value_t::discarded); @@ -23779,8 +23833,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23794,8 +23848,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23817,8 +23871,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::ubjson).sax_parse(input_format_t::ubjson, &sdp, strict); return res ? result : basic_json(value_t::discarded); @@ -23833,8 +23887,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23848,8 +23902,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bjdata).sax_parse(input_format_t::bjdata, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23863,8 +23917,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::forward(i)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23878,8 +23932,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = detail::input_adapter(std::move(first), std::move(last)); + detail::json_sax_dom_parser sdp(result, allow_exceptions); const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); } @@ -23901,8 +23955,8 @@ class basic_json // NOLINT(cppcoreguidelines-special-member-functions,hicpp-spec const bool allow_exceptions = true) { basic_json result; - detail::json_sax_dom_parser sdp(result, allow_exceptions); auto ia = i.get(); + detail::json_sax_dom_parser sdp(result, allow_exceptions); // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) const bool res = binary_reader(std::move(ia), input_format_t::bson).sax_parse(input_format_t::bson, &sdp, strict); return res ? result : basic_json(value_t::discarded); diff --git a/tests/src/unit-cbor.cpp b/tests/src/unit-cbor.cpp index be94d2f6e1..9301959d7b 100644 --- a/tests/src/unit-cbor.cpp +++ b/tests/src/unit-cbor.cpp @@ -1632,7 +1632,8 @@ TEST_CASE("CBOR") }; json j; - auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); + auto ia = nlohmann::detail::input_adapter(input); + auto cbp = nlohmann::detail::json_sax_dom_callback_parser(j, callback, true); CHECK(json::sax_parse(input, &cbp, json::input_format_t::cbor)); CHECK(j.at("foo").is_binary()); CHECK(binary_seen); diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index e2a8bac0f7..6ed5082be5 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -219,7 +219,8 @@ json parser_helper(const std::string& s) CHECK(j_nothrow == j); json j_sax; - nlohmann::detail::json_sax_dom_parser sdp(j_sax); + auto ia = nlohmann::detail::input_adapter(s); + nlohmann::detail::json_sax_dom_parser sdp(j_sax); json::sax_parse(s, &sdp); CHECK(j_sax == j); diff --git a/tests/src/unit-deserialization.cpp b/tests/src/unit-deserialization.cpp index 65bb28a224..4d42043f64 100644 --- a/tests/src/unit-deserialization.cpp +++ b/tests/src/unit-deserialization.cpp @@ -583,7 +583,8 @@ TEST_CASE("deserialization") auto first = str.begin(); auto last = str.end(); json j; - json_sax_dom_parser sax(j, true); + auto ia = nlohmann::detail::input_adapter(str); + json_sax_dom_parser sax(j, true); CHECK(json::sax_parse(proxy(first), proxy(last), &sax, input_format_t::json, false)); diff --git a/tests/src/unit-disabled_exceptions.cpp b/tests/src/unit-disabled_exceptions.cpp index 4ad155140a..3ef31b1584 100644 --- a/tests/src/unit-disabled_exceptions.cpp +++ b/tests/src/unit-disabled_exceptions.cpp @@ -20,10 +20,10 @@ using json = nlohmann::json; // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: - explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} + explicit sax_no_exception(json& j) : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) { diff --git a/tests/src/unit-regression2.cpp b/tests/src/unit-regression2.cpp index 0172a45ea2..e012a3abee 100644 --- a/tests/src/unit-regression2.cpp +++ b/tests/src/unit-regression2.cpp @@ -162,11 +162,11 @@ struct adl_serializer // for #2824 ///////////////////////////////////////////////////////////////////// -class sax_no_exception : public nlohmann::detail::json_sax_dom_parser +class sax_no_exception : public nlohmann::detail::json_sax_dom_parser { public: explicit sax_no_exception(json& j) - : nlohmann::detail::json_sax_dom_parser(j, false) + : nlohmann::detail::json_sax_dom_parser(j, false) {} static bool parse_error(std::size_t /*position*/, const std::string& /*last_token*/, const json::exception& ex) diff --git a/tests/src/unit-ubjson.cpp b/tests/src/unit-ubjson.cpp index 06611c5fe1..99c6f7d7ee 100644 --- a/tests/src/unit-ubjson.cpp +++ b/tests/src/unit-ubjson.cpp @@ -1617,7 +1617,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); @@ -1631,7 +1631,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); From 311861f48da30c3928d458798a5c50a11fd8e316 Mon Sep 17 00:00:00 2001 From: Sush Shringarputale Date: Tue, 5 Nov 2024 11:56:21 -0800 Subject: [PATCH 02/76] Add more unit tests and add start/stop parsing for arrays --- include/nlohmann/detail/input/json_sax.hpp | 40 ++-- single_include/nlohmann/json.hpp | 40 ++-- tests/src/unit-class_parser.cpp | 238 +++++++++++++++++++++ tests/src/unit-ubjson.cpp | 2 +- 4 files changed, 295 insertions(+), 25 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index aac35fabd3..26edbf48c3 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -232,10 +232,7 @@ class json_sax_dom_parser if (m_lexer) { - if (!ref_stack.empty()) - { - ref_stack.back()->start_position = m_lexer->get_position() - 1; - } + ref_stack.back()->start_position = m_lexer->get_position() - 1; } if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) @@ -258,17 +255,16 @@ class json_sax_dom_parser bool end_object() { - if (m_lexer) - { - if (!ref_stack.empty()) - { - (*ref_stack.rbegin())->end_position = m_lexer->get_position() - 1; - } - } JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); + if (m_lexer) + { + // set end position of the object (inclusive) + ref_stack.back()->end_position = m_lexer->get_position(); + } + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -278,6 +274,11 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + if (m_lexer) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); @@ -291,6 +292,12 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_array()); + if (m_lexer) + { + // set end position of the object (inclusive) + ref_stack.back()->end_position = m_lexer->get_position(); + } + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -483,7 +490,7 @@ class json_sax_dom_callback_parser { if (m_lexer) { - ref_stack.back()->end_position = m_lexer->get_position() - 1; + ref_stack.back()->end_position = m_lexer->get_position(); } ref_stack.back()->set_parents(); } @@ -518,6 +525,11 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); + if (m_lexer && ref_stack.back()) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + // check array limit if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { @@ -536,6 +548,10 @@ class json_sax_dom_callback_parser keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); if (keep) { + if (m_lexer) + { + ref_stack.back()->end_position = m_lexer->get_position(); + } ref_stack.back()->set_parents(); } else diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 435a1fa617..0f6104d59e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8509,10 +8509,7 @@ class json_sax_dom_parser if (m_lexer) { - if (!ref_stack.empty()) - { - ref_stack.back()->start_position = m_lexer->get_position() - 1; - } + ref_stack.back()->start_position = m_lexer->get_position() - 1; } if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) @@ -8535,17 +8532,16 @@ class json_sax_dom_parser bool end_object() { - if (m_lexer) - { - if (!ref_stack.empty()) - { - (*ref_stack.rbegin())->end_position = m_lexer->get_position() - 1; - } - } JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_object()); + if (m_lexer) + { + // set end position of the object (inclusive) + ref_stack.back()->end_position = m_lexer->get_position(); + } + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -8555,6 +8551,11 @@ class json_sax_dom_parser { ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + if (m_lexer) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + if (JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back())); @@ -8568,6 +8569,12 @@ class json_sax_dom_parser JSON_ASSERT(!ref_stack.empty()); JSON_ASSERT(ref_stack.back()->is_array()); + if (m_lexer) + { + // set end position of the object (inclusive) + ref_stack.back()->end_position = m_lexer->get_position(); + } + ref_stack.back()->set_parents(); ref_stack.pop_back(); return true; @@ -8760,7 +8767,7 @@ class json_sax_dom_callback_parser { if (m_lexer) { - ref_stack.back()->end_position = m_lexer->get_position() - 1; + ref_stack.back()->end_position = m_lexer->get_position(); } ref_stack.back()->set_parents(); } @@ -8795,6 +8802,11 @@ class json_sax_dom_callback_parser auto val = handle_value(BasicJsonType::value_t::array, true); ref_stack.push_back(val.second); + if (m_lexer && ref_stack.back()) + { + ref_stack.back()->start_position = m_lexer->get_position() - 1; + } + // check array limit if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast(-1) && len > ref_stack.back()->max_size())) { @@ -8813,6 +8825,10 @@ class json_sax_dom_callback_parser keep = callback(static_cast(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); if (keep) { + if (m_lexer) + { + ref_stack.back()->end_position = m_lexer->get_position(); + } ref_stack.back()->set_parents(); } else diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index 6ed5082be5..a705c7edd8 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -303,6 +303,42 @@ void comments_helper(const std::string& s) } } +void start_pos_end_pos_helper(std::string& nested_type_json_str, const std::string& root_type_json_str, const json& expected_json, bool should_generate_start_end_pos, json::parser_callback_t cb = nullptr) +{ + json j; + + // 1. If callback is provided, use callback version of parse() + if (cb) + { + j = json::parse(root_type_json_str, cb); + } + else + { + j = json::parse(root_type_json_str); + } + + // 2. Check if the generated JSON is as expected + CHECK(j == expected_json); + + // 3. Check if the start and end positions are generated correctly for root object + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == root_type_json_str.size()); + + // 4. Get the nested object + const auto& nested = j["nested"]; + if (should_generate_start_end_pos) + { + // 5. Check if the start and end positions are generated correctly for nested objects and arrays + CHECK(nested_type_json_str == root_type_json_str.substr(nested.get_start_position(), nested.get_end_position() - nested.get_start_position())); + } + else + { + // 6. Check if the start and end positions are not generated for nested primitive types + CHECK(nested.get_start_position() == std::string::npos); + CHECK(nested.get_end_position() == std::string::npos); + } +} + } // namespace TEST_CASE("parser class") @@ -1689,4 +1725,206 @@ TEST_CASE("parser class") CHECK_THROWS_WITH_AS(_ = json::parse("/a", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 2: syntax error while parsing value - invalid comment; expecting '/' or '*' after '/'; last read: '/a'", json::parse_error); CHECK_THROWS_WITH_AS(_ = json::parse("/*", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid comment; missing closing '*/'; last read: '/*'", json::parse_error); } + + SECTION("retrieve start position and end position") + { + SECTION("for object") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"({ "a": 1,"b" : "test"})"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test"})"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {{"a", 1}, {"b", "test"}}}, {"anotherValue", "test"}}), true, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"({ "a": 1,"b" : "test"})"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test"})"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {{"a", 1}, {"b", "test"}}}, {"anotherValue", "test"}}), true); + } + } + + SECTION("for array") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"([1, "test"])"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {1, "test"}}, {"anotherValue", "test"}}), true, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"([1, "test"])"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {1, "test"}}, {"anotherValue", "test"}}), true); + } + } + + SECTION("for simple types") + { + SECTION("no nested") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + + // 1. string type + std::string json_str = R"("test")"; + json j = json::parse(json_str, cb); + CHECK(j == json("test")); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str, cb); + CHECK(j == json(1)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str, cb); + CHECK(j == json(true)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str, cb); + CHECK(j == json(nullptr)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + } + + SECTION("without callback") + { + // 1. string type + std::string json_str = R"("test")"; + json j = json::parse(json_str); + CHECK(j == json("test")); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 2. number type + json_str = R"(1)"; + j = json::parse(json_str); + CHECK(j == json(1)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 3. boolean type + json_str = R"(true)"; + j = json::parse(json_str); + CHECK(j == json(true)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + + // 4. null type + json_str = R"(null)"; + j = json::parse(json_str); + CHECK(j == json(nullptr)); + CHECK(j.get_start_position() == std::string::npos); + CHECK(j.get_end_position() == std::string::npos); + } + } + + SECTION("string type") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"("test")"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), false, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"("test")"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), false); + } + } + + SECTION("number type") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"(1)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), false, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"(1)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), false); + } + } + + SECTION("boolean type") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"(true)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), false, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"(true)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), false); + } + } + + SECTION("null type") + { + SECTION("with callback") + { + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept + { + return true; + }; + std::string nested_type_json_str = R"(null)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), false, cb); + } + + SECTION("without callback") + { + std::string nested_type_json_str = R"(null)"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), false); + } + } + } + } } diff --git a/tests/src/unit-ubjson.cpp b/tests/src/unit-ubjson.cpp index 99c6f7d7ee..2d7b6c65b7 100644 --- a/tests/src/unit-ubjson.cpp +++ b/tests/src/unit-ubjson.cpp @@ -1631,7 +1631,7 @@ TEST_CASE("UBJSON") CHECK_THROWS_AS(_ = json::from_ubjson(v_ubjson), json::out_of_range&); json j; - nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept + nlohmann::detail::json_sax_dom_callback_parser scp(j, [](int /*unused*/, json::parse_event_t /*unused*/, const json& /*unused*/) noexcept { return true; }); From b3f6499554f42765ddf8b5fed02ceec97d52e9f6 Mon Sep 17 00:00:00 2001 From: Sush Shringarputale Date: Thu, 7 Nov 2024 17:04:25 -0800 Subject: [PATCH 03/76] Add raw value for all types --- include/nlohmann/detail/input/json_sax.hpp | 81 ++++++++++++++++++++-- tests/src/unit-class_parser.cpp | 54 +++++++-------- 2 files changed, 100 insertions(+), 35 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 26edbf48c3..4cf99a8cf5 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -173,7 +173,7 @@ class json_sax_dom_parser parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, const lexer_t* lexer_ = nullptr) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) : root(r), allow_exceptions(allow_exceptions_), m_lexer(lexer_) {} @@ -208,7 +208,7 @@ class json_sax_dom_parser return true; } - bool number_float(number_float_t val, const string_t& /*unused*/) + bool number_float(number_float_t val, const string_t& float_string) { handle_value(val); return true; @@ -322,6 +322,39 @@ class json_sax_dom_parser } private: + + void set_start_end_pos(BasicJsonType& v) + { + if (m_lexer) + { + v.end_position = m_lexer->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + default: + v.start_position = v.end_position - m_lexer->get_string().size(); + break; + } + } + } /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -335,6 +368,7 @@ class json_sax_dom_parser if (ref_stack.empty()) { root = BasicJsonType(std::forward(v)); + set_start_end_pos(root); return &root; } @@ -343,12 +377,14 @@ class json_sax_dom_parser if (ref_stack.back()->is_array()) { ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + set_start_end_pos(ref_stack.back()->m_data.m_value.array->back()); return &(ref_stack.back()->m_data.m_value.array->back()); } JSON_ASSERT(ref_stack.back()->is_object()); JSON_ASSERT(object_element); *object_element = BasicJsonType(std::forward(v)); + set_start_end_pos(*object_element); return object_element; } @@ -363,7 +399,7 @@ class json_sax_dom_parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// the lexer to obtain the current position - const lexer_t* m_lexer = nullptr; + lexer_t* m_lexer = nullptr; }; template @@ -382,7 +418,7 @@ class json_sax_dom_callback_parser json_sax_dom_callback_parser(BasicJsonType& r, const parser_callback_t cb, const bool allow_exceptions_ = true, - const lexer_t* lexer_ = nullptr) + lexer_t* lexer_ = nullptr) : root(r), callback(cb), allow_exceptions(allow_exceptions_), m_lexer(lexer_) { keep_stack.push_back(true); @@ -594,6 +630,40 @@ class json_sax_dom_callback_parser } private: + + template + void set_start_end_pos(Value& v) + { + if (m_lexer) + { + v.end_position = m_lexer->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + default: + v.start_position = v.end_position - m_lexer->get_string().size(); + break; + } + } + } /*! @param[in] v value to add to the JSON value we build during parsing @param[in] skip_callback whether we should skip calling the callback @@ -623,6 +693,7 @@ class json_sax_dom_callback_parser // create value auto value = BasicJsonType(std::forward(v)); + set_start_end_pos(value); // check callback const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); @@ -692,7 +763,7 @@ class json_sax_dom_callback_parser /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; /// the lexer to obtain the current position - const lexer_t* m_lexer = nullptr; + lexer_t* m_lexer = nullptr; }; template diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index a705c7edd8..ff827920e0 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -331,12 +331,6 @@ void start_pos_end_pos_helper(std::string& nested_type_json_str, const std::stri // 5. Check if the start and end positions are generated correctly for nested objects and arrays CHECK(nested_type_json_str == root_type_json_str.substr(nested.get_start_position(), nested.get_end_position() - nested.get_start_position())); } - else - { - // 6. Check if the start and end positions are not generated for nested primitive types - CHECK(nested.get_start_position() == std::string::npos); - CHECK(nested.get_end_position() == std::string::npos); - } } } // namespace @@ -1785,29 +1779,29 @@ TEST_CASE("parser class") std::string json_str = R"("test")"; json j = json::parse(json_str, cb); CHECK(j == json("test")); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 2. number type json_str = R"(1)"; j = json::parse(json_str, cb); CHECK(j == json(1)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 3. boolean type json_str = R"(true)"; j = json::parse(json_str, cb); CHECK(j == json(true)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 4. null type json_str = R"(null)"; j = json::parse(json_str, cb); CHECK(j == json(nullptr)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); } SECTION("without callback") @@ -1816,29 +1810,29 @@ TEST_CASE("parser class") std::string json_str = R"("test")"; json j = json::parse(json_str); CHECK(j == json("test")); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 2. number type json_str = R"(1)"; j = json::parse(json_str); CHECK(j == json(1)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 3. boolean type json_str = R"(true)"; j = json::parse(json_str); CHECK(j == json(true)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); // 4. null type json_str = R"(null)"; j = json::parse(json_str); CHECK(j == json(nullptr)); - CHECK(j.get_start_position() == std::string::npos); - CHECK(j.get_end_position() == std::string::npos); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == json_str.size()); } } @@ -1852,14 +1846,14 @@ TEST_CASE("parser class") }; std::string nested_type_json_str = R"("test")"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), false, cb); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), true, cb); } SECTION("without callback") { std::string nested_type_json_str = R"("test")"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), false); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), true); } } @@ -1873,14 +1867,14 @@ TEST_CASE("parser class") }; std::string nested_type_json_str = R"(1)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), false, cb); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), true, cb); } SECTION("without callback") { std::string nested_type_json_str = R"(1)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), false); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), true); } } @@ -1894,14 +1888,14 @@ TEST_CASE("parser class") }; std::string nested_type_json_str = R"(true)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), false, cb); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), true, cb); } SECTION("without callback") { std::string nested_type_json_str = R"(true)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), false); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), true); } } @@ -1915,14 +1909,14 @@ TEST_CASE("parser class") }; std::string nested_type_json_str = R"(null)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), false, cb); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), true, cb); } SECTION("without callback") { std::string nested_type_json_str = R"(null)"; std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), false); + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), true); } } } From d321cdb44cc7c650a84bbc850c64d19e1efdd049 Mon Sep 17 00:00:00 2001 From: Sush Shringarputale Date: Fri, 8 Nov 2024 11:28:52 -0800 Subject: [PATCH 04/76] Add more tests and fix compiler warning --- include/nlohmann/detail/input/json_sax.hpp | 2 +- single_include/nlohmann/json.hpp | 79 ++++++- tests/src/unit-class_parser.cpp | 259 ++++++++++----------- 3 files changed, 199 insertions(+), 141 deletions(-) diff --git a/include/nlohmann/detail/input/json_sax.hpp b/include/nlohmann/detail/input/json_sax.hpp index 4cf99a8cf5..bb410d8254 100644 --- a/include/nlohmann/detail/input/json_sax.hpp +++ b/include/nlohmann/detail/input/json_sax.hpp @@ -208,7 +208,7 @@ class json_sax_dom_parser return true; } - bool number_float(number_float_t val, const string_t& float_string) + bool number_float(number_float_t val, const string_t& /*unused*/) { handle_value(val); return true; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 0f6104d59e..dfc9c5b900 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8450,7 +8450,7 @@ class json_sax_dom_parser parsing @param[in] allow_exceptions_ whether parse errors yield exceptions */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, const lexer_t* lexer_ = nullptr) + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr) : root(r), allow_exceptions(allow_exceptions_), m_lexer(lexer_) {} @@ -8599,6 +8599,39 @@ class json_sax_dom_parser } private: + + void set_start_end_pos(BasicJsonType& v) + { + if (m_lexer) + { + v.end_position = m_lexer->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + default: + v.start_position = v.end_position - m_lexer->get_string().size(); + break; + } + } + } /*! @invariant If the ref stack is empty, then the passed value will be the new root. @@ -8612,6 +8645,7 @@ class json_sax_dom_parser if (ref_stack.empty()) { root = BasicJsonType(std::forward(v)); + set_start_end_pos(root); return &root; } @@ -8620,12 +8654,14 @@ class json_sax_dom_parser if (ref_stack.back()->is_array()) { ref_stack.back()->m_data.m_value.array->emplace_back(std::forward(v)); + set_start_end_pos(ref_stack.back()->m_data.m_value.array->back()); return &(ref_stack.back()->m_data.m_value.array->back()); } JSON_ASSERT(ref_stack.back()->is_object()); JSON_ASSERT(object_element); *object_element = BasicJsonType(std::forward(v)); + set_start_end_pos(*object_element); return object_element; } @@ -8640,7 +8676,7 @@ class json_sax_dom_parser /// whether to throw exceptions in case of errors const bool allow_exceptions = true; /// the lexer to obtain the current position - const lexer_t* m_lexer = nullptr; + lexer_t* m_lexer = nullptr; }; template @@ -8659,7 +8695,7 @@ class json_sax_dom_callback_parser json_sax_dom_callback_parser(BasicJsonType& r, const parser_callback_t cb, const bool allow_exceptions_ = true, - const lexer_t* lexer_ = nullptr) + lexer_t* lexer_ = nullptr) : root(r), callback(cb), allow_exceptions(allow_exceptions_), m_lexer(lexer_) { keep_stack.push_back(true); @@ -8871,6 +8907,40 @@ class json_sax_dom_callback_parser } private: + + template + void set_start_end_pos(Value& v) + { + if (m_lexer) + { + v.end_position = m_lexer->get_position(); + + switch (v.type()) + { + case value_t::boolean: + { + v.start_position = v.end_position - (v.m_data.m_value.boolean ? 4 : 5); + break; + } + + case value_t::null: + { + v.start_position = v.end_position - 4; + break; + } + + case value_t::string: + { + v.start_position = v.end_position - v.m_data.m_value.string->size() - 2; + break; + } + + default: + v.start_position = v.end_position - m_lexer->get_string().size(); + break; + } + } + } /*! @param[in] v value to add to the JSON value we build during parsing @param[in] skip_callback whether we should skip calling the callback @@ -8900,6 +8970,7 @@ class json_sax_dom_callback_parser // create value auto value = BasicJsonType(std::forward(v)); + set_start_end_pos(value); // check callback const bool keep = skip_callback || callback(static_cast(ref_stack.size()), parse_event_t::value, value); @@ -8969,7 +9040,7 @@ class json_sax_dom_callback_parser /// a discarded value for the callback BasicJsonType discarded = BasicJsonType::value_t::discarded; /// the lexer to obtain the current position - const lexer_t* m_lexer = nullptr; + lexer_t* m_lexer = nullptr; }; template diff --git a/tests/src/unit-class_parser.cpp b/tests/src/unit-class_parser.cpp index ff827920e0..51264a2116 100644 --- a/tests/src/unit-class_parser.cpp +++ b/tests/src/unit-class_parser.cpp @@ -303,7 +303,14 @@ void comments_helper(const std::string& s) } } -void start_pos_end_pos_helper(std::string& nested_type_json_str, const std::string& root_type_json_str, const json& expected_json, bool should_generate_start_end_pos, json::parser_callback_t cb = nullptr) +void validateFn(const std::string& original_string, const json& j, const json& check) +{ + CHECK(j == check); + CHECK(j.get_start_position() == 0); + CHECK(j.get_end_position() == original_string.size()); +} + +void start_pos_end_pos_helper(std::string& nested_type_json_str, const std::string& root_type_json_str, const json& expected_json, json::parser_callback_t cb = nullptr) { json j; @@ -318,19 +325,12 @@ void start_pos_end_pos_helper(std::string& nested_type_json_str, const std::stri } // 2. Check if the generated JSON is as expected - CHECK(j == expected_json); - - // 3. Check if the start and end positions are generated correctly for root object - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == root_type_json_str.size()); + validateFn(root_type_json_str, j, expected_json); - // 4. Get the nested object + // 3. Get the nested object const auto& nested = j["nested"]; - if (should_generate_start_end_pos) - { - // 5. Check if the start and end positions are generated correctly for nested objects and arrays - CHECK(nested_type_json_str == root_type_json_str.substr(nested.get_start_position(), nested.get_end_position() - nested.get_start_position())); - } + // 4. Check if the start and end positions are generated correctly for nested objects and arrays + CHECK(nested_type_json_str == root_type_json_str.substr(nested.get_start_position(), nested.get_end_position() - nested.get_start_position())); } } // namespace @@ -1720,48 +1720,83 @@ TEST_CASE("parser class") CHECK_THROWS_WITH_AS(_ = json::parse("/*", nullptr, true, true), "[json.exception.parse_error.101] parse error at line 1, column 3: syntax error while parsing value - invalid comment; missing closing '*/'; last read: '/*'", json::parse_error); } +// Macro for all test cases for start_pos and end_pos +#define SETUP_TESTCASES() \ + SECTION("with callback") \ + { \ + SECTION("filter nothing") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept \ + { \ + return true; \ + }; \ + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, expected, cb); \ + } \ + SECTION("filter element") \ + { \ + json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t event, json& j) noexcept \ + { \ + return (event != json::parse_event_t::key && event != json::parse_event_t::value) || j != json("a"); \ + }; \ + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, filteredExpected, cb); \ + } \ + } \ + SECTION("without callback") \ + { \ + start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, expected); \ + } + SECTION("retrieve start position and end position") { SECTION("for object") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"({ "a": 1,"b" : "test"})"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test"})"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {{"a", 1}, {"b", "test"}}}, {"anotherValue", "test"}}), true, cb); - } + std::string nested_type_json_str = R"({ "a": 1,"b" : "test1"})"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test2"})"; + auto expected = json({{"nested", {{"a", 1}, {"b", "test1"}}}, {"anotherValue", "test2"}}); + auto filteredExpected = expected; + filteredExpected["nested"].erase("a"); - SECTION("without callback") - { - std::string nested_type_json_str = R"({ "a": 1,"b" : "test"})"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test"})"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {{"a", 1}, {"b", "test"}}}, {"anotherValue", "test"}}), true); - } + SETUP_TESTCASES() } SECTION("for array") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"([1, "test"])"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {1, "test"}}, {"anotherValue", "test"}}), true, cb); - } + std::string nested_type_json_str = R"(["a", "test", 45])"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {"a", "test", 45}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"] = json({"test", 45}); + SETUP_TESTCASES() + } - SECTION("without callback") - { - std::string nested_type_json_str = R"([1, "test"])"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", {1, "test"}}, {"anotherValue", "test"}}), true); - } + SECTION("for array with objects") + { + std::string nested_type_json_str = R"([{"a": 1, "b": "test"}, {"c": 2, "d": "test2"}])"; + std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", {{{"a", 1}, {"b", "test"}}, {{"c", 2}, {"d", "test2"}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected["nested"][0].erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_array = j["nested"]; + auto nested_obj = nested_array[0]; + CHECK(nested_type_json_str.substr(1, 21) == root_type_json_str.substr(nested_obj.get_start_position(), nested_obj.get_end_position() - nested_obj.get_start_position())); + CHECK(nested_type_json_str.substr(24, 22) == root_type_json_str.substr(nested_array[1].get_start_position(), nested_array[1].get_end_position() - nested_array[1].get_start_position())); + } + + SECTION("for two levels of nesting objects") + { + std::string nested_type_json_str = R"({"nested2": {"b": "test"}})"; + std::string root_type_json_str = R"({ "a": 2, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"a", 2}, {"nested", {{"nested2", {{"b", "test"}}}}}, {"anotherValue", "test"}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() + + auto j = json::parse(root_type_json_str); + auto nested_obj = j["nested"]["nested2"]; + CHECK(nested_type_json_str.substr(12, 13) == root_type_json_str.substr(nested_obj.get_start_position(), nested_obj.get_end_position() - nested_obj.get_start_position())); } SECTION("for simple types") @@ -1778,30 +1813,22 @@ TEST_CASE("parser class") // 1. string type std::string json_str = R"("test")"; json j = json::parse(json_str, cb); - CHECK(j == json("test")); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, "test"); // 2. number type json_str = R"(1)"; j = json::parse(json_str, cb); - CHECK(j == json(1)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, 1); // 3. boolean type json_str = R"(true)"; j = json::parse(json_str, cb); - CHECK(j == json(true)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, true); // 4. null type json_str = R"(null)"; j = json::parse(json_str, cb); - CHECK(j == json(nullptr)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, nullptr); } SECTION("without callback") @@ -1809,115 +1836,75 @@ TEST_CASE("parser class") // 1. string type std::string json_str = R"("test")"; json j = json::parse(json_str); - CHECK(j == json("test")); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, "test"); // 2. number type json_str = R"(1)"; j = json::parse(json_str); - CHECK(j == json(1)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, 1); + + json_str = R"(1.001239923)"; + j = json::parse(json_str); + validateFn(json_str, j, 1.001239923); + + json_str = R"(1.123812389000000)"; + j = json::parse(json_str); + validateFn(json_str, j, 1.123812389); // 3. boolean type json_str = R"(true)"; j = json::parse(json_str); - CHECK(j == json(true)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, true); + + json_str = R"(false)"; + j = json::parse(json_str); + validateFn(json_str, j, false); // 4. null type json_str = R"(null)"; j = json::parse(json_str); - CHECK(j == json(nullptr)); - CHECK(j.get_start_position() == 0); - CHECK(j.get_end_position() == json_str.size()); + validateFn(json_str, j, nullptr); } } SECTION("string type") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"("test")"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), true, cb); - } - - SECTION("without callback") - { - std::string nested_type_json_str = R"("test")"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", "test"}, {"anotherValue", "test"}}), true); - } + std::string nested_type_json_str = R"("test")"; + std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", "test"}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() } SECTION("number type") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"(1)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), true, cb); - } - - SECTION("without callback") - { - std::string nested_type_json_str = R"(1)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", 1}, {"anotherValue", "test"}}), true); - } + std::string nested_type_json_str = R"(2)"; + std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", 2}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() } SECTION("boolean type") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"(true)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), true, cb); - } - - SECTION("without callback") - { - std::string nested_type_json_str = R"(true)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", true}, {"anotherValue", "test"}}), true); - } + std::string nested_type_json_str = R"(true)"; + std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", true}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() } SECTION("null type") { - SECTION("with callback") - { - json::parser_callback_t const cb = [](int /*unused*/, json::parse_event_t /*unused*/, json& /*unused*/) noexcept - { - return true; - }; - std::string nested_type_json_str = R"(null)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), true, cb); - } - - SECTION("without callback") - { - std::string nested_type_json_str = R"(null)"; - std::string root_type_json_str = R"({ "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; - start_pos_end_pos_helper(nested_type_json_str, root_type_json_str, json({{"nested", nullptr}, {"anotherValue", "test"}}), true); - } + std::string nested_type_json_str = R"(null)"; + std::string root_type_json_str = R"({ "a": 1, "nested": )" + nested_type_json_str + R"(, "anotherValue": "test" })"; + auto expected = json({{"nested", nullptr}, {"anotherValue", "test"}, {"a", 1}}); + auto filteredExpected = expected; + filteredExpected.erase("a"); + SETUP_TESTCASES() } } } From 64ad6ce64b99c845922cbe531738b9c12492e8a3 Mon Sep 17 00:00:00 2001 From: Sush Shringarputale Date: Tue, 12 Nov 2024 17:42:35 -0800 Subject: [PATCH 05/76] Amalgamate --- docs/examples/binary_t.cpp | 2 +- ...h_subtype__byte_container_with_subtype.cpp | 2 +- ..._container_with_subtype__clear_subtype.cpp | 2 +- ...te_container_with_subtype__has_subtype.cpp | 2 +- ...te_container_with_subtype__set_subtype.cpp | 2 +- .../byte_container_with_subtype__subtype.cpp | 2 +- docs/examples/contains__keytype.c++17.cpp | 6 +- docs/examples/contains__object_t_key_type.cpp | 6 +- docs/examples/get__ValueType_const.cpp | 4 +- .../json_pointer__operator__equal.cpp | 8 +- ...on_pointer__operator__equal_stringtype.cpp | 6 +- .../nlohmann_json_namespace_begin.c++17.cpp | 2 +- .../nlohmann/byte_container_with_subtype.hpp | 2 +- .../nlohmann/detail/conversions/from_json.hpp | 12 +- .../nlohmann/detail/conversions/to_chars.hpp | 76 +- .../nlohmann/detail/conversions/to_json.hpp | 4 +- .../nlohmann/detail/input/binary_reader.hpp | 16 +- .../nlohmann/detail/input/input_adapters.hpp | 18 +- include/nlohmann/detail/input/parser.hpp | 2 +- .../detail/iterators/iteration_proxy.hpp | 12 +- .../detail/iterators/iterator_traits.hpp | 4 +- include/nlohmann/detail/json_pointer.hpp | 8 +- include/nlohmann/detail/macro_scope.hpp | 2 +- include/nlohmann/detail/meta/detected.hpp | 6 +- include/nlohmann/detail/meta/type_traits.hpp | 134 +- .../nlohmann/detail/output/binary_writer.hpp | 12 +- .../detail/output/output_adapters.hpp | 24 +- include/nlohmann/detail/output/serializer.hpp | 4 +- include/nlohmann/json.hpp | 94 +- include/nlohmann/ordered_map.hpp | 30 +- single_include/nlohmann/json.hpp | 24906 ---------------- tests/benchmarks/src/benchmarks.cpp | 14 +- tests/src/unit-bjdata.cpp | 14 +- .../src/unit-byte_container_with_subtype.cpp | 14 +- tests/src/unit-cbor.cpp | 14 +- tests/src/unit-comparison.cpp | 16 +- tests/src/unit-concepts.cpp | 2 +- tests/src/unit-constructor1.cpp | 16 +- tests/src/unit-conversions.cpp | 178 +- tests/src/unit-custom-base-class.cpp | 18 +- tests/src/unit-inspection.cpp | 8 +- tests/src/unit-iterators2.cpp | 136 +- tests/src/unit-json_patch.cpp | 22 +- tests/src/unit-json_pointer.cpp | 2 +- tests/src/unit-msgpack.cpp | 10 +- tests/src/unit-regression1.cpp | 14 +- tests/src/unit-regression2.cpp | 40 +- tests/src/unit-serialization.cpp | 128 +- tests/src/unit-testsuites.cpp | 4 +- tests/src/unit-to_chars.cpp | 280 +- tests/src/unit-udt.cpp | 40 +- tests/src/unit-udt_macro.cpp | 100 +- 52 files changed, 787 insertions(+), 25693 deletions(-) diff --git a/docs/examples/binary_t.cpp b/docs/examples/binary_t.cpp index bfaee5ca86..2b30a49c6f 100644 --- a/docs/examples/binary_t.cpp +++ b/docs/examples/binary_t.cpp @@ -6,5 +6,5 @@ using json = nlohmann::json; int main() { - std::cout << std::boolalpha << std::is_same>, json::binary_t>::value << std::endl; + std::cout << std::boolalpha << std::is_same>, json::binary_t>::value << std::endl; } diff --git a/docs/examples/byte_container_with_subtype__byte_container_with_subtype.cpp b/docs/examples/byte_container_with_subtype__byte_container_with_subtype.cpp index 1c10be5c26..68647aa3c5 100644 --- a/docs/examples/byte_container_with_subtype__byte_container_with_subtype.cpp +++ b/docs/examples/byte_container_with_subtype__byte_container_with_subtype.cpp @@ -2,7 +2,7 @@ #include // define a byte container based on std::vector -using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; +using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; using json = nlohmann::json; diff --git a/docs/examples/byte_container_with_subtype__clear_subtype.cpp b/docs/examples/byte_container_with_subtype__clear_subtype.cpp index f9ce6842bb..e29a5d54b9 100644 --- a/docs/examples/byte_container_with_subtype__clear_subtype.cpp +++ b/docs/examples/byte_container_with_subtype__clear_subtype.cpp @@ -2,7 +2,7 @@ #include // define a byte container based on std::vector -using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; +using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; using json = nlohmann::json; diff --git a/docs/examples/byte_container_with_subtype__has_subtype.cpp b/docs/examples/byte_container_with_subtype__has_subtype.cpp index 61c21eaae6..09d808903c 100644 --- a/docs/examples/byte_container_with_subtype__has_subtype.cpp +++ b/docs/examples/byte_container_with_subtype__has_subtype.cpp @@ -2,7 +2,7 @@ #include // define a byte container based on std::vector -using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; +using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; int main() { diff --git a/docs/examples/byte_container_with_subtype__set_subtype.cpp b/docs/examples/byte_container_with_subtype__set_subtype.cpp index b2694c54d5..fd05f62f11 100644 --- a/docs/examples/byte_container_with_subtype__set_subtype.cpp +++ b/docs/examples/byte_container_with_subtype__set_subtype.cpp @@ -2,7 +2,7 @@ #include // define a byte container based on std::vector -using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; +using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; using json = nlohmann::json; diff --git a/docs/examples/byte_container_with_subtype__subtype.cpp b/docs/examples/byte_container_with_subtype__subtype.cpp index cd230ade1e..054b61df25 100644 --- a/docs/examples/byte_container_with_subtype__subtype.cpp +++ b/docs/examples/byte_container_with_subtype__subtype.cpp @@ -2,7 +2,7 @@ #include // define a byte container based on std::vector -using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; +using byte_container_with_subtype = nlohmann::byte_container_with_subtype>; int main() { diff --git a/docs/examples/contains__keytype.c++17.cpp b/docs/examples/contains__keytype.c++17.cpp index 43b62fab10..3826197a28 100644 --- a/docs/examples/contains__keytype.c++17.cpp +++ b/docs/examples/contains__keytype.c++17.cpp @@ -14,7 +14,7 @@ int main() // call contains std::cout << std::boolalpha << - "j_object contains 'key': " << j_object.contains("key"sv) << '\n' << - "j_object contains 'another': " << j_object.contains("another"sv) << '\n' << - "j_array contains 'key': " << j_array.contains("key"sv) << std::endl; + "j_object contains 'key': " << j_object.contains("key"sv) << '\n' << + "j_object contains 'another': " << j_object.contains("another"sv) << '\n' << + "j_array contains 'key': " << j_array.contains("key"sv) << std::endl; } diff --git a/docs/examples/contains__object_t_key_type.cpp b/docs/examples/contains__object_t_key_type.cpp index a8bc8143df..9bd58cecfd 100644 --- a/docs/examples/contains__object_t_key_type.cpp +++ b/docs/examples/contains__object_t_key_type.cpp @@ -12,7 +12,7 @@ int main() // call contains std::cout << std::boolalpha << - "j_object contains 'key': " << j_object.contains("key") << '\n' << - "j_object contains 'another': " << j_object.contains("another") << '\n' << - "j_array contains 'key': " << j_array.contains("key") << std::endl; + "j_object contains 'key': " << j_object.contains("key") << '\n' << + "j_object contains 'another': " << j_object.contains("another") << '\n' << + "j_array contains 'key': " << j_array.contains("key") << std::endl; } diff --git a/docs/examples/get__ValueType_const.cpp b/docs/examples/get__ValueType_const.cpp index db63791fcb..5bd5555c84 100644 --- a/docs/examples/get__ValueType_const.cpp +++ b/docs/examples/get__ValueType_const.cpp @@ -28,8 +28,8 @@ int main() auto v4 = json_types["number"]["floating-point"].template get(); auto v5 = json_types["number"]["floating-point"].template get(); auto v6 = json_types["string"].template get(); - auto v7 = json_types["array"].template get>(); - auto v8 = json_types.template get>(); + auto v7 = json_types["array"].template get> (); + auto v8 = json_types.template get> (); // print the conversion results std::cout << v1 << '\n'; diff --git a/docs/examples/json_pointer__operator__equal.cpp b/docs/examples/json_pointer__operator__equal.cpp index dce6df03c3..adf8b60c15 100644 --- a/docs/examples/json_pointer__operator__equal.cpp +++ b/docs/examples/json_pointer__operator__equal.cpp @@ -12,8 +12,8 @@ int main() // compare JSON pointers std::cout << std::boolalpha - << "\"" << ptr0 << "\" == \"" << ptr0 << "\": " << (ptr0 == ptr0) << '\n' - << "\"" << ptr0 << "\" == \"" << ptr1 << "\": " << (ptr0 == ptr1) << '\n' - << "\"" << ptr1 << "\" == \"" << ptr2 << "\": " << (ptr1 == ptr2) << '\n' - << "\"" << ptr2 << "\" == \"" << ptr2 << "\": " << (ptr2 == ptr2) << std::endl; + << "\"" << ptr0 << "\" == \"" << ptr0 << "\": " << (ptr0 == ptr0) << '\n' + << "\"" << ptr0 << "\" == \"" << ptr1 << "\": " << (ptr0 == ptr1) << '\n' + << "\"" << ptr1 << "\" == \"" << ptr2 << "\": " << (ptr1 == ptr2) << '\n' + << "\"" << ptr2 << "\" == \"" << ptr2 << "\": " << (ptr2 == ptr2) << std::endl; } diff --git a/docs/examples/json_pointer__operator__equal_stringtype.cpp b/docs/examples/json_pointer__operator__equal_stringtype.cpp index af8ec5a29c..dfdff89429 100644 --- a/docs/examples/json_pointer__operator__equal_stringtype.cpp +++ b/docs/examples/json_pointer__operator__equal_stringtype.cpp @@ -18,9 +18,9 @@ int main() // compare JSON pointers and strings std::cout << std::boolalpha - << "\"" << ptr0 << "\" == \"" << str0 << "\": " << (ptr0 == str0) << '\n' - << "\"" << str0 << "\" == \"" << ptr1 << "\": " << (str0 == ptr1) << '\n' - << "\"" << ptr2 << "\" == \"" << str1 << "\": " << (ptr2 == str1) << std::endl; + << "\"" << ptr0 << "\" == \"" << str0 << "\": " << (ptr0 == str0) << '\n' + << "\"" << str0 << "\" == \"" << ptr1 << "\": " << (str0 == ptr1) << '\n' + << "\"" << ptr2 << "\" == \"" << str1 << "\": " << (ptr2 == str1) << std::endl; try { diff --git a/docs/examples/nlohmann_json_namespace_begin.c++17.cpp b/docs/examples/nlohmann_json_namespace_begin.c++17.cpp index 9385d593d5..bb83f09aed 100644 --- a/docs/examples/nlohmann_json_namespace_begin.c++17.cpp +++ b/docs/examples/nlohmann_json_namespace_begin.c++17.cpp @@ -5,7 +5,7 @@ // partial specialization (see https://json.nlohmann.me/features/arbitrary_types/) NLOHMANN_JSON_NAMESPACE_BEGIN template -struct adl_serializer> +struct adl_serializer> { static void to_json(json& j, const std::optional& opt) { diff --git a/include/nlohmann/byte_container_with_subtype.hpp b/include/nlohmann/byte_container_with_subtype.hpp index 91382cd682..f4be96ad0b 100644 --- a/include/nlohmann/byte_container_with_subtype.hpp +++ b/include/nlohmann/byte_container_with_subtype.hpp @@ -57,7 +57,7 @@ class byte_container_with_subtype : public BinaryType bool operator==(const byte_container_with_subtype& rhs) const { return std::tie(static_cast(*this), m_subtype, m_has_subtype) == - std::tie(static_cast(rhs), rhs.m_subtype, rhs.m_has_subtype); + std::tie(static_cast(rhs), rhs.m_subtype, rhs.m_has_subtype); } bool operator!=(const byte_container_with_subtype& rhs) const diff --git a/include/nlohmann/detail/conversions/from_json.hpp b/include/nlohmann/detail/conversions/from_json.hpp index aa2f0cbf4c..3a8b131721 100644 --- a/include/nlohmann/detail/conversions/from_json.hpp +++ b/include/nlohmann/detail/conversions/from_json.hpp @@ -275,13 +275,13 @@ void()) template < typename BasicJsonType, typename T, std::size_t... Idx > std::array from_json_inplace_array_impl(BasicJsonType&& j, - identity_tag> /*unused*/, index_sequence /*unused*/) + identity_tag> /*unused*/, index_sequence /*unused*/) { return { { std::forward(j).at(Idx).template get()... } }; } template < typename BasicJsonType, typename T, std::size_t N > -auto from_json(BasicJsonType&& j, identity_tag> tag) +auto from_json(BasicJsonType&& j, identity_tag> tag) -> decltype(from_json_inplace_array_impl(std::forward(j), tag, make_index_sequence {})) { if (JSON_HEDLEY_UNLIKELY(!j.is_array())) @@ -380,7 +380,7 @@ std::tuple from_json_tuple_impl_base(BasicJsonType&& j, index_sequence< } template < typename BasicJsonType, class A1, class A2 > -std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<0> /*unused*/) +std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<0> /*unused*/) { return {std::forward(j).at(0).template get(), std::forward(j).at(1).template get()}; @@ -389,11 +389,11 @@ std::pair from_json_tuple_impl(BasicJsonType&& j, identity_tag inline void from_json_tuple_impl(BasicJsonType&& j, std::pair& p, priority_tag<1> /*unused*/) { - p = from_json_tuple_impl(std::forward(j), identity_tag> {}, priority_tag<0> {}); + p = from_json_tuple_impl(std::forward(j), identity_tag> {}, priority_tag<0> {}); } template -std::tuple from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<2> /*unused*/) +std::tuple from_json_tuple_impl(BasicJsonType&& j, identity_tag> /*unused*/, priority_tag<2> /*unused*/) { return from_json_tuple_impl_base(std::forward(j), index_sequence_for {}); } @@ -471,7 +471,7 @@ inline void from_json(const BasicJsonType& j, std_fs::path& p) struct from_json_fn { template - auto operator()(const BasicJsonType& j, T&& val) const + auto operator()(const BasicJsonType & j, T&& val) const noexcept(noexcept(from_json(j, std::forward(val)))) -> decltype(from_json(j, std::forward(val))) { diff --git a/include/nlohmann/detail/conversions/to_chars.hpp b/include/nlohmann/detail/conversions/to_chars.hpp index e10741c923..369897fa43 100644 --- a/include/nlohmann/detail/conversions/to_chars.hpp +++ b/include/nlohmann/detail/conversions/to_chars.hpp @@ -386,43 +386,43 @@ inline cached_power get_cached_power_for_binary_exponent(int e) { 0xAB70FE17C79AC6CA, -1060, -300 }, { 0xFF77B1FCBEBCDC4F, -1034, -292 }, { 0xBE5691EF416BD60C, -1007, -284 }, - { 0x8DD01FAD907FFC3C, -980, -276 }, - { 0xD3515C2831559A83, -954, -268 }, - { 0x9D71AC8FADA6C9B5, -927, -260 }, - { 0xEA9C227723EE8BCB, -901, -252 }, - { 0xAECC49914078536D, -874, -244 }, - { 0x823C12795DB6CE57, -847, -236 }, - { 0xC21094364DFB5637, -821, -228 }, - { 0x9096EA6F3848984F, -794, -220 }, - { 0xD77485CB25823AC7, -768, -212 }, - { 0xA086CFCD97BF97F4, -741, -204 }, - { 0xEF340A98172AACE5, -715, -196 }, - { 0xB23867FB2A35B28E, -688, -188 }, - { 0x84C8D4DFD2C63F3B, -661, -180 }, - { 0xC5DD44271AD3CDBA, -635, -172 }, - { 0x936B9FCEBB25C996, -608, -164 }, - { 0xDBAC6C247D62A584, -582, -156 }, - { 0xA3AB66580D5FDAF6, -555, -148 }, - { 0xF3E2F893DEC3F126, -529, -140 }, - { 0xB5B5ADA8AAFF80B8, -502, -132 }, - { 0x87625F056C7C4A8B, -475, -124 }, - { 0xC9BCFF6034C13053, -449, -116 }, - { 0x964E858C91BA2655, -422, -108 }, - { 0xDFF9772470297EBD, -396, -100 }, - { 0xA6DFBD9FB8E5B88F, -369, -92 }, - { 0xF8A95FCF88747D94, -343, -84 }, - { 0xB94470938FA89BCF, -316, -76 }, - { 0x8A08F0F8BF0F156B, -289, -68 }, - { 0xCDB02555653131B6, -263, -60 }, - { 0x993FE2C6D07B7FAC, -236, -52 }, - { 0xE45C10C42A2B3B06, -210, -44 }, - { 0xAA242499697392D3, -183, -36 }, - { 0xFD87B5F28300CA0E, -157, -28 }, - { 0xBCE5086492111AEB, -130, -20 }, - { 0x8CBCCC096F5088CC, -103, -12 }, - { 0xD1B71758E219652C, -77, -4 }, - { 0x9C40000000000000, -50, 4 }, - { 0xE8D4A51000000000, -24, 12 }, + { 0x8DD01FAD907FFC3C, -980, -276 }, + { 0xD3515C2831559A83, -954, -268 }, + { 0x9D71AC8FADA6C9B5, -927, -260 }, + { 0xEA9C227723EE8BCB, -901, -252 }, + { 0xAECC49914078536D, -874, -244 }, + { 0x823C12795DB6CE57, -847, -236 }, + { 0xC21094364DFB5637, -821, -228 }, + { 0x9096EA6F3848984F, -794, -220 }, + { 0xD77485CB25823AC7, -768, -212 }, + { 0xA086CFCD97BF97F4, -741, -204 }, + { 0xEF340A98172AACE5, -715, -196 }, + { 0xB23867FB2A35B28E, -688, -188 }, + { 0x84C8D4DFD2C63F3B, -661, -180 }, + { 0xC5DD44271AD3CDBA, -635, -172 }, + { 0x936B9FCEBB25C996, -608, -164 }, + { 0xDBAC6C247D62A584, -582, -156 }, + { 0xA3AB66580D5FDAF6, -555, -148 }, + { 0xF3E2F893DEC3F126, -529, -140 }, + { 0xB5B5ADA8AAFF80B8, -502, -132 }, + { 0x87625F056C7C4A8B, -475, -124 }, + { 0xC9BCFF6034C13053, -449, -116 }, + { 0x964E858C91BA2655, -422, -108 }, + { 0xDFF9772470297EBD, -396, -100 }, + { 0xA6DFBD9FB8E5B88F, -369, -92 }, + { 0xF8A95FCF88747D94, -343, -84 }, + { 0xB94470938FA89BCF, -316, -76 }, + { 0x8A08F0F8BF0F156B, -289, -68 }, + { 0xCDB02555653131B6, -263, -60 }, + { 0x993FE2C6D07B7FAC, -236, -52 }, + { 0xE45C10C42A2B3B06, -210, -44 }, + { 0xAA242499697392D3, -183, -36 }, + { 0xFD87B5F28300CA0E, -157, -28 }, + { 0xBCE5086492111AEB, -130, -20 }, + { 0x8CBCCC096F5088CC, -103, -12 }, + { 0xD1B71758E219652C, -77, -4 }, + { 0x9C40000000000000, -50, 4 }, + { 0xE8D4A51000000000, -24, 12 }, { 0xAD78EBC5AC620000, 3, 20 }, { 0x813F3978F8940984, 30, 28 }, { 0xC097CE7BC90715B3, 56, 36 }, @@ -572,7 +572,7 @@ inline void grisu2_round(char* buf, int len, std::uint64_t dist, std::uint64_t d while (rest < dist && delta - rest >= ten_k - && (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) + && (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { JSON_ASSERT(buf[len - 1] != '0'); buf[len - 1]--; diff --git a/include/nlohmann/detail/conversions/to_json.hpp b/include/nlohmann/detail/conversions/to_json.hpp index 562089c330..98e1b584a1 100644 --- a/include/nlohmann/detail/conversions/to_json.hpp +++ b/include/nlohmann/detail/conversions/to_json.hpp @@ -394,7 +394,7 @@ inline void to_json(BasicJsonType& j, const std::pair& p) // for https://github.com/nlohmann/json/pull/1134 template>::value, int> = 0> + enable_if_t>::value, int> = 0> inline void to_json(BasicJsonType& j, const T& b) { j = { {b.key(), b.value()} }; @@ -423,7 +423,7 @@ inline void to_json(BasicJsonType& j, const std_fs::path& p) struct to_json_fn { template - auto operator()(BasicJsonType& j, T&& val) const noexcept(noexcept(to_json(j, std::forward(val)))) + auto operator()(BasicJsonType & j, T&& val) const noexcept(noexcept(to_json(j, std::forward(val)))) -> decltype(to_json(j, std::forward(val)), void()) { return to_json(j, std::forward(val)); diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index d6fc97ddb6..8b91644860 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -62,7 +62,7 @@ static inline bool little_endianness(int num = 1) noexcept /*! @brief deserialization of CBOR, MessagePack, and UBJSON values */ -template> +template> class binary_reader { using number_integer_t = typename BasicJsonType::number_integer_t; @@ -2005,7 +2005,7 @@ class binary_reader @return whether size determination completed */ - bool get_ubjson_size_value(std::size_t& result, bool& is_ndarray, char_int_type prefix = 0) + bool get_ubjson_size_value(std::size_t& result, bool & is_ndarray, char_int_type prefix = 0) { if (prefix == 0) { @@ -2240,7 +2240,7 @@ class binary_reader { result.second = get(); // must not ignore 'N', because 'N' maybe the type if (input_format == input_format_t::bjdata - && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second))) + && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second))) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, @@ -2784,7 +2784,7 @@ class binary_reader on big endian systems. */ template - bool get_number(const input_format_t format, NumberType& result) + bool get_number(const input_format_t format, NumberType & result) { // step 1: read input into array with system's byte order std::array vec{}; @@ -2911,8 +2911,8 @@ class binary_reader @return a message string to use in the parse_error exceptions */ std::string exception_message(const input_format_t format, - const std::string& detail, - const std::string& context) const + const std::string & detail, + const std::string & context) const { std::string error_msg = "syntax error while parsing "; @@ -2989,12 +2989,12 @@ class binary_reader // lookup tables // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers = - JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_; + JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_; using bjd_type = std::pair; // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map = - JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_; + JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_; #undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ #undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 33fca3e4b9..2e4fe191ee 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -362,7 +362,7 @@ struct is_iterator_of_multibyte }; template -struct iterator_input_adapter_factory::value>> +struct iterator_input_adapter_factory::value >> { using iterator_type = IteratorType; using char_type = typename std::iterator_traits::value_type; @@ -398,15 +398,15 @@ struct container_input_adapter_factory {}; template struct container_input_adapter_factory< ContainerType, - void_t()), end(std::declval()))>> - { - using adapter_type = decltype(input_adapter(begin(std::declval()), end(std::declval()))); - - static adapter_type create(const ContainerType& container) + void_t()), end(std::declval())) >> { - return input_adapter(begin(container), end(container)); -} - }; + using adapter_type = decltype(input_adapter(begin(std::declval()), end(std::declval()))); + + static adapter_type create(const ContainerType & container) + { + return input_adapter(begin(container), end(container)); + } +}; } // namespace container_input_adapter_factory_impl diff --git a/include/nlohmann/detail/input/parser.hpp b/include/nlohmann/detail/input/parser.hpp index 72efddcd5b..57d103e1dc 100644 --- a/include/nlohmann/detail/input/parser.hpp +++ b/include/nlohmann/detail/input/parser.hpp @@ -348,7 +348,7 @@ class parser return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string(), parse_error::create(101, m_lexer.get_position(), - "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); + "attempting to parse an empty input; check that your input string or stream contains the expected JSON", nullptr)); } return sax->parse_error(m_lexer.get_position(), diff --git a/include/nlohmann/detail/iterators/iteration_proxy.hpp b/include/nlohmann/detail/iterators/iteration_proxy.hpp index 76293de227..fc79ce928c 100644 --- a/include/nlohmann/detail/iterators/iteration_proxy.hpp +++ b/include/nlohmann/detail/iterators/iteration_proxy.hpp @@ -41,7 +41,7 @@ template class iteration_proxy_value using pointer = value_type *; using reference = value_type &; using iterator_category = std::input_iterator_tag; - using string_type = typename std::remove_cv< typename std::remove_reference().key() ) >::type >::type; + using string_type = typename std::remove_cv< typename std::remove_reference().key() ) >::type >::type; private: /// the iterator @@ -219,16 +219,16 @@ namespace std #pragma clang diagnostic ignored "-Wmismatched-tags" #endif template -class tuple_size<::nlohmann::detail::iteration_proxy_value> // NOLINT(cert-dcl58-cpp) - : public std::integral_constant {}; +class tuple_size<::nlohmann::detail::iteration_proxy_value> // NOLINT(cert-dcl58-cpp) + : public std::integral_constant {}; template class tuple_element> // NOLINT(cert-dcl58-cpp) { public: using type = decltype( - get(std::declval < - ::nlohmann::detail::iteration_proxy_value> ())); + get(std::declval < + ::nlohmann::detail::iteration_proxy_value> ())); }; #if defined(__clang__) #pragma clang diagnostic pop @@ -238,5 +238,5 @@ class tuple_element> #if JSON_HAS_RANGES template - inline constexpr bool ::std::ranges::enable_borrowed_range<::nlohmann::detail::iteration_proxy> = true; + inline constexpr bool ::std::ranges::enable_borrowed_range<::nlohmann::detail::iteration_proxy> = true; #endif diff --git a/include/nlohmann/detail/iterators/iterator_traits.hpp b/include/nlohmann/detail/iterators/iterator_traits.hpp index 84cc27a85e..777501f94a 100644 --- a/include/nlohmann/detail/iterators/iterator_traits.hpp +++ b/include/nlohmann/detail/iterators/iterator_traits.hpp @@ -43,12 +43,12 @@ struct iterator_traits template struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types + : iterator_types { }; template -struct iterator_traits::value>> +struct iterator_traits::value >> { using iterator_category = std::random_access_iterator_tag; using value_type = T; diff --git a/include/nlohmann/detail/json_pointer.hpp b/include/nlohmann/detail/json_pointer.hpp index 4fdcd9ad28..85a14b760f 100644 --- a/include/nlohmann/detail/json_pointer.hpp +++ b/include/nlohmann/detail/json_pointer.hpp @@ -232,8 +232,8 @@ class json_pointer errno = 0; // strtoull doesn't reset errno const unsigned long long res = std::strtoull(p, &p_end, 10); // NOLINT(runtime/int) if (p == p_end // invalid input or empty string - || errno == ERANGE // out of range - || JSON_HEDLEY_UNLIKELY(static_cast(p_end - p) != s.size())) // incomplete read + || errno == ERANGE // out of range + || JSON_HEDLEY_UNLIKELY(static_cast(p_end - p) != s.size())) // incomplete read { JSON_THROW(detail::out_of_range::create(404, detail::concat("unresolved reference token '", s, "'"), nullptr)); } @@ -708,8 +708,8 @@ class json_pointer // ~ must be followed by 0 or 1 if (JSON_HEDLEY_UNLIKELY(pos == reference_token.size() - 1 || - (reference_token[pos + 1] != '0' && - reference_token[pos + 1] != '1'))) + (reference_token[pos + 1] != '0' && + reference_token[pos + 1] != '1'))) { JSON_THROW(detail::parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'", nullptr)); } diff --git a/include/nlohmann/detail/macro_scope.hpp b/include/nlohmann/detail/macro_scope.hpp index 97127a6462..bdc8e132e2 100644 --- a/include/nlohmann/detail/macro_scope.hpp +++ b/include/nlohmann/detail/macro_scope.hpp @@ -454,7 +454,7 @@ struct would_call_std_##std_name \ { \ static constexpr auto const value = ::nlohmann::detail:: \ - is_detected_exact::value; \ + is_detected_exact::value; \ }; \ } /* namespace detail2 */ \ \ diff --git a/include/nlohmann/detail/meta/detected.hpp b/include/nlohmann/detail/meta/detected.hpp index 1db9bf9ca3..a9bde2f73e 100644 --- a/include/nlohmann/detail/meta/detected.hpp +++ b/include/nlohmann/detail/meta/detected.hpp @@ -38,7 +38,7 @@ struct detector }; template class Op, class... Args> -struct detector>, Op, Args...> +struct detector>, Op, Args...> { using value_t = std::true_type; using type = Op; @@ -60,9 +60,9 @@ template class Op, class... Args> using detected_or_t = typename detected_or::type; template class Op, class... Args> -using is_detected_exact = std::is_same>; +using is_detected_exact = std::is_same>; -template class Op, class... Args> +template class Op, class... Args > using is_detected_convertible = std::is_convertible, To>; diff --git a/include/nlohmann/detail/meta/type_traits.hpp b/include/nlohmann/detail/meta/type_traits.hpp index e1b000dcc2..0b08f090ec 100644 --- a/include/nlohmann/detail/meta/type_traits.hpp +++ b/include/nlohmann/detail/meta/type_traits.hpp @@ -73,7 +73,7 @@ template struct is_json_ref : std::false_type {}; template -struct is_json_ref> : std::true_type {}; +struct is_json_ref> : std::true_type {}; ////////////////////////// // aliases for detected // @@ -130,7 +130,7 @@ struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value static constexpr bool value = is_detected_exact::value; + const BasicJsonType&, T&>::value; }; // This trait checks if JSONSerializer::from_json(json const&) exists @@ -145,7 +145,7 @@ struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_jso static constexpr bool value = is_detected_exact::value; + const BasicJsonType&>::value; }; // This trait checks if BasicJsonType::json_serializer::to_json exists @@ -160,7 +160,7 @@ struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> static constexpr bool value = is_detected_exact::value; + T>::value; }; template @@ -176,7 +176,7 @@ struct actual_object_comparator using object_t = typename BasicJsonType::object_t; using object_comparator_t = typename BasicJsonType::default_object_comparator_t; using type = typename std::conditional < has_key_compare::value, - typename object_t::key_compare, object_comparator_t>::type; + typename object_t::key_compare, object_comparator_t>::type; }; template @@ -260,58 +260,58 @@ template struct is_default_constructible : std::is_default_constructible {}; template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; +struct is_default_constructible> + : conjunction, is_default_constructible> {}; template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; +struct is_default_constructible> + : conjunction, is_default_constructible> {}; template -struct is_default_constructible> - : conjunction...> {}; +struct is_default_constructible> + : conjunction... > {}; template -struct is_default_constructible> - : conjunction...> {}; +struct is_default_constructible> + : conjunction... > {}; template struct is_constructible : std::is_constructible {}; template -struct is_constructible> : is_default_constructible> {}; +struct is_constructible> : is_default_constructible> {}; template -struct is_constructible> : is_default_constructible> {}; +struct is_constructible> : is_default_constructible> {}; template -struct is_constructible> : is_default_constructible> {}; +struct is_constructible> : is_default_constructible> {}; template -struct is_constructible> : is_default_constructible> {}; +struct is_constructible> : is_default_constructible> {}; template struct is_iterator_traits : std::false_type {}; template -struct is_iterator_traits> +struct is_iterator_traits> { - private: + private: using traits = iterator_traits; - public: + public: static constexpr auto value = is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value; + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value; }; template struct is_range { - private: + private: using t_ref = typename std::add_lvalue_reference::type; using iterator = detected_t; @@ -321,17 +321,17 @@ struct is_range // and https://en.cppreference.com/w/cpp/iterator/sentinel_for // but reimplementing these would be too much work, as a lot of other concepts are used underneath static constexpr auto is_iterator_begin = - is_iterator_traits>::value; + is_iterator_traits>::value; - public: + public: static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; }; template -using iterator_t = enable_if_t::value, result_of_begin())>>; +using iterator_t = enable_if_t::value, result_of_begin()) >>; template -using range_value_t = value_type_t>>; +using range_value_t = value_type_t>>; // The following implementation of is_complete_type is taken from // https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ @@ -358,9 +358,9 @@ struct is_compatible_object_type_impl < // macOS's is_constructible does not play well with nonesuch... static constexpr bool value = is_constructible::value && - is_constructible::value; + typename CompatibleObjectType::key_type>::value && + is_constructible::value; }; template @@ -420,8 +420,8 @@ struct is_constructible_string_type static constexpr auto value = conjunction < is_constructible, - is_detected_exact>::value; + is_detected_exact>::value; }; template @@ -432,14 +432,14 @@ struct is_compatible_array_type_impl < BasicJsonType, CompatibleArrayType, enable_if_t < is_detected::value&& - is_iterator_traits>>::value&& + is_iterator_traits>>::value&& // special case for types like std::filesystem::path whose iterator's value_type are themselves // c.f. https://github.com/nlohmann/json/pull/3073 - !std::is_same>::value >> + !std::is_same>::value >> { static constexpr bool value = is_constructible>::value; + range_value_t>::value; }; template @@ -466,24 +466,24 @@ struct is_constructible_array_type_impl < (std::is_move_assignable::value || std::is_copy_assignable::value)&& is_detected::value&& -is_iterator_traits>>::value&& +is_iterator_traits>>::value&& is_detected::value&& // special case for types like std::filesystem::path whose iterator's value_type are themselves // c.f. https://github.com/nlohmann/json/pull/3073 -!std::is_same>::value&& - is_complete_type < - detected_t>::value >> +!std::is_same>::value&& +is_complete_type < +detected_t>::value >> { using value_type = range_value_t; static constexpr bool value = std::is_same::value || - has_from_json::value || - has_non_default_from_json < - BasicJsonType, - value_type >::value; + typename BasicJsonType::array_t::value_type>::value || + has_from_json::value || + has_non_default_from_json < + BasicJsonType, + value_type >::value; }; template @@ -507,9 +507,9 @@ struct is_compatible_integer_type_impl < static constexpr auto value = is_constructible::value && - CompatibleLimits::is_integer && - RealLimits::is_signed == CompatibleLimits::is_signed; + CompatibleNumberIntegerType>::value && + CompatibleLimits::is_integer && + RealLimits::is_signed == CompatibleLimits::is_signed; }; template @@ -537,7 +537,7 @@ template struct is_constructible_tuple : std::false_type {}; template -struct is_constructible_tuple> : conjunction...> {}; +struct is_constructible_tuple> : conjunction... > {}; template struct is_json_iterator_of : std::false_type {}; @@ -554,10 +554,10 @@ template