From f06c8fd8e310ad57087142d395ec1ea5523e2c49 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Fri, 14 Sep 2018 22:58:22 +0200 Subject: [PATCH 01/36] BSON: serialization of non-objects is not supported --- include/nlohmann/detail/exceptions.hpp | 1 + .../nlohmann/detail/input/binary_reader.hpp | 28 ++++ .../nlohmann/detail/input/input_adapters.hpp | 2 +- .../nlohmann/detail/output/binary_writer.hpp | 55 +++++++ include/nlohmann/json.hpp | 48 +++++++ single_include/nlohmann/json.hpp | 134 +++++++++++++++++- test/src/unit-bson.cpp | 70 +++++++++ 7 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 test/src/unit-bson.cpp diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index b73d7b1f9..274a88c7f 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -220,6 +220,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 05ab36f39..9f684273d 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -80,6 +80,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -120,6 +124,30 @@ class binary_reader } private: + + bool parse_bson_internal() + { + int docLen = 0; + int byte; + for (int i = 0; i < 4; ++i) + { + byte = get(); + if (JSON_UNLIKELY(current == std::char_traits::eof())) + { + if (i == 1) + { + return sax->boolean(docLen != 0x00); + } + return false; + } + docLen |= static_cast(byte) << 8 * i; + } + + //sax->null(); + get(); + return true; + } + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index c2a20ab7d..a877984e9 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -18,7 +18,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 71e5ec81e..f58213f5f 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -676,6 +676,37 @@ class binary_writer } } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const BasicJsonType& j) + { + assert(j.type() == value_t::object); + + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + break; + case value_t::discarded: + break; + case value_t::object: + write_bson_object(j); + break; + } + } + + private: /* @brief write a number to output input @@ -704,6 +735,30 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + /* + @brief write a number to output in little endian format + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + */ + template + void write_number_little_endian(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (!is_little_endian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + + // UBJSON: write number (floating point) template::value, int>::type = 0> diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index ee78c1c16..8b6a01707 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6590,6 +6590,26 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! @brief create a JSON value from an input in CBOR format @@ -6897,6 +6917,34 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + + + + + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 69e4bddcf..606a35746 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -819,6 +819,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -1882,7 +1883,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // @@ -6020,6 +6021,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -6060,6 +6065,30 @@ class binary_reader } private: + + bool parse_bson_internal() + { + int docLen = 0; + int byte; + for (int i = 0; i < 4; ++i) + { + byte = get(); + if (JSON_UNLIKELY(current == std::char_traits::eof())) + { + if (i == 1) + { + return sax->boolean(docLen != 0x00); + } + return false; + } + docLen |= static_cast(byte) << 8 * i; + } + + //sax->null(); + get(); + return true; + } + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read @@ -8317,6 +8346,37 @@ class binary_writer } } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const BasicJsonType& j) + { + assert(j.type() == value_t::object); + + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + break; + case value_t::discarded: + break; + case value_t::object: + write_bson_object(j); + break; + } + } + + private: /* @brief write a number to output input @@ -8345,6 +8405,30 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + /* + @brief write a number to output in little endian format + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + */ + template + void write_number_little_endian(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (!is_little_endian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -17663,6 +17747,26 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! @brief create a JSON value from an input in CBOR format @@ -17970,6 +18074,34 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + + + + + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp new file mode 100644 index 000000000..4e17f2339 --- /dev/null +++ b/test/src/unit-bson.cpp @@ -0,0 +1,70 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.2.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "catch.hpp" + +#include +using nlohmann::json; + +#include + +TEST_CASE("BSON") +{ + SECTION("individual values") + { + SECTION("discarded") + { + // discarded values are not serialized + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("null") + { + json j = nullptr; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + + SECTION("boolean") + { + SECTION("true") + { + json j = true; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + + SECTION("false") + { + json j = false; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + } + } +} From 5f5836ce1c7a2fc10a85bab058cf1dd0bc23d9a8 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 00:43:39 +0200 Subject: [PATCH 02/36] BSON: Support empty objects --- .../nlohmann/detail/input/binary_reader.hpp | 52 +++++++++++++----- .../nlohmann/detail/output/binary_writer.hpp | 3 +- single_include/nlohmann/json.hpp | 55 +++++++++++++------ test/src/unit-bson.cpp | 47 +++++++++++++++- 4 files changed, 124 insertions(+), 33 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 9f684273d..9b00cc49f 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -127,25 +127,18 @@ class binary_reader bool parse_bson_internal() { - int docLen = 0; - int byte; - for (int i = 0; i < 4; ++i) + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_object(documentSize - 5))) { - byte = get(); - if (JSON_UNLIKELY(current == std::char_traits::eof())) - { - if (i == 1) - { - return sax->boolean(docLen != 0x00); - } - return false; - } - docLen |= static_cast(byte) << 8 * i; + return false; } - //sax->null(); + const auto result = sax->end_object(); + get(); - return true; + return result; } /*! @@ -927,6 +920,35 @@ class binary_reader return true; } + template + bool get_number_little_endian(NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (!is_little_endian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + /*! @brief create a string by reading characters from the input diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index f58213f5f..4b9a13437 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -684,7 +684,8 @@ class binary_writer void write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); - + write_number_little_endian(5); + oa->write_character(static_cast(0x00)); } /*! diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 606a35746..a68c64a91 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6068,25 +6068,18 @@ class binary_reader bool parse_bson_internal() { - int docLen = 0; - int byte; - for (int i = 0; i < 4; ++i) + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_object(documentSize - 5))) { - byte = get(); - if (JSON_UNLIKELY(current == std::char_traits::eof())) - { - if (i == 1) - { - return sax->boolean(docLen != 0x00); - } - return false; - } - docLen |= static_cast(byte) << 8 * i; + return false; } - //sax->null(); + const auto result = sax->end_object(); + get(); - return true; + return result; } /*! @@ -6868,6 +6861,35 @@ class binary_reader return true; } + template + bool get_number_little_endian(NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (!is_little_endian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + /*! @brief create a string by reading characters from the input @@ -8354,7 +8376,8 @@ class binary_writer void write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); - + write_number_little_endian(5); + oa->write_character(static_cast(0x00)); } /*! diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 4e17f2339..78a301399 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -36,7 +36,7 @@ using nlohmann::json; TEST_CASE("BSON") { - SECTION("individual values") + SECTION("individual values not supported") { SECTION("discarded") { @@ -66,5 +66,50 @@ TEST_CASE("BSON") REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); } } + + SECTION("number") + { + json j = 42; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + + SECTION("float") + { + json j = 4.2; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + + SECTION("string") + { + json j = "not supported"; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + + SECTION("array") + { + json j = std::vector {1, 2, 3, 4, 5, 6, 7}; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + } + } + + SECTION("objects") + { + SECTION("empty object") + { + json j = json::object(); + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From 9a0dddc5d2536d6d1960a2fb89027e3849794306 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 03:08:50 +0200 Subject: [PATCH 03/36] BSON: Object with single boolean --- .../nlohmann/detail/input/binary_reader.hpp | 60 +++++++- .../nlohmann/detail/output/binary_writer.hpp | 44 +++++- .../detail/output/output_adapters.hpp | 43 ++++++ single_include/nlohmann/json.hpp | 141 +++++++++++++++++- test/src/unit-bson.cpp | 48 ++++++ 5 files changed, 330 insertions(+), 6 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 9b00cc49f..54833cc30 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -125,19 +125,75 @@ class binary_reader private: + template + OutputIt generate_until(OutputIt&& d_first, UnaryPredicate&& pred, Gen&& gen) + { + for (auto x = gen(); !pred(x); x = gen()) + { + *d_first++ = x; + } + + return d_first; + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_bson_str(string_t& result) + { + bool success = true; + generate_until(std::back_inserter(result), [](char c) + { + return c == 0x00; + }, [this, &success] + { + get(); + if (JSON_UNLIKELY(unexpect_eof())) + { + success = false; + } + return static_cast(current); + }); + return success; + } + + bool parse_bson_internal() { std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_object(documentSize - 5))) + if (not JSON_UNLIKELY(sax->start_object(-1))) { return false; } - const auto result = sax->end_object(); + while (auto entry_type = get()) + { + switch (entry_type) + { + case 0x01: + { + string_t key; + get_bson_str(key); + sax->key(key); + sax->boolean(static_cast(get())); + } break; + case 0x08: + { + string_t key; + get_bson_str(key); + sax->key(key); + sax->boolean(static_cast(get())); + } break; + } + } get(); + const auto result = sax->end_object(); + return result; } diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 4b9a13437..98e6104bc 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -677,6 +677,16 @@ class binary_writer } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x08)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + oa->write_character(j.m_value.boolean ? static_cast(0x01) : static_cast(0x00)); + return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; + } + /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object @@ -684,8 +694,16 @@ class binary_writer void write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); - write_number_little_endian(5); + auto document_size_offset = oa->reserve_characters(4ul); + std::int32_t document_size = 5ul; + + for (const auto& el : *j.m_value.object) + { + document_size += write_bson_object_entry(el.first, el.second); + } + oa->write_character(static_cast(0x00)); + write_number_little_endian_at(document_size_offset, document_size); } /*! @@ -759,6 +777,30 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + /* + @brief write a number to output in little endian format + + @param[in] offset The offset where to start writing + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + */ + template + void write_number_little_endian_at(std::size_t offset, const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (!is_little_endian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); + } + // UBJSON: write number (floating point) template struct output_adapter_protocol { virtual void write_character(CharType c) = 0; virtual void write_characters(const CharType* s, std::size_t length) = 0; + virtual void write_characters_at(std::size_t position, const CharType* s, std::size_t length) = 0; + virtual std::size_t reserve_characters(std::size_t length) = 0; virtual ~output_adapter_protocol() = default; }; @@ -42,6 +44,18 @@ class output_vector_adapter : public output_adapter_protocol std::copy(s, s + length, std::back_inserter(v)); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::begin(v) + position); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = v.size(); + std::fill_n(std::back_inserter(v), length, static_cast(0x00)); + return position; + } + private: std::vector& v; }; @@ -63,6 +77,22 @@ class output_stream_adapter : public output_adapter_protocol stream.write(s, static_cast(length)); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + const auto orig_offset = stream.tellp(); + stream.seekp(static_cast::pos_type>(position)); + stream.write(s, static_cast(length)); + stream.seekp(orig_offset); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = stream.tellp(); + std::vector empty(length, static_cast(0)); + stream.write(empty.data(), length); + return static_cast(position); + } + private: std::basic_ostream& stream; }; @@ -84,6 +114,19 @@ class output_string_adapter : public output_adapter_protocol str.append(s, length); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::begin(str) + position); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = str.size(); + std::fill_n(std::back_inserter(str), length, static_cast(0x00)); + return position; + } + + private: StringType& str; }; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a68c64a91..6b81d3548 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5838,6 +5838,8 @@ template struct output_adapter_protocol { virtual void write_character(CharType c) = 0; virtual void write_characters(const CharType* s, std::size_t length) = 0; + virtual void write_characters_at(std::size_t position, const CharType* s, std::size_t length) = 0; + virtual std::size_t reserve_characters(std::size_t length) = 0; virtual ~output_adapter_protocol() = default; }; @@ -5862,6 +5864,18 @@ class output_vector_adapter : public output_adapter_protocol std::copy(s, s + length, std::back_inserter(v)); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::begin(v) + position); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = v.size(); + std::fill_n(std::back_inserter(v), length, static_cast(0x00)); + return position; + } + private: std::vector& v; }; @@ -5883,6 +5897,22 @@ class output_stream_adapter : public output_adapter_protocol stream.write(s, static_cast(length)); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + const auto orig_offset = stream.tellp(); + stream.seekp(static_cast::pos_type>(position)); + stream.write(s, static_cast(length)); + stream.seekp(orig_offset); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = stream.tellp(); + std::vector empty(length, static_cast(0)); + stream.write(empty.data(), length); + return static_cast(position); + } + private: std::basic_ostream& stream; }; @@ -5904,6 +5934,19 @@ class output_string_adapter : public output_adapter_protocol str.append(s, length); } + void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::begin(str) + position); + } + + std::size_t reserve_characters(std::size_t length) override + { + const auto position = str.size(); + std::fill_n(std::back_inserter(str), length, static_cast(0x00)); + return position; + } + + private: StringType& str; }; @@ -6066,19 +6109,69 @@ class binary_reader private: + template + OutputIt generate_until(OutputIt&& d_first, UnaryPredicate&& pred, Gen&& gen) + { + for (auto x = gen(); !pred(x); x = gen()) + { + *d_first++ = x; + } + + return d_first; + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_bson_str(string_t& result) + { + bool success = true; + generate_until(std::back_inserter(result), [](char c) + { + return c == 0x00; + }, [this, &success] + { + get(); + if (JSON_UNLIKELY(unexpect_eof())) + { + success = false; + } + return static_cast(current); + }); + return success; + } + + bool parse_bson_internal() { std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_object(documentSize - 5))) + if (not JSON_UNLIKELY(sax->start_object(-1))) { return false; } - const auto result = sax->end_object(); + while (auto entry_type = get()) + { + switch (entry_type) + { + case 0x08: + { + string_t key; + get_bson_str(key); + sax->key(key); + sax->boolean(static_cast(get())); + } + break; + } + } get(); + const auto result = sax->end_object(); + return result; } @@ -8369,6 +8462,16 @@ class binary_writer } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x08)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + oa->write_character(j.m_value.boolean ? static_cast(0x01) : static_cast(0x00)); + return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; + } + /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object @@ -8376,8 +8479,16 @@ class binary_writer void write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); - write_number_little_endian(5); + auto document_size_offset = oa->reserve_characters(4ul); + std::int32_t document_size = 5ul; + + for (const auto& el : *j.m_value.object) + { + document_size += write_bson_object_entry(el.first, el.second); + } + oa->write_character(static_cast(0x00)); + write_number_little_endian_at(document_size_offset, document_size); } /*! @@ -8451,6 +8562,30 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } + /* + @brief write a number to output in little endian format + + @param[in] offset The offset where to start writing + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + */ + template + void write_number_little_endian_at(std::size_t offset, const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (!is_little_endian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); + } + // UBJSON: write number (floating point) template expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, // value = true + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + // SECTION("non-empty object with double") + // { + // json j = + // { + // { "entry", true } + // }; + + // std::vector expected = + // { + // 0x14, 0x00, 0x00, 0x00, // size (little endian) + // 0x01, /// entry: double + // 'e', 'n', 't', 'r', 'y', '\x00', + // 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + // 0x00 // end marker + // }; + + // const auto result = json::to_bson(j); + // CHECK(result == expected); + + // // roundtrip + // //CHECK(json::from_bson(result) == j); + // //CHECK(json::from_bson(result, true, false) == j); + // } } } From 0c0f2e44b5bad1be335d68b35965ef05eb905e90 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 03:23:54 +0200 Subject: [PATCH 04/36] BSON: support doubles --- .../nlohmann/detail/input/binary_reader.hpp | 12 ++-- .../nlohmann/detail/output/binary_writer.hpp | 29 ++++++++- single_include/nlohmann/json.hpp | 41 ++++++++++-- test/src/unit-bson.cpp | 64 +++++++++++++------ 4 files changed, 115 insertions(+), 31 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 54833cc30..dc9015b89 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -137,8 +137,6 @@ class binary_reader } /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size @return whether array creation completed */ bool get_bson_str(string_t& result) @@ -179,15 +177,19 @@ class binary_reader string_t key; get_bson_str(key); sax->key(key); - sax->boolean(static_cast(get())); - } break; + double number; + get_number_little_endian(number); + sax->number_float(static_cast(number), ""); + } + break; case 0x08: { string_t key; get_bson_str(key); sax->key(key); sax->boolean(static_cast(get())); - } break; + } + break; } } diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 98e6104bc..283c2cf27 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -676,8 +676,7 @@ class binary_writer } } - - std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + std::size_t write_bson_boolean(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { oa->write_character(static_cast(0x08)); // boolean oa->write_characters( @@ -687,6 +686,32 @@ class binary_writer return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; } + std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x01)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + write_number_little_endian(j.m_value.number_float); + return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; + } + + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + break; + case value_t::boolean: + return write_bson_boolean(name, j); + case value_t::number_float: + return write_bson_double(name, j); + }; + + return 0ul; + } + /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6b81d3548..3da423890 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6121,8 +6121,6 @@ class binary_reader } /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size @return whether array creation completed */ bool get_bson_str(string_t& result) @@ -6158,6 +6156,16 @@ class binary_reader { switch (entry_type) { + case 0x01: + { + string_t key; + get_bson_str(key); + sax->key(key); + double number; + get_number_little_endian(number); + sax->number_float(static_cast(number), ""); + } + break; case 0x08: { string_t key; @@ -8461,8 +8469,7 @@ class binary_writer } } - - std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + std::size_t write_bson_boolean(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { oa->write_character(static_cast(0x08)); // boolean oa->write_characters( @@ -8472,6 +8479,32 @@ class binary_writer return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; } + std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x01)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + write_number_little_endian(j.m_value.number_float); + return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; + } + + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + break; + case value_t::boolean: + return write_bson_boolean(name, j); + case value_t::number_float: + return write_bson_double(name, j); + }; + + return 0ul; + } + /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 9a7107072..977468070 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -136,28 +136,52 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } - // SECTION("non-empty object with double") - // { - // json j = - // { - // { "entry", true } - // }; + SECTION("non-empty object with bool") + { + json j = + { + { "entry", false } + }; - // std::vector expected = - // { - // 0x14, 0x00, 0x00, 0x00, // size (little endian) - // 0x01, /// entry: double - // 'e', 'n', 't', 'r', 'y', '\x00', - // 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, - // 0x00 // end marker - // }; + std::vector expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00, // value = false + 0x00 // end marker + }; - // const auto result = json::to_bson(j); - // CHECK(result == expected); + const auto result = json::to_bson(j); + CHECK(result == expected); - // // roundtrip - // //CHECK(json::from_bson(result) == j); - // //CHECK(json::from_bson(result, true, false) == j); - // } + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with double") + { + json j = + { + { "entry", 4.2 } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x01, /// entry: double + 'e', 'n', 't', 'r', 'y', '\x00', + 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From 6c447de0768cb9cf235c886525e1aaa411a34e3d Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 11:33:24 +0200 Subject: [PATCH 05/36] BSON: Support objects with string members --- .../nlohmann/detail/input/binary_reader.hpp | 25 ++++++++--- .../nlohmann/detail/output/binary_writer.hpp | 19 +++++++- single_include/nlohmann/json.hpp | 44 ++++++++++++++++--- test/src/unit-bson.cpp | 26 +++++++++++ 4 files changed, 100 insertions(+), 14 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index dc9015b89..113dcb910 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -139,7 +139,7 @@ class binary_reader /*! @return whether array creation completed */ - bool get_bson_str(string_t& result) + bool get_bson_cstr(string_t& result) { bool success = true; generate_until(std::back_inserter(result), [](char c) @@ -148,7 +148,7 @@ class binary_reader }, [this, &success] { get(); - if (JSON_UNLIKELY(unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof())) { success = false; } @@ -172,20 +172,33 @@ class binary_reader { switch (entry_type) { - case 0x01: + case 0x01: // double { string_t key; - get_bson_str(key); + get_bson_cstr(key); sax->key(key); double number; get_number_little_endian(number); sax->number_float(static_cast(number), ""); } break; - case 0x08: + case 0x02: // string { string_t key; - get_bson_str(key); + get_bson_cstr(key); + sax->key(key); + std::int32_t len; + string_t value; + get_number_little_endian(len); + get_string(len - 1ul, value); + get(); + sax->string(value); + } + break; + case 0x08: // boolean + { + string_t key; + get_bson_cstr(key); sax->key(key); sax->boolean(static_cast(get())); } diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 283c2cf27..a377d3484 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -688,7 +688,7 @@ class binary_writer std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - oa->write_character(static_cast(0x01)); // boolean + oa->write_character(static_cast(0x01)); // double oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); @@ -696,6 +696,21 @@ class binary_writer return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; } + std::size_t write_bson_string(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x02)); // string (UTF-8) + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.string->size() + 1ul)); + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size() + 1); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -707,6 +722,8 @@ class binary_writer return write_bson_boolean(name, j); case value_t::number_float: return write_bson_double(name, j); + case value_t::string: + return write_bson_string(name, j); }; return 0ul; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3da423890..c549f9074 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6123,7 +6123,7 @@ class binary_reader /*! @return whether array creation completed */ - bool get_bson_str(string_t& result) + bool get_bson_cstr(string_t& result) { bool success = true; generate_until(std::back_inserter(result), [](char c) @@ -6132,7 +6132,7 @@ class binary_reader }, [this, &success] { get(); - if (JSON_UNLIKELY(unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof())) { success = false; } @@ -6156,20 +6156,33 @@ class binary_reader { switch (entry_type) { - case 0x01: + case 0x01: // double { string_t key; - get_bson_str(key); + get_bson_cstr(key); sax->key(key); double number; get_number_little_endian(number); sax->number_float(static_cast(number), ""); } break; - case 0x08: + case 0x02: // string { string_t key; - get_bson_str(key); + get_bson_cstr(key); + sax->key(key); + std::int32_t len; + string_t value; + get_number_little_endian(len); + get_string(len - 1ul, value); + get(); + sax->string(value); + } + break; + case 0x08: // boolean + { + string_t key; + get_bson_cstr(key); sax->key(key); sax->boolean(static_cast(get())); } @@ -8481,7 +8494,7 @@ class binary_writer std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - oa->write_character(static_cast(0x01)); // boolean + oa->write_character(static_cast(0x01)); // double oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); @@ -8489,6 +8502,21 @@ class binary_writer return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; } + std::size_t write_bson_string(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x02)); // string (UTF-8) + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.string->size() + 1ul)); + oa->write_characters( + reinterpret_cast(j.m_value.string->c_str()), + j.m_value.string->size() + 1); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -8500,6 +8528,8 @@ class binary_writer return write_bson_boolean(name, j); case value_t::number_float: return write_bson_double(name, j); + case value_t::string: + return write_bson_string(name, j); }; return 0ul; diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 977468070..e3b4717f4 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -183,5 +183,31 @@ TEST_CASE("BSON") CHECK(json::from_bson(result) == j); CHECK(json::from_bson(result, true, false) == j); } + + SECTION("non-empty object with string") + { + json j = + { + { "entry", "bsonstr" } + }; + + std::vector expected = + { + 0x18, 0x00, 0x00, 0x00, // size (little endian) + 0x02, /// entry: string (UTF-8) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x08, 0x00, 0x00, 0x00, 'b', 's', 'o', 'n', 's', 't', 'r', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + } } From c5ef0231712b33c5c05f4d77ac75eec2ac779ffe Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 11:38:26 +0200 Subject: [PATCH 06/36] BSON: support objects with null members --- .../nlohmann/detail/input/binary_reader.hpp | 8 +++++++ .../nlohmann/detail/output/binary_writer.hpp | 12 ++++++++++ single_include/nlohmann/json.hpp | 20 ++++++++++++++++ test/src/unit-bson.cpp | 23 +++++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 113dcb910..3fd6cc207 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -203,6 +203,14 @@ class binary_reader sax->boolean(static_cast(get())); } break; + case 0x0A: // null + { + string_t key; + get_bson_cstr(key); + sax->key(key); + sax->null(); + } + break; } } diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index a377d3484..966d8c22a 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -711,6 +711,16 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; } + std::size_t write_bson_null(const typename BasicJsonType::string_t& name, const BasicJsonType&) + { + oa->write_character(static_cast(0x0A)); // null + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + return /*id*/ 1ul + name.size() + 1ul; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -724,6 +734,8 @@ class binary_writer return write_bson_double(name, j); case value_t::string: return write_bson_string(name, j); + case value_t::null: + return write_bson_null(name, j); }; return 0ul; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c549f9074..faeaf2140 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6187,6 +6187,14 @@ class binary_reader sax->boolean(static_cast(get())); } break; + case 0x0A: // null + { + string_t key; + get_bson_cstr(key); + sax->key(key); + sax->null(); + } + break; } } @@ -8517,6 +8525,16 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; } + std::size_t write_bson_null(const typename BasicJsonType::string_t& name, const BasicJsonType&) + { + oa->write_character(static_cast(0x0A)); // null + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + return /*id*/ 1ul + name.size() + 1ul; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -8530,6 +8548,8 @@ class binary_writer return write_bson_double(name, j); case value_t::string: return write_bson_string(name, j); + case value_t::null: + return write_bson_null(name, j); }; return 0ul; diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index e3b4717f4..e83e112a8 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -208,6 +208,29 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with null member") + { + json j = + { + { "entry", nullptr } + }; + + std::vector expected = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0x0A, /// entry: null + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + } } From 7ee361f7ad3806258e3c9a7ea2fc8dce5fdb54bc Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 11:54:17 +0200 Subject: [PATCH 07/36] BSON: support objects with int32 members --- .../nlohmann/detail/input/binary_reader.hpp | 10 ++++++++ .../nlohmann/detail/output/binary_writer.hpp | 14 +++++++++++ single_include/nlohmann/json.hpp | 24 +++++++++++++++++++ test/src/unit-bson.cpp | 24 +++++++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 3fd6cc207..f11e164e0 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -203,6 +203,16 @@ class binary_reader sax->boolean(static_cast(get())); } break; + case 0x10: // int32 + { + string_t key; + get_bson_cstr(key); + sax->key(key); + std::int32_t value; + get_number_little_endian(value); + sax->number_integer(static_cast(value)); + } + break; case 0x0A: // null { string_t key; diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 966d8c22a..ed59b7c20 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -721,6 +721,18 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul; } + std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -732,6 +744,8 @@ class binary_writer return write_bson_boolean(name, j); case value_t::number_float: return write_bson_double(name, j); + case value_t::number_integer: + return write_bson_integer(name, j); case value_t::string: return write_bson_string(name, j); case value_t::null: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index faeaf2140..d0cce8a17 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6187,6 +6187,16 @@ class binary_reader sax->boolean(static_cast(get())); } break; + case 0x10: // int32 + { + string_t key; + get_bson_cstr(key); + sax->key(key); + std::int32_t value; + get_number_little_endian(value); + sax->number_integer(static_cast(value)); + } + break; case 0x0A: // null { string_t key; @@ -8535,6 +8545,18 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul; } + std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -8546,6 +8568,8 @@ class binary_writer return write_bson_boolean(name, j); case value_t::number_float: return write_bson_double(name, j); + case value_t::number_integer: + return write_bson_integer(name, j); case value_t::string: return write_bson_string(name, j); case value_t::null: diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index e83e112a8..266dd12af 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -231,6 +231,30 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{0x12345678} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + } } From c0d8921a6797497f3fbd2a8c2a2b29aaca9704c3 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 12:00:53 +0200 Subject: [PATCH 08/36] BSON: support objects with int64 members --- .../nlohmann/detail/input/binary_reader.hpp | 10 ++++++ .../nlohmann/detail/output/binary_writer.hpp | 26 ++++++++++---- single_include/nlohmann/json.hpp | 36 +++++++++++++++---- test/src/unit-bson.cpp | 23 ++++++++++++ 4 files changed, 83 insertions(+), 12 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f11e164e0..d135a4c2e 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -213,6 +213,16 @@ class binary_reader sax->number_integer(static_cast(value)); } break; + case 0x12: // int64 + { + string_t key; + get_bson_cstr(key); + sax->key(key); + std::int64_t value; + get_number_little_endian(value); + sax->number_integer(static_cast(value)); + } + break; case 0x0A: // null { string_t key; diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index ed59b7c20..24426d489 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -723,14 +723,28 @@ class binary_writer std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); + if (j.m_value.number_integer <= static_cast((std::numeric_limits::max)())) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number_little_endian(static_cast(j.m_value.number_integer)); - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + else + { + oa->write_character(static_cast(0x12)); // int64 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + } } std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index d0cce8a17..c564e57fb 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6197,6 +6197,16 @@ class binary_reader sax->number_integer(static_cast(value)); } break; + case 0x12: // int64 + { + string_t key; + get_bson_cstr(key); + sax->key(key); + std::int64_t value; + get_number_little_endian(value); + sax->number_integer(static_cast(value)); + } + break; case 0x0A: // null { string_t key; @@ -8547,14 +8557,28 @@ class binary_writer std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); + if (j.m_value.number_integer <= static_cast((std::numeric_limits::max)())) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number_little_endian(static_cast(j.m_value.number_integer)); - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + else + { + oa->write_character(static_cast(0x12)); // int64 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + } } std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 266dd12af..73989769e 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -255,6 +255,29 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with integer (64-bit) member") + { + json j = + { + { "entry", std::int64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From 83b427ad676795d4181c49513aaa46b8352e27eb Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 12:11:21 +0200 Subject: [PATCH 09/36] BSON: unsigned integers --- .../nlohmann/detail/output/binary_writer.hpp | 34 ++++++++- single_include/nlohmann/json.hpp | 34 ++++++++- test/src/unit-bson.cpp | 72 +++++++++++++++++++ 3 files changed, 136 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 24426d489..770c7dd3e 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -723,14 +723,42 @@ class binary_writer std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - if (j.m_value.number_integer <= static_cast((std::numeric_limits::max)())) + auto n = j.m_value.number_integer; + if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x10)); // int32 oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number_little_endian(static_cast(n)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + else + { + oa->write_character(static_cast(0x12)); // int64 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + } + } + + std::size_t write_bson_unsigned(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + auto n = j.m_value.number_integer; + if (n <= static_cast((std::numeric_limits::max)())) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -760,6 +788,8 @@ class binary_writer return write_bson_double(name, j); case value_t::number_integer: return write_bson_integer(name, j); + case value_t::number_unsigned: + return write_bson_unsigned(name, j); case value_t::string: return write_bson_string(name, j); case value_t::null: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c564e57fb..fa1d8e391 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8557,14 +8557,42 @@ class binary_writer std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { - if (j.m_value.number_integer <= static_cast((std::numeric_limits::max)())) + auto n = j.m_value.number_integer; + if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) { oa->write_character(static_cast(0x10)); // int32 oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number_little_endian(static_cast(n)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + } + else + { + oa->write_character(static_cast(0x12)); // int64 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(j.m_value.number_integer)); + + return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + } + } + + std::size_t write_bson_unsigned(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + auto n = j.m_value.number_integer; + if (n <= static_cast((std::numeric_limits::max)())) + { + oa->write_character(static_cast(0x10)); // int32 + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + write_number_little_endian(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -8594,6 +8622,8 @@ class binary_writer return write_bson_double(name, j); case value_t::number_integer: return write_bson_integer(name, j); + case value_t::number_unsigned: + return write_bson_unsigned(name, j); case value_t::string: return write_bson_string(name, j); case value_t::null: diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 73989769e..8de05e807 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -279,5 +279,77 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with negative integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with negative integer (64-bit) member") + { + json j = + { + { "entry", std::int64_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with unsigned integer (64-bit) member") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + { "entry", std::uint64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From 5ce7d6bdd7dc28d8d9e8f6d3bc91b541217bdb5d Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 13:03:42 +0200 Subject: [PATCH 10/36] BSON: support objects with objects as members --- .../nlohmann/detail/input/binary_reader.hpp | 9 +++++- .../nlohmann/detail/output/binary_writer.hpp | 17 ++++++++++- single_include/nlohmann/json.hpp | 26 +++++++++++++++-- test/src/unit-bson.cpp | 29 +++++++++++++++++++ 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index d135a4c2e..7deb788a6 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -231,10 +231,17 @@ class binary_reader sax->null(); } break; + case 0x03: // object + { + string_t key; + get_bson_cstr(key); + sax->key(key); + parse_bson_internal(); + } + break; } } - get(); const auto result = sax->end_object(); return result; diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 770c7dd3e..1ec1d7948 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -775,6 +775,18 @@ class binary_writer } } + std::size_t write_bson_object_internal(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x03)); // object + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + auto const embedded_document_size = write_bson_object(j); + + return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -782,6 +794,8 @@ class binary_writer default: JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); break; + case value_t::object: + return write_bson_object_internal(name, j); case value_t::boolean: return write_bson_boolean(name, j); case value_t::number_float: @@ -803,7 +817,7 @@ class binary_writer @param[in] j JSON value to serialize @pre j.type() == value_t::object */ - void write_bson_object(const BasicJsonType& j) + std::size_t write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); auto document_size_offset = oa->reserve_characters(4ul); @@ -816,6 +830,7 @@ class binary_writer oa->write_character(static_cast(0x00)); write_number_little_endian_at(document_size_offset, document_size); + return document_size; } /*! diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index fa1d8e391..6201f047b 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6215,10 +6215,17 @@ class binary_reader sax->null(); } break; + case 0x03: // object + { + string_t key; + get_bson_cstr(key); + sax->key(key); + parse_bson_internal(); + } + break; } } - get(); const auto result = sax->end_object(); return result; @@ -8609,6 +8616,18 @@ class binary_writer } } + std::size_t write_bson_object_internal(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x03)); // object + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + auto const embedded_document_size = write_bson_object(j); + + return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -8616,6 +8635,8 @@ class binary_writer default: JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); break; + case value_t::object: + return write_bson_object_internal(name, j); case value_t::boolean: return write_bson_boolean(name, j); case value_t::number_float: @@ -8637,7 +8658,7 @@ class binary_writer @param[in] j JSON value to serialize @pre j.type() == value_t::object */ - void write_bson_object(const BasicJsonType& j) + std::size_t write_bson_object(const BasicJsonType& j) { assert(j.type() == value_t::object); auto document_size_offset = oa->reserve_characters(4ul); @@ -8650,6 +8671,7 @@ class binary_writer oa->write_character(static_cast(0x00)); write_number_little_endian_at(document_size_offset, document_size); + return document_size; } /*! diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 8de05e807..bdc6fe74e 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -351,5 +351,34 @@ TEST_CASE("BSON") CHECK(json::from_bson(result) == j); CHECK(json::from_bson(result, true, false) == j); } + + SECTION("non-empty object with object member") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + { "entry", json::object() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x03, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From 120d1d77d4371db55dffefa27c1ec16fd648e40a Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 13:40:20 +0200 Subject: [PATCH 11/36] BSON: test case for a more complex document --- .../nlohmann/detail/output/binary_writer.hpp | 2 +- single_include/nlohmann/json.hpp | 2 +- test/src/unit-bson.cpp | 32 +++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 1ec1d7948..aa4f34fe8 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -792,7 +792,7 @@ class binary_writer switch (j.type()) { default: - JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + JSON_THROW(type_error::create(317, "JSON value of type be serialized to requested format: " + std::to_string((int)j.type()))); break; case value_t::object: return write_bson_object_internal(name, j); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6201f047b..1c966bc3e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8633,7 +8633,7 @@ class binary_writer switch (j.type()) { default: - JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + JSON_THROW(type_error::create(317, "JSON value of type be serialized to requested format: " + std::to_string((int)j.type()))); break; case value_t::object: return write_bson_object_internal(name, j); diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index bdc6fe74e..c016466d1 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -380,5 +380,37 @@ TEST_CASE("BSON") CHECK(json::from_bson(result) == j); CHECK(json::from_bson(result, true, false) == j); } + + SECTION("Some more complex document") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector expected = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + /*obj-term*/ 0x00 + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } } } From cf485c2907e394aafc50e77bc372603467ee9f33 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 13:54:08 +0200 Subject: [PATCH 12/36] BSON: Support for arrays --- .../nlohmann/detail/input/binary_reader.hpp | 49 ++++++++++--- .../nlohmann/detail/output/binary_writer.hpp | 24 ++++++ single_include/nlohmann/json.hpp | 73 ++++++++++++++++--- test/src/unit-bson.cpp | 29 +++++++- 4 files changed, 154 insertions(+), 21 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 7deb788a6..140cd8ab1 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -157,17 +157,8 @@ class binary_reader return success; } - - bool parse_bson_internal() + void parse_bson_entries() { - std::int32_t documentSize; - get_number_little_endian(documentSize); - - if (not JSON_UNLIKELY(sax->start_object(-1))) - { - return false; - } - while (auto entry_type = get()) { switch (entry_type) @@ -239,8 +230,46 @@ class binary_reader parse_bson_internal(); } break; + case 0x04: // array + { + string_t key; + get_bson_cstr(key); + sax->key(key); + parse_bson_array(); + } + break; } } + } + + bool parse_bson_array() + { + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_array(-1))) + { + return false; + } + + parse_bson_entries(); + + const auto result = sax->end_array(); + + return result; + } + + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_object(-1))) + { + return false; + } + + parse_bson_entries(); const auto result = sax->end_object(); diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index aa4f34fe8..24333935d 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -787,6 +787,28 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } + std::size_t write_bson_array(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x04)); // object + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + + auto document_size_offset = oa->reserve_characters(4ul); + std::int32_t embedded_document_size = 5ul; + + for (const auto& el : *j.m_value.array) + { + embedded_document_size += write_bson_object_entry("", el); + } + + oa->write_character(static_cast(0x00)); + write_number_little_endian_at(document_size_offset, embedded_document_size); + + return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -796,6 +818,8 @@ class binary_writer break; case value_t::object: return write_bson_object_internal(name, j); + case value_t::array: + return write_bson_array(name, j); case value_t::boolean: return write_bson_boolean(name, j); case value_t::number_float: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 1c966bc3e..adb0d2941 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6141,17 +6141,8 @@ class binary_reader return success; } - - bool parse_bson_internal() + void parse_bson_entries() { - std::int32_t documentSize; - get_number_little_endian(documentSize); - - if (not JSON_UNLIKELY(sax->start_object(-1))) - { - return false; - } - while (auto entry_type = get()) { switch (entry_type) @@ -6223,8 +6214,46 @@ class binary_reader parse_bson_internal(); } break; + case 0x04: // array + { + string_t key; + get_bson_cstr(key); + sax->key(key); + parse_bson_array(); + } + break; } } + } + + bool parse_bson_array() + { + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_array(-1))) + { + return false; + } + + parse_bson_entries(); + + const auto result = sax->end_array(); + + return result; + } + + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number_little_endian(documentSize); + + if (not JSON_UNLIKELY(sax->start_object(-1))) + { + return false; + } + + parse_bson_entries(); const auto result = sax->end_object(); @@ -8628,6 +8657,28 @@ class binary_writer return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } + std::size_t write_bson_array(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + oa->write_character(static_cast(0x04)); // object + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + + + auto document_size_offset = oa->reserve_characters(4ul); + std::int32_t embedded_document_size = 5ul; + + for (const auto& el : *j.m_value.array) + { + embedded_document_size += write_bson_object_entry("", el); + } + + oa->write_character(static_cast(0x00)); + write_number_little_endian_at(document_size_offset, embedded_document_size); + + return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + } + std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) @@ -8637,6 +8688,8 @@ class binary_writer break; case value_t::object: return write_bson_object_internal(name, j); + case value_t::array: + return write_bson_array(name, j); case value_t::boolean: return write_bson_boolean(name, j); case value_t::number_float: diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index c016466d1..b28861107 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -354,7 +354,6 @@ TEST_CASE("BSON") SECTION("non-empty object with object member") { - // directly encoding uint64 is not supported in bson (only for timestamp values) json j = { { "entry", json::object() } @@ -381,6 +380,34 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with array member") + { + json j = + { + { "entry", json::array() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + SECTION("Some more complex document") { // directly encoding uint64 is not supported in bson (only for timestamp values) From df33a90774c8a63140b49685c47be964a2409167 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 15 Sep 2018 14:08:38 +0200 Subject: [PATCH 13/36] BSON: Bugfix for non-empty arrays --- .../nlohmann/detail/input/binary_reader.hpp | 37 +++++-------------- single_include/nlohmann/json.hpp | 37 +++++-------------- test/src/unit-bson.cpp | 35 ++++++++++++++++++ 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 140cd8ab1..f4049bd3b 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -157,17 +157,21 @@ class binary_reader return success; } - void parse_bson_entries() + void parse_bson_entries(bool is_array) { while (auto entry_type = get()) { + string_t key; + get_bson_cstr(key); + if (!is_array) + { + sax->key(key); + } + switch (entry_type) { case 0x01: // double { - string_t key; - get_bson_cstr(key); - sax->key(key); double number; get_number_little_endian(number); sax->number_float(static_cast(number), ""); @@ -175,9 +179,6 @@ class binary_reader break; case 0x02: // string { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int32_t len; string_t value; get_number_little_endian(len); @@ -188,17 +189,11 @@ class binary_reader break; case 0x08: // boolean { - string_t key; - get_bson_cstr(key); - sax->key(key); sax->boolean(static_cast(get())); } break; case 0x10: // int32 { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int32_t value; get_number_little_endian(value); sax->number_integer(static_cast(value)); @@ -206,9 +201,6 @@ class binary_reader break; case 0x12: // int64 { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int64_t value; get_number_little_endian(value); sax->number_integer(static_cast(value)); @@ -216,25 +208,16 @@ class binary_reader break; case 0x0A: // null { - string_t key; - get_bson_cstr(key); - sax->key(key); sax->null(); } break; case 0x03: // object { - string_t key; - get_bson_cstr(key); - sax->key(key); parse_bson_internal(); } break; case 0x04: // array { - string_t key; - get_bson_cstr(key); - sax->key(key); parse_bson_array(); } break; @@ -252,7 +235,7 @@ class binary_reader return false; } - parse_bson_entries(); + parse_bson_entries(/*is_array*/true); const auto result = sax->end_array(); @@ -269,7 +252,7 @@ class binary_reader return false; } - parse_bson_entries(); + parse_bson_entries(/*is_array*/false); const auto result = sax->end_object(); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index adb0d2941..6f481a8f6 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6141,17 +6141,21 @@ class binary_reader return success; } - void parse_bson_entries() + void parse_bson_entries(bool is_array) { while (auto entry_type = get()) { + string_t key; + get_bson_cstr(key); + if (!is_array) + { + sax->key(key); + } + switch (entry_type) { case 0x01: // double { - string_t key; - get_bson_cstr(key); - sax->key(key); double number; get_number_little_endian(number); sax->number_float(static_cast(number), ""); @@ -6159,9 +6163,6 @@ class binary_reader break; case 0x02: // string { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int32_t len; string_t value; get_number_little_endian(len); @@ -6172,17 +6173,11 @@ class binary_reader break; case 0x08: // boolean { - string_t key; - get_bson_cstr(key); - sax->key(key); sax->boolean(static_cast(get())); } break; case 0x10: // int32 { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int32_t value; get_number_little_endian(value); sax->number_integer(static_cast(value)); @@ -6190,9 +6185,6 @@ class binary_reader break; case 0x12: // int64 { - string_t key; - get_bson_cstr(key); - sax->key(key); std::int64_t value; get_number_little_endian(value); sax->number_integer(static_cast(value)); @@ -6200,25 +6192,16 @@ class binary_reader break; case 0x0A: // null { - string_t key; - get_bson_cstr(key); - sax->key(key); sax->null(); } break; case 0x03: // object { - string_t key; - get_bson_cstr(key); - sax->key(key); parse_bson_internal(); } break; case 0x04: // array { - string_t key; - get_bson_cstr(key); - sax->key(key); parse_bson_array(); } break; @@ -6236,7 +6219,7 @@ class binary_reader return false; } - parse_bson_entries(); + parse_bson_entries(/*is_array*/true); const auto result = sax->end_array(); @@ -6253,7 +6236,7 @@ class binary_reader return false; } - parse_bson_entries(); + parse_bson_entries(/*is_array*/false); const auto result = sax->end_object(); diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index b28861107..fcaf51d6f 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -408,6 +408,41 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with non-empty array member") + { + json j = + { + { "entry", json::array({1, 2, 3, 4, 5, 6, 7, 8}) } + }; + + std::vector expected = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + SECTION("Some more complex document") { // directly encoding uint64 is not supported in bson (only for timestamp values) From 763705c2a7fca0dd3b6569e783403b917ee03b09 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Mon, 24 Sep 2018 19:29:39 +0200 Subject: [PATCH 14/36] Fix: Add missing `begin()` and `end()` member functions to `alt_string` --- test/src/unit-alt-string.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/src/unit-alt-string.cpp b/test/src/unit-alt-string.cpp index 356835c01..ba52d6a42 100644 --- a/test/src/unit-alt-string.cpp +++ b/test/src/unit-alt-string.cpp @@ -102,6 +102,10 @@ class alt_string str_impl.resize(n, c); } + auto begin() -> std::string::iterator { return str_impl.begin(); } + + auto end() -> std::string::iterator { return str_impl.end(); } + template bool operator<(const op_type& op) const { From bce4816275bc4b856643219a5418198ab258c522 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Mon, 24 Sep 2018 23:35:19 +0200 Subject: [PATCH 15/36] BSON: Added test case for the different input/output_adapters --- test/src/unit-alt-string.cpp | 10 +++++-- test/src/unit-bson.cpp | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/test/src/unit-alt-string.cpp b/test/src/unit-alt-string.cpp index ba52d6a42..d866ed703 100644 --- a/test/src/unit-alt-string.cpp +++ b/test/src/unit-alt-string.cpp @@ -102,9 +102,15 @@ class alt_string str_impl.resize(n, c); } - auto begin() -> std::string::iterator { return str_impl.begin(); } + auto begin() -> std::string::iterator + { + return str_impl.begin(); + } - auto end() -> std::string::iterator { return str_impl.end(); } + auto end() -> std::string::iterator + { + return str_impl.end(); + } template bool operator<(const op_type& op) const diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index fcaf51d6f..c701af40e 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -476,3 +476,61 @@ TEST_CASE("BSON") } } } + +TEST_CASE("BSON input/output_adapters") +{ + json json_representation = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector bson_representation = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + /*obj-term*/ 0x00 + }; + + json j2; + CHECK_NOTHROW(j2 = json::from_bson(bson_representation)); + + // compare parsed JSON values + CHECK(json_representation == j2); + + SECTION("roundtrips") + { + SECTION("std::ostringstream") + { + std::ostringstream ss; + json::to_bson(json_representation, ss); + std::istringstream iss(ss.str()); + json j3 = json::from_bson(iss); + CHECK(json_representation == j3); + } + + SECTION("std::string") + { + std::string s; + json::to_bson(json_representation, s); + json j3 = json::from_bson(s); + CHECK(json_representation == j3); + } + + SECTION("std::vector") + { + std::vector v; + json::to_bson(json_representation, v); + json j3 = json::from_bson(v); + CHECK(json_representation == j3); + } + } +} From ef358ae695d1d3c6f20681a4a4c3aa7b64b967ad Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Tue, 25 Sep 2018 20:34:25 +0200 Subject: [PATCH 16/36] BSON: Fixed hangup in case of incomplete bson input and improved test coverage --- .../nlohmann/detail/input/binary_reader.hpp | 35 ++-- .../nlohmann/detail/output/binary_writer.hpp | 59 +++--- single_include/nlohmann/json.hpp | 94 +++++----- test/src/unit-bson.cpp | 174 ++++++++++++++++++ 4 files changed, 260 insertions(+), 102 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f4049bd3b..0a95fef5a 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -142,9 +142,9 @@ class binary_reader bool get_bson_cstr(string_t& result) { bool success = true; - generate_until(std::back_inserter(result), [](char c) + generate_until(std::back_inserter(result), [&success](char c) { - return c == 0x00; + return c == 0x00 || !success; }, [this, &success] { get(); @@ -157,12 +157,16 @@ class binary_reader return success; } - void parse_bson_entries(bool is_array) + bool parse_bson_entries(bool is_array) { while (auto entry_type = get()) { string_t key; - get_bson_cstr(key); + if (!get_bson_cstr(key)) + { + return false; + } + if (!is_array) { sax->key(key); @@ -223,6 +227,7 @@ class binary_reader break; } } + return true; } bool parse_bson_array() @@ -230,16 +235,17 @@ class binary_reader std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(-1))) { return false; } - parse_bson_entries(/*is_array*/true); + if (!parse_bson_entries(/*is_array*/true)) + { + return false; + } - const auto result = sax->end_array(); - - return result; + return sax->end_array(); } bool parse_bson_internal() @@ -247,16 +253,17 @@ class binary_reader std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_object(-1))) + if (JSON_UNLIKELY(not sax->start_object(-1))) { return false; } - parse_bson_entries(/*is_array*/false); + if (!parse_bson_entries(/*is_array*/false)) + { + return false; + } - const auto result = sax->end_object(); - - return result; + return sax->end_object(); } /*! diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 24333935d..d9655f93f 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -692,7 +692,7 @@ class binary_writer oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(j.m_value.number_float); + write_number(j.m_value.number_float); return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; } @@ -703,7 +703,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.string->size() + 1ul)); + write_number(static_cast(j.m_value.string->size() + 1ul)); oa->write_characters( reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size() + 1); @@ -731,7 +731,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(n)); + write_number(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -742,7 +742,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number(static_cast(j.m_value.number_integer)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); } @@ -758,7 +758,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(n)); + write_number(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -769,7 +769,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number(static_cast(j.m_value.number_integer)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); } @@ -804,7 +804,7 @@ class binary_writer } oa->write_character(static_cast(0x00)); - write_number_little_endian_at(document_size_offset, embedded_document_size); + write_number_at(document_size_offset, embedded_document_size); return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } @@ -813,9 +813,11 @@ class binary_writer { switch (j.type()) { + // LCOV_EXCL_START default: - JSON_THROW(type_error::create(317, "JSON value of type be serialized to requested format: " + std::to_string((int)j.type()))); + assert(false); break; + // LCOV_EXCL_STOP case value_t::object: return write_bson_object_internal(name, j); case value_t::array: @@ -853,7 +855,7 @@ class binary_writer } oa->write_character(static_cast(0x00)); - write_number_little_endian_at(document_size_offset, document_size); + write_number_at(document_size_offset, document_size); return document_size; } @@ -883,12 +885,14 @@ class binary_writer @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian @note This function needs to respect the system's endianess, because bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template void write_number(const NumberType n) { // step 1: write number to array of length NumberType @@ -896,30 +900,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); - } - - /* - @brief write a number to output in little endian format - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - */ - template - void write_number_little_endian(const NumberType n) - { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (!is_little_endian) + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); @@ -934,20 +915,24 @@ class binary_writer @param[in] offset The offset where to start writing @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian */ - template - void write_number_little_endian_at(std::size_t offset, const NumberType n) + template + void write_number_at(std::size_t offset, const NumberType n) { // step 1: write number to array of length NumberType std::array vec; std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (!is_little_endian) + // LCOV_EXCL_START + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); } + // LCOV_EXCL_STOP oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 6f481a8f6..3f7cd4e70 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6126,9 +6126,9 @@ class binary_reader bool get_bson_cstr(string_t& result) { bool success = true; - generate_until(std::back_inserter(result), [](char c) + generate_until(std::back_inserter(result), [&success](char c) { - return c == 0x00; + return c == 0x00 || !success; }, [this, &success] { get(); @@ -6141,12 +6141,16 @@ class binary_reader return success; } - void parse_bson_entries(bool is_array) + bool parse_bson_entries(bool is_array) { while (auto entry_type = get()) { string_t key; - get_bson_cstr(key); + if (!get_bson_cstr(key)) + { + return false; + } + if (!is_array) { sax->key(key); @@ -6207,6 +6211,7 @@ class binary_reader break; } } + return true; } bool parse_bson_array() @@ -6214,16 +6219,17 @@ class binary_reader std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(-1))) { return false; } - parse_bson_entries(/*is_array*/true); + if (!parse_bson_entries(/*is_array*/true)) + { + return false; + } - const auto result = sax->end_array(); - - return result; + return sax->end_array(); } bool parse_bson_internal() @@ -6231,16 +6237,17 @@ class binary_reader std::int32_t documentSize; get_number_little_endian(documentSize); - if (not JSON_UNLIKELY(sax->start_object(-1))) + if (JSON_UNLIKELY(not sax->start_object(-1))) { return false; } - parse_bson_entries(/*is_array*/false); + if (!parse_bson_entries(/*is_array*/false)) + { + return false; + } - const auto result = sax->end_object(); - - return result; + return sax->end_object(); } /*! @@ -8545,7 +8552,7 @@ class binary_writer oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(j.m_value.number_float); + write_number(j.m_value.number_float); return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; } @@ -8556,7 +8563,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.string->size() + 1ul)); + write_number(static_cast(j.m_value.string->size() + 1ul)); oa->write_characters( reinterpret_cast(j.m_value.string->c_str()), j.m_value.string->size() + 1); @@ -8584,7 +8591,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(n)); + write_number(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -8595,7 +8602,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number(static_cast(j.m_value.number_integer)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); } @@ -8611,7 +8618,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(n)); + write_number(static_cast(n)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); } @@ -8622,7 +8629,7 @@ class binary_writer reinterpret_cast(name.c_str()), name.size() + 1u); - write_number_little_endian(static_cast(j.m_value.number_integer)); + write_number(static_cast(j.m_value.number_integer)); return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); } @@ -8657,7 +8664,7 @@ class binary_writer } oa->write_character(static_cast(0x00)); - write_number_little_endian_at(document_size_offset, embedded_document_size); + write_number_at(document_size_offset, embedded_document_size); return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } @@ -8666,9 +8673,11 @@ class binary_writer { switch (j.type()) { + // LCOV_EXCL_START default: - JSON_THROW(type_error::create(317, "JSON value of type be serialized to requested format: " + std::to_string((int)j.type()))); + assert(false); break; + // LCOV_EXCL_STOP case value_t::object: return write_bson_object_internal(name, j); case value_t::array: @@ -8706,7 +8715,7 @@ class binary_writer } oa->write_character(static_cast(0x00)); - write_number_little_endian_at(document_size_offset, document_size); + write_number_at(document_size_offset, document_size); return document_size; } @@ -8736,12 +8745,14 @@ class binary_writer @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian @note This function needs to respect the system's endianess, because bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template void write_number(const NumberType n) { // step 1: write number to array of length NumberType @@ -8749,30 +8760,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); - } - - /* - @brief write a number to output in little endian format - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - */ - template - void write_number_little_endian(const NumberType n) - { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (!is_little_endian) + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); @@ -8787,20 +8775,24 @@ class binary_writer @param[in] offset The offset where to start writing @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian */ - template - void write_number_little_endian_at(std::size_t offset, const NumberType n) + template + void write_number_at(std::size_t offset, const NumberType n) { // step 1: write number to array of length NumberType std::array vec; std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (!is_little_endian) + // LCOV_EXCL_START + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); } + // LCOV_EXCL_STOP oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); } diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index c701af40e..cbb6785e9 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -352,6 +352,31 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("non-empty object with small unsigned integer member") + { + json j = + { + { "entry", std::uint64_t{0x42} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x42, 0x00, 0x00, 0x00, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with object member") { json j = @@ -534,3 +559,152 @@ TEST_CASE("BSON input/output_adapters") } } } + + + + + +class SaxCountdown +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() + { + return events_left-- > 0; + } + + bool boolean(bool) + { + return events_left-- > 0; + } + + bool number_integer(json::number_integer_t) + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) + { + return events_left-- > 0; + } + + bool string(std::string&) + { + return events_left-- > 0; + } + + bool start_object(std::size_t) + { + return events_left-- > 0; + } + + bool key(std::string&) + { + return events_left-- > 0; + } + + bool end_object() + { + return events_left-- > 0; + } + + bool start_array(std::size_t) + { + return events_left-- > 0; + } + + bool end_array() + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) + { + return false; + } + + private: + int events_left = 0; +}; + + +TEST_CASE("Incomplete BSON INPUT") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't' // unexpected EOF + }; + + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at 9: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + +TEST_CASE("Incomplete BSON INPUT 2") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean, unexpected EOF + }; + + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at 6: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + + +TEST_CASE("Incomplete BSON INPUT 3") +{ + std::vector incomplete_bson = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 + // missing input data... + }; + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at 29: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(1); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + + + +TEST_CASE("Incomplete BSON INPUT 4") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, // size (incomplete), unexpected EOF + }; + + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at 3: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + + From 0a09db9cc24fe23e226fd15ced3182068dba06ae Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 29 Sep 2018 11:33:01 +0200 Subject: [PATCH 17/36] BSON: Extend `binary_reader::get_number` to be able to hanlde little endian input to get rid of `binary_reader::get_number_little_endian` --- .../nlohmann/detail/input/binary_reader.hpp | 44 ++++--------------- single_include/nlohmann/json.hpp | 44 ++++--------------- 2 files changed, 16 insertions(+), 72 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 0a95fef5a..d68a60919 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -177,7 +177,7 @@ class binary_reader case 0x01: // double { double number; - get_number_little_endian(number); + get_number(number); sax->number_float(static_cast(number), ""); } break; @@ -185,7 +185,7 @@ class binary_reader { std::int32_t len; string_t value; - get_number_little_endian(len); + get_number(len); get_string(len - 1ul, value); get(); sax->string(value); @@ -199,14 +199,14 @@ class binary_reader case 0x10: // int32 { std::int32_t value; - get_number_little_endian(value); + get_number(value); sax->number_integer(static_cast(value)); } break; case 0x12: // int64 { std::int64_t value; - get_number_little_endian(value); + get_number(value); sax->number_integer(static_cast(value)); } break; @@ -233,7 +233,7 @@ class binary_reader bool parse_bson_array() { std::int32_t documentSize; - get_number_little_endian(documentSize); + get_number(documentSize); if (JSON_UNLIKELY(not sax->start_array(-1))) { @@ -251,7 +251,7 @@ class binary_reader bool parse_bson_internal() { std::int32_t documentSize; - get_number_little_endian(documentSize); + get_number(documentSize); if (JSON_UNLIKELY(not sax->start_object(-1))) { @@ -1016,7 +1016,7 @@ class binary_reader bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template bool get_number(NumberType& result) { // step 1: read input into array with system's byte order @@ -1030,7 +1030,7 @@ class binary_reader } // reverse byte order prior to conversion if necessary - if (is_little_endian) + if (is_little_endian && !InputIsLittleEndian) { vec[sizeof(NumberType) - i - 1] = static_cast(current); } @@ -1045,34 +1045,6 @@ class binary_reader return true; } - template - bool get_number_little_endian(NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (!is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } /*! @brief create a string by reading characters from the input diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 3f7cd4e70..ac11591a2 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6161,7 +6161,7 @@ class binary_reader case 0x01: // double { double number; - get_number_little_endian(number); + get_number(number); sax->number_float(static_cast(number), ""); } break; @@ -6169,7 +6169,7 @@ class binary_reader { std::int32_t len; string_t value; - get_number_little_endian(len); + get_number(len); get_string(len - 1ul, value); get(); sax->string(value); @@ -6183,14 +6183,14 @@ class binary_reader case 0x10: // int32 { std::int32_t value; - get_number_little_endian(value); + get_number(value); sax->number_integer(static_cast(value)); } break; case 0x12: // int64 { std::int64_t value; - get_number_little_endian(value); + get_number(value); sax->number_integer(static_cast(value)); } break; @@ -6217,7 +6217,7 @@ class binary_reader bool parse_bson_array() { std::int32_t documentSize; - get_number_little_endian(documentSize); + get_number(documentSize); if (JSON_UNLIKELY(not sax->start_array(-1))) { @@ -6235,7 +6235,7 @@ class binary_reader bool parse_bson_internal() { std::int32_t documentSize; - get_number_little_endian(documentSize); + get_number(documentSize); if (JSON_UNLIKELY(not sax->start_object(-1))) { @@ -7000,7 +7000,7 @@ class binary_reader bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template bool get_number(NumberType& result) { // step 1: read input into array with system's byte order @@ -7014,7 +7014,7 @@ class binary_reader } // reverse byte order prior to conversion if necessary - if (is_little_endian) + if (is_little_endian && !InputIsLittleEndian) { vec[sizeof(NumberType) - i - 1] = static_cast(current); } @@ -7029,34 +7029,6 @@ class binary_reader return true; } - template - bool get_number_little_endian(NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (!is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } /*! @brief create a string by reading characters from the input From e8730e5e82493d82c9dae8bd73ae6eedc9267de5 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sat, 29 Sep 2018 11:50:01 +0200 Subject: [PATCH 18/36] BSON: Reworked `binary_reader::get_bson_cstr()` --- .../nlohmann/detail/input/binary_reader.hpp | 31 +++++++------------ single_include/nlohmann/json.hpp | 31 +++++++------------ 2 files changed, 22 insertions(+), 40 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index d68a60919..0b718dd49 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -125,36 +125,27 @@ class binary_reader private: - template - OutputIt generate_until(OutputIt&& d_first, UnaryPredicate&& pred, Gen&& gen) - { - for (auto x = gen(); !pred(x); x = gen()) - { - *d_first++ = x; - } - - return d_first; - } - /*! @return whether array creation completed */ bool get_bson_cstr(string_t& result) { - bool success = true; - generate_until(std::back_inserter(result), [&success](char c) - { - return c == 0x00 || !success; - }, [this, &success] + auto out = std::back_inserter(result); + while (true) { get(); if (JSON_UNLIKELY(not unexpect_eof())) { - success = false; + return false; } - return static_cast(current); - }); - return success; + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; } bool parse_bson_entries(bool is_array) diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index ac11591a2..a661ecb6d 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6109,36 +6109,27 @@ class binary_reader private: - template - OutputIt generate_until(OutputIt&& d_first, UnaryPredicate&& pred, Gen&& gen) - { - for (auto x = gen(); !pred(x); x = gen()) - { - *d_first++ = x; - } - - return d_first; - } - /*! @return whether array creation completed */ bool get_bson_cstr(string_t& result) { - bool success = true; - generate_until(std::back_inserter(result), [&success](char c) - { - return c == 0x00 || !success; - }, [this, &success] + auto out = std::back_inserter(result); + while (true) { get(); if (JSON_UNLIKELY(not unexpect_eof())) { - success = false; + return false; } - return static_cast(current); - }); - return success; + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; } bool parse_bson_entries(bool is_array) From 81f4b34e068df134447c2e13fd05fd4c23f8018f Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sun, 7 Oct 2018 07:52:12 +0200 Subject: [PATCH 19/36] BSON: Improved documentation and error handling/reporting --- include/nlohmann/detail/exceptions.hpp | 1 + .../nlohmann/detail/input/binary_reader.hpp | 179 ++++++++---- .../nlohmann/detail/output/binary_writer.hpp | 2 +- include/nlohmann/json.hpp | 79 +++++- single_include/nlohmann/json.hpp | 261 ++++++++++++++---- test/src/unit-bson.cpp | 19 ++ 6 files changed, 421 insertions(+), 120 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index 274a88c7f..7edc00326 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -91,6 +91,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 0b718dd49..2b3ff1abe 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -126,7 +126,12 @@ class binary_reader private: /*! - @return whether array creation completed + @brief Parses a C-style string from the BSON input. + @param [out] result A reference to the string variable where the read string + is to be stored. + @return `true` if the \x00-byte indicating the end of the + string was encountered before the EOF. + `false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -148,12 +153,112 @@ class binary_reader return true; } - bool parse_bson_entries(bool is_array) + /*! + @brief Parses a zero-terminated string of length @a len from the BSON input. + @param [in] len The length (including the zero-byte at the end) of the string to be read. + @param [out] result A reference to the string variable where the read string + is to be stored. + @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) { - while (auto entry_type = get()) + return get_string(len - static_cast(1), result) + && get() != std::char_traits::eof(); + } + + /*! + @return A hexadecimal string representation of the given @a byte + @param byte The byte to convert to a string + */ + static std::string byte_hexstring(unsigned char byte) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%02hhX", byte); + return std::string{cr}; + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param element_type_parse_position The position in the input stream, where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported @a element_type will + give rise to a parse_error.114: Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + { + switch (element_type) { + case 0x01: // double + { + double number; + return get_number(number) + && sax->number_float(static_cast(number), ""); + } + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(len) + && get_bson_string(len, value) + && sax->string(value); + } + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + case 0x10: // int32 + { + std::int32_t value; + return get_number(value) + && sax->number_integer(static_cast(value)); + } + case 0x12: // int64 + { + std::int64_t value; + return get_number(value) + && sax->number_integer(static_cast(value)); + } + case 0x0A: // null + { + return sax->null(); + } + case 0x03: // object + { + return parse_bson_internal(); + } + case 0x04: // array + { + return parse_bson_array(); + } + default: // anything else not supported (yet) + { + auto element_type_str = byte_hexstring(element_type); + return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) from the input + and passes it to the SAX-parser. + The same binary layout is used for objects and arrays, hence it must + be indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + @param is_array Determines if the element list being read is to be treated as + an object (@a is_array == false), or as an array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(bool is_array) + { + while (auto element_type = get()) + { + const std::size_t element_type_parse_position = chars_read; string_t key; - if (!get_bson_cstr(key)) + if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } @@ -163,64 +268,18 @@ class binary_reader sax->key(key); } - switch (entry_type) + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) { - case 0x01: // double - { - double number; - get_number(number); - sax->number_float(static_cast(number), ""); - } - break; - case 0x02: // string - { - std::int32_t len; - string_t value; - get_number(len); - get_string(len - 1ul, value); - get(); - sax->string(value); - } - break; - case 0x08: // boolean - { - sax->boolean(static_cast(get())); - } - break; - case 0x10: // int32 - { - std::int32_t value; - get_number(value); - sax->number_integer(static_cast(value)); - } - break; - case 0x12: // int64 - { - std::int64_t value; - get_number(value); - sax->number_integer(static_cast(value)); - } - break; - case 0x0A: // null - { - sax->null(); - } - break; - case 0x03: // object - { - parse_bson_internal(); - } - break; - case 0x04: // array - { - parse_bson_array(); - } - break; + return false; } } return true; } + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ bool parse_bson_array() { std::int32_t documentSize; @@ -231,7 +290,7 @@ class binary_reader return false; } - if (!parse_bson_entries(/*is_array*/true)) + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) { return false; } @@ -239,6 +298,10 @@ class binary_reader return sax->end_array(); } + /*! + @brief Reads in a BSON-object and pass it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ bool parse_bson_internal() { std::int32_t documentSize; @@ -249,7 +312,7 @@ class binary_reader return false; } - if (!parse_bson_entries(/*is_array*/false)) + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) { return false; } diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index d9655f93f..363bb9b25 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -868,7 +868,7 @@ class binary_writer switch (j.type()) { default: - JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); break; case value_t::discarded: break; diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 8b6a01707..c5d2e0dbe 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6591,7 +6591,13 @@ class basic_json } - + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + @param j The JSON object to convert to BSON. + @return The BSON representation of the JSON input `j`. + @pre The input `j` shall be an object: `j.is_object() == true` + */ static std::vector to_bson(const basic_json& j) { std::vector result; @@ -6599,11 +6605,21 @@ class basic_json return result; } + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + */ static void to_bson(const basic_json& j, detail::output_adapter o) { binary_writer(o).write_bson(j); } + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ static void to_bson(const basic_json& j, detail::output_adapter o) { binary_writer(o).write_bson(j); @@ -6804,6 +6820,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -6889,6 +6907,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 3.1.0; added @allow_exceptions parameter since 3.2.0 */ @@ -6920,7 +6940,61 @@ class basic_json + /*! + @brief Create a JSON value from an input in BSON format + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ static basic_json from_bson(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) @@ -6931,6 +7005,9 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ template::value, int> = 0> static basic_json from_bson(A1 && a1, A2 && a2, diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index a661ecb6d..52fd79ca7 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -690,6 +690,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -6110,7 +6111,12 @@ class binary_reader private: /*! - @return whether array creation completed + @brief Parses a C-style string from the BSON input. + @param [out] result A reference to the string variable where the read string + is to be stored. + @return `true` if the \x00-byte indicating the end of the + string was encountered before the EOF. + `false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -6132,12 +6138,112 @@ class binary_reader return true; } - bool parse_bson_entries(bool is_array) + /*! + @brief Parses a zero-terminated string of length @a len from the BSON input. + @param [in] len The length (including the zero-byte at the end) of the string to be read. + @param [out] result A reference to the string variable where the read string + is to be stored. + @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) { - while (auto entry_type = get()) + return get_string(len - static_cast(1), result) + && get() != std::char_traits::eof(); + } + + /*! + @return A hexadecimal string representation of the given @a byte + @param byte The byte to convert to a string + */ + static std::string byte_hexstring(unsigned char byte) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%02hhX", byte); + return std::string{cr}; + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param element_type_parse_position The position in the input stream, where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported @a element_type will + give rise to a parse_error.114: Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + { + switch (element_type) { + case 0x01: // double + { + double number; + return get_number(number) + && sax->number_float(static_cast(number), ""); + } + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(len) + && get_bson_string(len, value) + && sax->string(value); + } + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + case 0x10: // int32 + { + std::int32_t value; + return get_number(value) + && sax->number_integer(static_cast(value)); + } + case 0x12: // int64 + { + std::int64_t value; + return get_number(value) + && sax->number_integer(static_cast(value)); + } + case 0x0A: // null + { + return sax->null(); + } + case 0x03: // object + { + return parse_bson_internal(); + } + case 0x04: // array + { + return parse_bson_array(); + } + default: // anything else not supported (yet) + { + auto element_type_str = byte_hexstring(element_type); + return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) from the input + and passes it to the SAX-parser. + The same binary layout is used for objects and arrays, hence it must + be indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + @param is_array Determines if the element list being read is to be treated as + an object (@a is_array == false), or as an array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(bool is_array) + { + while (auto element_type = get()) + { + const std::size_t element_type_parse_position = chars_read; string_t key; - if (!get_bson_cstr(key)) + if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } @@ -6147,64 +6253,18 @@ class binary_reader sax->key(key); } - switch (entry_type) + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) { - case 0x01: // double - { - double number; - get_number(number); - sax->number_float(static_cast(number), ""); - } - break; - case 0x02: // string - { - std::int32_t len; - string_t value; - get_number(len); - get_string(len - 1ul, value); - get(); - sax->string(value); - } - break; - case 0x08: // boolean - { - sax->boolean(static_cast(get())); - } - break; - case 0x10: // int32 - { - std::int32_t value; - get_number(value); - sax->number_integer(static_cast(value)); - } - break; - case 0x12: // int64 - { - std::int64_t value; - get_number(value); - sax->number_integer(static_cast(value)); - } - break; - case 0x0A: // null - { - sax->null(); - } - break; - case 0x03: // object - { - parse_bson_internal(); - } - break; - case 0x04: // array - { - parse_bson_array(); - } - break; + return false; } } return true; } + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ bool parse_bson_array() { std::int32_t documentSize; @@ -6215,7 +6275,7 @@ class binary_reader return false; } - if (!parse_bson_entries(/*is_array*/true)) + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) { return false; } @@ -6223,6 +6283,10 @@ class binary_reader return sax->end_array(); } + /*! + @brief Reads in a BSON-object and pass it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ bool parse_bson_internal() { std::int32_t documentSize; @@ -6233,7 +6297,7 @@ class binary_reader return false; } - if (!parse_bson_entries(/*is_array*/false)) + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) { return false; } @@ -8691,7 +8755,7 @@ class binary_writer switch (j.type()) { default: - JSON_THROW(type_error::create(317, "JSON value cannot be serialized to requested format")); + JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); break; case value_t::discarded: break; @@ -18080,7 +18144,13 @@ class basic_json } - + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + @param j The JSON object to convert to BSON. + @return The BSON representation of the JSON input `j`. + @pre The input `j` shall be an object: `j.is_object() == true` + */ static std::vector to_bson(const basic_json& j) { std::vector result; @@ -18088,11 +18158,21 @@ class basic_json return result; } + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + */ static void to_bson(const basic_json& j, detail::output_adapter o) { binary_writer(o).write_bson(j); } + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ static void to_bson(const basic_json& j, detail::output_adapter o) { binary_writer(o).write_bson(j); @@ -18293,6 +18373,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -18378,6 +18460,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 3.1.0; added @allow_exceptions parameter since 3.2.0 */ @@ -18409,7 +18493,61 @@ class basic_json + /*! + @brief Create a JSON value from an input in BSON format + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ static basic_json from_bson(detail::input_adapter&& i, const bool strict = true, const bool allow_exceptions = true) @@ -18420,6 +18558,9 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ template::value, int> = 0> static basic_json from_bson(A1 && a1, A2 && a2, diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index cbb6785e9..33ef25c43 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -708,3 +708,22 @@ TEST_CASE("Incomplete BSON INPUT 4") } +TEST_CASE("Unsupported BSON input") +{ + std::vector bson = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0xFF, // entry type: Min key (not supported yet) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + CHECK_THROWS_WITH(json::from_bson(bson), + "[json.exception.parse_error.114] parse error at 5: Unsupported BSON record type 0xFF"); + CHECK(json::from_bson(bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson)); +} + + From 062aeaf7b654f1573c57929d229861845031185b Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sun, 7 Oct 2018 17:57:13 +0200 Subject: [PATCH 20/36] BSON: Reworked the `binary_writer` such that it precomputes the size of the BSON-output. This way, the output_adapter can work on simple output iterators and no longer requires random access iterators. --- .../nlohmann/detail/output/binary_writer.hpp | 345 ++++++++------ .../detail/output/output_adapters.hpp | 43 -- include/nlohmann/json.hpp | 43 +- single_include/nlohmann/json.hpp | 431 ++++++++++-------- test/src/unit-alt-string.cpp | 10 - 5 files changed, 488 insertions(+), 384 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 363bb9b25..bce5116dc 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -9,6 +9,7 @@ #include #include + namespace nlohmann { namespace detail @@ -676,187 +677,278 @@ class binary_writer } } - std::size_t write_bson_boolean(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - oa->write_character(static_cast(0x08)); // boolean + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - oa->write_character(j.m_value.boolean ? static_cast(0x01) : static_cast(0x00)); - return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; } - std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) { - oa->write_character(static_cast(0x01)); // double - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - write_number(j.m_value.number_float); - return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); } - std::size_t write_bson_string(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, const double value) { - oa->write_character(static_cast(0x02)); // string (UTF-8) - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.string->size() + 1ul)); - oa->write_characters( - reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size() + 1); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; + write_bson_entry_header(name, 0x01); + write_number(value); } - std::size_t write_bson_null(const typename BasicJsonType::string_t& name, const BasicJsonType&) + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) { - oa->write_character(static_cast(0x0A)); // null - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - return /*id*/ 1ul + name.size() + 1ul; + return sizeof(std::int32_t) + value.size() + 1ul; } - std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) { - auto n = j.m_value.number_integer; - if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(n)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + return sizeof(std::int32_t); } else { - oa->write_character(static_cast(0x12)); // int64 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.number_integer)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + return sizeof(std::int64_t); } } - std::size_t write_bson_unsigned(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) { - auto n = j.m_value.number_integer; - if (n <= static_cast((std::numeric_limits::max)())) + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(n)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); } else { - oa->write_character(static_cast(0x12)); // int64 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.number_integer)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); } } - std::size_t write_bson_object_internal(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) { - oa->write_character(static_cast(0x03)); // object - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - auto const embedded_document_size = write_bson_object(j); - - return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } } - std::size_t write_bson_array(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) { - oa->write_character(static_cast(0x04)); // object - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - - auto document_size_offset = oa->reserve_characters(4ul); - std::int32_t embedded_document_size = 5ul; - - for (const auto& el : *j.m_value.array) + if (value <= static_cast((std::numeric_limits::max)())) { - embedded_document_size += write_bson_object_entry("", el); + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size("", el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(calc_bson_array_size(value)); + + for (const auto& el : value) + { + write_bson_element("", el); } oa->write_character(static_cast(0x00)); - write_number_at(document_size_offset, embedded_document_size); - - return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } - std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::boolean: + return header_size + 1ul; + case value_t::number_float: + return header_size + 8ul; + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + case value_t::null: + return header_size + 0ul; + }; + } + + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. + @param name The name to associate with the JSON entity @a j within the current BSON document + @return The size of the bson entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) { // LCOV_EXCL_START default: assert(false); - break; + return; // LCOV_EXCL_STOP case value_t::object: - return write_bson_object_internal(name, j); + return write_bson_object_entry(name, *j.m_value.object); case value_t::array: - return write_bson_array(name, j); + return write_bson_array(name, *j.m_value.array); case value_t::boolean: - return write_bson_boolean(name, j); + return write_bson_boolean(name, j.m_value.boolean); case value_t::number_float: - return write_bson_double(name, j); + return write_bson_double(name, j.m_value.number_float); case value_t::number_integer: - return write_bson_integer(name, j); + return write_bson_integer(name, j.m_value.number_integer); case value_t::number_unsigned: - return write_bson_unsigned(name, j); + return write_bson_unsigned(name, j.m_value.number_unsigned); case value_t::string: - return write_bson_string(name, j); + return write_bson_string(name, *j.m_value.string); case value_t::null: - return write_bson_null(name, j); + return write_bson_null(name); }; + } - return 0ul; + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; } /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object */ - std::size_t write_bson_object(const BasicJsonType& j) + void write_bson_object(const typename BasicJsonType::object_t& value) { - assert(j.type() == value_t::object); - auto document_size_offset = oa->reserve_characters(4ul); - std::int32_t document_size = 5ul; + write_number(calc_bson_object_size(value)); - for (const auto& el : *j.m_value.object) + for (const auto& el : value) { - document_size += write_bson_object_entry(el.first, el.second); + write_bson_element(el.first, el.second); } oa->write_character(static_cast(0x00)); - write_number_at(document_size_offset, document_size); - return document_size; } /*! @@ -873,7 +965,7 @@ class binary_writer case value_t::discarded: break; case value_t::object: - write_bson_object(j); + write_bson_object(*j.m_value.object); break; } } @@ -909,35 +1001,6 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } - /* - @brief write a number to output in little endian format - - @param[in] offset The offset where to start writing - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - */ - template - void write_number_at(std::size_t offset, const NumberType n) - { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - // LCOV_EXCL_START - if (is_little_endian && !OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - // LCOV_EXCL_STOP - - oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); - } - - // UBJSON: write number (floating point) template::value, int>::type = 0> diff --git a/include/nlohmann/detail/output/output_adapters.hpp b/include/nlohmann/detail/output/output_adapters.hpp index 64960011a..ff86a6e19 100644 --- a/include/nlohmann/detail/output/output_adapters.hpp +++ b/include/nlohmann/detail/output/output_adapters.hpp @@ -18,8 +18,6 @@ template struct output_adapter_protocol { virtual void write_character(CharType c) = 0; virtual void write_characters(const CharType* s, std::size_t length) = 0; - virtual void write_characters_at(std::size_t position, const CharType* s, std::size_t length) = 0; - virtual std::size_t reserve_characters(std::size_t length) = 0; virtual ~output_adapter_protocol() = default; }; @@ -44,18 +42,6 @@ class output_vector_adapter : public output_adapter_protocol std::copy(s, s + length, std::back_inserter(v)); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - std::copy(s, s + length, std::begin(v) + position); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = v.size(); - std::fill_n(std::back_inserter(v), length, static_cast(0x00)); - return position; - } - private: std::vector& v; }; @@ -77,22 +63,6 @@ class output_stream_adapter : public output_adapter_protocol stream.write(s, static_cast(length)); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - const auto orig_offset = stream.tellp(); - stream.seekp(static_cast::pos_type>(position)); - stream.write(s, static_cast(length)); - stream.seekp(orig_offset); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = stream.tellp(); - std::vector empty(length, static_cast(0)); - stream.write(empty.data(), length); - return static_cast(position); - } - private: std::basic_ostream& stream; }; @@ -114,19 +84,6 @@ class output_string_adapter : public output_adapter_protocol str.append(s, length); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - std::copy(s, s + length, std::begin(str) + position); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = str.size(); - std::fill_n(std::back_inserter(str), length, static_cast(0x00)); - return position; - } - - private: StringType& str; }; diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index c5d2e0dbe..1b61e9950 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6594,9 +6594,45 @@ class basic_json /*! @brief Serializes the given JSON object `j` to BSON and returns a vector containing the corresponding BSON-representation. - @param j The JSON object to convert to BSON. - @return The BSON representation of the JSON input `j`. - @pre The input `j` shall be an object: `j.is_object() == true` + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). + + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + + @pre The input `j` is required to be an object: `j.is_object() == true` + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format */ static std::vector to_bson(const basic_json& j) { @@ -6611,6 +6647,7 @@ class basic_json @param j The JSON object to convert to BSON. @param o The output adapter that receives the binary BSON representation. @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) */ static void to_bson(const basic_json& j, detail::output_adapter o) { diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 52fd79ca7..48b17d17d 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -5839,8 +5839,6 @@ template struct output_adapter_protocol { virtual void write_character(CharType c) = 0; virtual void write_characters(const CharType* s, std::size_t length) = 0; - virtual void write_characters_at(std::size_t position, const CharType* s, std::size_t length) = 0; - virtual std::size_t reserve_characters(std::size_t length) = 0; virtual ~output_adapter_protocol() = default; }; @@ -5865,18 +5863,6 @@ class output_vector_adapter : public output_adapter_protocol std::copy(s, s + length, std::back_inserter(v)); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - std::copy(s, s + length, std::begin(v) + position); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = v.size(); - std::fill_n(std::back_inserter(v), length, static_cast(0x00)); - return position; - } - private: std::vector& v; }; @@ -5898,22 +5884,6 @@ class output_stream_adapter : public output_adapter_protocol stream.write(s, static_cast(length)); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - const auto orig_offset = stream.tellp(); - stream.seekp(static_cast::pos_type>(position)); - stream.write(s, static_cast(length)); - stream.seekp(orig_offset); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = stream.tellp(); - std::vector empty(length, static_cast(0)); - stream.write(empty.data(), length); - return static_cast(position); - } - private: std::basic_ostream& stream; }; @@ -5935,19 +5905,6 @@ class output_string_adapter : public output_adapter_protocol str.append(s, length); } - void write_characters_at(std::size_t position, const CharType* s, std::size_t length) override - { - std::copy(s, s + length, std::begin(str) + position); - } - - std::size_t reserve_characters(std::size_t length) override - { - const auto position = str.size(); - std::fill_n(std::back_inserter(str), length, static_cast(0x00)); - return position; - } - - private: StringType& str; }; @@ -7896,6 +7853,7 @@ class binary_reader // #include + namespace nlohmann { namespace detail @@ -8563,187 +8521,278 @@ class binary_writer } } - std::size_t write_bson_boolean(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - oa->write_character(static_cast(0x08)); // boolean + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean oa->write_characters( reinterpret_cast(name.c_str()), name.size() + 1u); - oa->write_character(j.m_value.boolean ? static_cast(0x01) : static_cast(0x00)); - return /*id*/ 1ul + name.size() + 1u + /*boolean value*/ 1u; } - std::size_t write_bson_double(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) { - oa->write_character(static_cast(0x01)); // double - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - write_number(j.m_value.number_float); - return /*id*/ 1ul + name.size() + 1u + /*double value*/ 8u; + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); } - std::size_t write_bson_string(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, const double value) { - oa->write_character(static_cast(0x02)); // string (UTF-8) - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.string->size() + 1ul)); - oa->write_characters( - reinterpret_cast(j.m_value.string->c_str()), - j.m_value.string->size() + 1); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t) + j.m_value.string->size() + 1ul; + write_bson_entry_header(name, 0x01); + write_number(value); } - std::size_t write_bson_null(const typename BasicJsonType::string_t& name, const BasicJsonType&) + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) { - oa->write_character(static_cast(0x0A)); // null - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - return /*id*/ 1ul + name.size() + 1ul; + return sizeof(std::int32_t) + value.size() + 1ul; } - std::size_t write_bson_integer(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) { - auto n = j.m_value.number_integer; - if ((std::numeric_limits::min)() <= n and n <= (std::numeric_limits::max)()) + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(n)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + return sizeof(std::int32_t); } else { - oa->write_character(static_cast(0x12)); // int64 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.number_integer)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + return sizeof(std::int64_t); } } - std::size_t write_bson_unsigned(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) { - auto n = j.m_value.number_integer; - if (n <= static_cast((std::numeric_limits::max)())) + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x10)); // int32 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(n)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int32_t); + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); } else { - oa->write_character(static_cast(0x12)); // int64 - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - write_number(static_cast(j.m_value.number_integer)); - - return /*id*/ 1ul + name.size() + 1ul + sizeof(std::int64_t); + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); } } - std::size_t write_bson_object_internal(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) { - oa->write_character(static_cast(0x03)); // object - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - auto const embedded_document_size = write_bson_object(j); - - return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } } - std::size_t write_bson_array(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) { - oa->write_character(static_cast(0x04)); // object - oa->write_characters( - reinterpret_cast(name.c_str()), - name.size() + 1u); - - - auto document_size_offset = oa->reserve_characters(4ul); - std::int32_t embedded_document_size = 5ul; - - for (const auto& el : *j.m_value.array) + if (value <= static_cast((std::numeric_limits::max)())) { - embedded_document_size += write_bson_object_entry("", el); + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size("", el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(calc_bson_array_size(value)); + + for (const auto& el : value) + { + write_bson_element("", el); } oa->write_character(static_cast(0x00)); - write_number_at(document_size_offset, embedded_document_size); - - return /*id*/ 1ul + name.size() + 1ul + embedded_document_size; } - std::size_t write_bson_object_entry(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::boolean: + return header_size + 1ul; + case value_t::number_float: + return header_size + 8ul; + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + case value_t::null: + return header_size + 0ul; + }; + } + + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. + @param name The name to associate with the JSON entity @a j within the current BSON document + @return The size of the bson entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) { switch (j.type()) { // LCOV_EXCL_START default: assert(false); - break; + return; // LCOV_EXCL_STOP case value_t::object: - return write_bson_object_internal(name, j); + return write_bson_object_entry(name, *j.m_value.object); case value_t::array: - return write_bson_array(name, j); + return write_bson_array(name, *j.m_value.array); case value_t::boolean: - return write_bson_boolean(name, j); + return write_bson_boolean(name, j.m_value.boolean); case value_t::number_float: - return write_bson_double(name, j); + return write_bson_double(name, j.m_value.number_float); case value_t::number_integer: - return write_bson_integer(name, j); + return write_bson_integer(name, j.m_value.number_integer); case value_t::number_unsigned: - return write_bson_unsigned(name, j); + return write_bson_unsigned(name, j.m_value.number_unsigned); case value_t::string: - return write_bson_string(name, j); + return write_bson_string(name, *j.m_value.string); case value_t::null: - return write_bson_null(name, j); + return write_bson_null(name); }; + } - return 0ul; + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; } /*! @param[in] j JSON value to serialize @pre j.type() == value_t::object */ - std::size_t write_bson_object(const BasicJsonType& j) + void write_bson_object(const typename BasicJsonType::object_t& value) { - assert(j.type() == value_t::object); - auto document_size_offset = oa->reserve_characters(4ul); - std::int32_t document_size = 5ul; + write_number(calc_bson_object_size(value)); - for (const auto& el : *j.m_value.object) + for (const auto& el : value) { - document_size += write_bson_object_entry(el.first, el.second); + write_bson_element(el.first, el.second); } oa->write_character(static_cast(0x00)); - write_number_at(document_size_offset, document_size); - return document_size; } /*! @@ -8760,7 +8809,7 @@ class binary_writer case value_t::discarded: break; case value_t::object: - write_bson_object(j); + write_bson_object(*j.m_value.object); break; } } @@ -8796,35 +8845,6 @@ class binary_writer oa->write_characters(vec.data(), sizeof(NumberType)); } - /* - @brief write a number to output in little endian format - - @param[in] offset The offset where to start writing - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - */ - template - void write_number_at(std::size_t offset, const NumberType n) - { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - // LCOV_EXCL_START - if (is_little_endian && !OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - // LCOV_EXCL_STOP - - oa->write_characters_at(offset, vec.data(), sizeof(NumberType)); - } - - // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -18147,9 +18167,45 @@ class basic_json /*! @brief Serializes the given JSON object `j` to BSON and returns a vector containing the corresponding BSON-representation. - @param j The JSON object to convert to BSON. - @return The BSON representation of the JSON input `j`. - @pre The input `j` shall be an object: `j.is_object() == true` + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). + + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + + @pre The input `j` is required to be an object: `j.is_object() == true` + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format */ static std::vector to_bson(const basic_json& j) { @@ -18164,6 +18220,7 @@ class basic_json @param j The JSON object to convert to BSON. @param o The output adapter that receives the binary BSON representation. @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) */ static void to_bson(const basic_json& j, detail::output_adapter o) { diff --git a/test/src/unit-alt-string.cpp b/test/src/unit-alt-string.cpp index d866ed703..356835c01 100644 --- a/test/src/unit-alt-string.cpp +++ b/test/src/unit-alt-string.cpp @@ -102,16 +102,6 @@ class alt_string str_impl.resize(n, c); } - auto begin() -> std::string::iterator - { - return str_impl.begin(); - } - - auto end() -> std::string::iterator - { - return str_impl.end(); - } - template bool operator<(const op_type& op) const { From df0f612d1b32f04a191078db75c8f3deb4655cdb Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Sun, 7 Oct 2018 20:08:05 +0200 Subject: [PATCH 21/36] BSON: allow and discard values and object entries of type `value_t::discarded` --- .../nlohmann/detail/output/binary_writer.hpp | 5 +++- single_include/nlohmann/json.hpp | 5 +++- test/src/unit-bson.cpp | 25 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index bce5116dc..0ec237eaf 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -9,7 +9,6 @@ #include #include - namespace nlohmann { namespace detail @@ -864,6 +863,8 @@ class binary_writer assert(false); return 0ul; // LCOV_EXCL_STOP + case value_t::discarded: + return 0ul; case value_t::object: return header_size + calc_bson_object_size(*j.m_value.object); case value_t::array: @@ -898,6 +899,8 @@ class binary_writer assert(false); return; // LCOV_EXCL_STOP + case value_t::discarded: + return; case value_t::object: return write_bson_object_entry(name, *j.m_value.object); case value_t::array: diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 48b17d17d..98c6b1f18 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -7853,7 +7853,6 @@ class binary_reader // #include - namespace nlohmann { namespace detail @@ -8708,6 +8707,8 @@ class binary_writer assert(false); return 0ul; // LCOV_EXCL_STOP + case value_t::discarded: + return 0ul; case value_t::object: return header_size + calc_bson_object_size(*j.m_value.object); case value_t::array: @@ -8742,6 +8743,8 @@ class binary_writer assert(false); return; // LCOV_EXCL_STOP + case value_t::discarded: + return; case value_t::object: return write_bson_object_entry(name, *j.m_value.object); case value_t::array: diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 33ef25c43..4a1b387e4 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -376,6 +376,31 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } + SECTION("discarded values are not serialized") + { + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("discarded members are not serialized") + { + json j = + { + { "entry", json::value_t::discarded } + }; + + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + } + SECTION("non-empty object with object member") { From 5bccacda3018a944756156f1b9b5930c128fb32f Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Tue, 16 Oct 2018 19:13:07 +0200 Subject: [PATCH 22/36] BSON: throw json.exception.out_of_range.407 in case a value of type `std::uint64_t` is serialized to BSON. Also, added a missing EOF-check to binary_reader. --- include/nlohmann/detail/exceptions.hpp | 2 +- .../nlohmann/detail/input/binary_reader.hpp | 5 + .../nlohmann/detail/output/binary_writer.hpp | 7 +- single_include/nlohmann/json.hpp | 14 +- test/src/unit-bson.cpp | 354 +++++++++++++++++- 5 files changed, 376 insertions(+), 6 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index 7edc00326..e23dd0d7e 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -264,7 +264,7 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | @liveexample{The following code shows how an `out_of_range` exception can be diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 2b3ff1abe..8dfe3b98a 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -256,6 +256,11 @@ class binary_reader { while (auto element_type = get()) { + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } + const std::size_t element_type_parse_position = chars_read; string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 0ec237eaf..0a7cf45fb 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -800,11 +800,16 @@ class binary_writer write_bson_entry_header(name, 0x10); // int32 write_number(static_cast(value)); } - else + else if (value <= static_cast((std::numeric_limits::max)())) { write_bson_entry_header(name, 0x12); // int64 write_number(static_cast(value)); } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + } /*! diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 98c6b1f18..4bd80c548 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -863,7 +863,7 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | @liveexample{The following code shows how an `out_of_range` exception can be @@ -6198,6 +6198,11 @@ class binary_reader { while (auto element_type = get()) { + if (JSON_UNLIKELY(not unexpect_eof())) + { + return false; + } + const std::size_t element_type_parse_position = chars_read; string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) @@ -8644,11 +8649,16 @@ class binary_writer write_bson_entry_header(name, 0x10); // int32 write_number(static_cast(value)); } - else + else if (value <= static_cast((std::numeric_limits::max)())) { write_bson_entry_header(name, 0x12); // int64 write_number(static_cast(value)); } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + } /*! diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 4a1b387e4..58ad71bd6 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -31,7 +31,6 @@ SOFTWARE. #include using nlohmann::json; - #include TEST_CASE("BSON") @@ -708,7 +707,7 @@ TEST_CASE("Incomplete BSON INPUT 3") // missing input data... }; CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at 29: unexpected end of input"); + "[json.exception.parse_error.110] parse error at 28: unexpected end of input"); CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); SaxCountdown scp(1); @@ -752,3 +751,354 @@ TEST_CASE("Unsupported BSON input") } + +TEST_CASE("BSON numerical data") +{ + SECTION("number") + { + SECTION("signed") + { + SECTION("std::int64_t: INT64_MIN .. INT32_MIN-1") + { + std::vector numbers + { + INT64_MIN, + -1000000000000000000LL, + -100000000000000000LL, + -10000000000000000LL, + -1000000000000000LL, + -100000000000000LL, + -10000000000000LL, + -1000000000000LL, + -100000000000LL, + -10000000000LL, + static_cast(INT32_MIN) - 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + + SECTION("signed std::int32_t: INT32_MIN .. INT32_MAX") + { + std::vector numbers + { + INT32_MIN, + -2147483647L, + -1000000000L, + -100000000L, + -10000000L, + -1000000L, + -100000L, + -10000L, + -1000L, + -100L, + -10L, + -1L, + 0L, + 1L, + 10L, + 100L, + 1000L, + 10000L, + 100000L, + 1000000L, + 10000000L, + 100000000L, + 1000000000L, + 2147483646L, + INT32_MAX + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("signed std::int64_t: INT32_MAX+1 .. INT64_MAX") + { + std::vector numbers + { + INT64_MAX, + 1000000000000000000LL, + 100000000000000000LL, + 10000000000000000LL, + 1000000000000000LL, + 100000000000000LL, + 10000000000000LL, + 1000000000000LL, + 100000000000LL, + 10000000000LL, + static_cast(INT32_MAX) + 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + } + + SECTION("unsigned") + { + SECTION("unsigned std::uint64_t: 0 .. INT32_MAX") + { + std::vector numbers + { + 0ULL, + 1ULL, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 2147483646ULL, + static_cast(INT32_MAX) + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("unsigned std::uint64_t: INT32_MAX+1 .. INT64_MAX") + { + std::vector numbers + { + static_cast(INT32_MAX) + 1, + 4000000000ULL, + static_cast(UINT32_MAX), + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + static_cast(INT64_MAX), + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + } + } + + SECTION("unsigned std::uint64_t: INT64_MAX+1 .. UINT64_MAX") + { + std::vector numbers + { + static_cast(INT64_MAX) + 1ULL, + 10000000000000000000ULL, + 18000000000000000000ULL, + UINT64_MAX - 1ULL, + UINT64_MAX, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.407] number overflow serializing " + std::to_string(i)); + } + } + + } + } +} From daa3ca8a2e42446a3d7fd0ae83bcffa59713b6c7 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Tue, 16 Oct 2018 19:29:42 +0200 Subject: [PATCH 23/36] BSON: Adjusted documentation of `binary_writer::to_bson()` --- include/nlohmann/json.hpp | 7 ++++++- single_include/nlohmann/json.hpp | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index 1b61e9950..c973f0a0c 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6609,6 +6609,7 @@ class basic_json number_integer | 2147483648..9223372036854775807 | int64 | 0x12 number_unsigned | 0..2147483647 | int32 | 0x10 number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- number_float | *any value* | double | 0x01 string | *any value* | string | 0x02 array | *any value* | document | 0x04 @@ -6616,8 +6617,12 @@ class basic_json @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON. - @pre The input `j` is required to be an object: `j.is_object() == true` + @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. @note Any BSON output created via @ref to_bson can be successfully parsed by @ref from_bson. diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 4bd80c548..87abf2ca0 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -18195,6 +18195,7 @@ class basic_json number_integer | 2147483648..9223372036854775807 | int64 | 0x12 number_unsigned | 0..2147483647 | int32 | 0x10 number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- number_float | *any value* | double | 0x01 string | *any value* | string | 0x02 array | *any value* | document | 0x04 @@ -18202,8 +18203,12 @@ class basic_json @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON. - @pre The input `j` is required to be an object: `j.is_object() == true` + @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. @note Any BSON output created via @ref to_bson can be successfully parsed by @ref from_bson. From 978c3c4116e610ef67ecc93b8bf410e462fada0e Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Tue, 16 Oct 2018 20:42:00 +0200 Subject: [PATCH 24/36] BSON: throw `json.exception.out_of_range.409` in case a key to be serialized to BSON contains a U+0000 --- include/nlohmann/detail/exceptions.hpp | 1 + include/nlohmann/detail/output/binary_writer.hpp | 5 +++++ include/nlohmann/json.hpp | 5 ++++- single_include/nlohmann/json.hpp | 11 ++++++++++- test/src/unit-bson.cpp | 10 ++++++++++ 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index e23dd0d7e..bab76b62f 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -266,6 +266,7 @@ json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch op json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 0a7cf45fb..24c080016 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -681,6 +681,11 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { + if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + { + JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); + } + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; } diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index c973f0a0c..64b32a7fd 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6617,9 +6617,12 @@ class basic_json @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. - Also, integers larger than 9223372036854775807 cannot be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) @throw type_error.317 if `!j.is_object()` @pre The input `j` is required to be an object: `j.is_object() == true`. diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 87abf2ca0..b5aaecf80 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -865,6 +865,7 @@ json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch op json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} @@ -8530,6 +8531,11 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { + if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + { + JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); + } + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; } @@ -18203,9 +18209,12 @@ class basic_json @warning The mapping is **incomplete**, since only JSON-objects (and things contained therein) can be serialized to BSON. - Also, integers larger than 9223372036854775807 cannot be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. @throw out_of_range.407 if `j.is_number_unsigned() && j.get() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) @throw type_error.317 if `!j.is_object()` @pre The input `j` is required to be an object: `j.is_object() == true`. diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 58ad71bd6..f622ff302 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -91,6 +91,16 @@ TEST_CASE("BSON") } } + SECTION("keys containing code-point U+0000 cannot be serialized to BSON") + { + json j = + { + { std::string("en\0try", 6), true } + }; + REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000"); + } + SECTION("objects") { SECTION("empty object") From 8de10c518b28b2c2d9b79e84f49d8c5e53229407 Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Wed, 17 Oct 2018 21:47:01 +0200 Subject: [PATCH 25/36] BSON: Hopefully fixing ambiguity (on some compilers) to call to string::find() --- include/nlohmann/detail/output/binary_writer.hpp | 2 +- single_include/nlohmann/json.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 631acce2b..f659476e6 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -683,7 +683,7 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 94a0be696..e739eb5ce 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8840,7 +8840,7 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); } From 5ba812d518e49d3952b2bc46e63c081ec3ca8e1a Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Thu, 18 Oct 2018 06:38:34 +0200 Subject: [PATCH 26/36] BSON: fixed incorrect casting in unit-bson.cpp --- test/src/unit-bson.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 13dfd52c7..633dbd2e8 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -869,7 +869,7 @@ TEST_CASE("BSON numerical data") }; CHECK(j.at("entry").is_number_integer()); - std::uint64_t iu = *reinterpret_cast(&i); + std::uint32_t iu = *reinterpret_cast(&i); std::vector expected_bson = { 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) From ad11b6c35ecebb3b4a22c8f5b814c37969160f7b Mon Sep 17 00:00:00 2001 From: Julian Becker Date: Thu, 18 Oct 2018 20:05:46 +0200 Subject: [PATCH 27/36] BSON: Improved exception-related tests and report location of U+0000 in the key-string as part of `out_of_range.409`-message --- include/nlohmann/detail/exceptions.hpp | 2 +- .../nlohmann/detail/output/binary_writer.hpp | 6 ++-- single_include/nlohmann/json.hpp | 8 +++-- test/src/unit-bson.cpp | 31 +++++++++++++------ 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index 5b0420e2f..1ac2606bd 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -282,7 +282,7 @@ json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch op json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | -json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index f659476e6..bebfa9363 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -683,9 +683,11 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + const auto it = name.find(static_cast(0)); + if (it != BasicJsonType::string_t::npos) { - JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); } return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index e739eb5ce..bbcef7848 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -923,7 +923,7 @@ json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch op json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | -json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} @@ -8840,9 +8840,11 @@ class binary_writer */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { - if (name.find(static_cast(0)) != BasicJsonType::string_t::npos) + const auto it = name.find(static_cast(0)); + if (it != BasicJsonType::string_t::npos) { - JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000")); + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); } return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 633dbd2e8..3449b698e 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -48,7 +48,8 @@ TEST_CASE("BSON") SECTION("null") { json j = nullptr; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 0 cannot be serialized to requested format"); } SECTION("boolean") @@ -56,38 +57,44 @@ TEST_CASE("BSON") SECTION("true") { json j = true; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); } SECTION("false") { json j = false; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); } } SECTION("number") { json j = 42; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 5 cannot be serialized to requested format"); } SECTION("float") { json j = 4.2; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 7 cannot be serialized to requested format"); } SECTION("string") { json j = "not supported"; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 3 cannot be serialized to requested format"); } SECTION("array") { json j = std::vector {1, 2, 3, 4, 5, 6, 7}; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error); + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 2 cannot be serialized to requested format"); } } @@ -97,8 +104,8 @@ TEST_CASE("BSON") { { std::string("en\0try", 6), true } }; - REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000"); + REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)"); } SECTION("objects") @@ -678,6 +685,7 @@ TEST_CASE("Incomplete BSON INPUT") 'e', 'n', 't' // unexpected EOF }; + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); CHECK_THROWS_WITH(json::from_bson(incomplete_bson), "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); @@ -695,6 +703,7 @@ TEST_CASE("Incomplete BSON INPUT 2") 0x08, // entry: boolean, unexpected EOF }; + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); CHECK_THROWS_WITH(json::from_bson(incomplete_bson), "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); @@ -717,6 +726,8 @@ TEST_CASE("Incomplete BSON INPUT 3") 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 // missing input data... }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); CHECK_THROWS_WITH(json::from_bson(incomplete_bson), "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); @@ -734,6 +745,7 @@ TEST_CASE("Incomplete BSON INPUT 4") 0x0D, 0x00, // size (incomplete), unexpected EOF }; + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); CHECK_THROWS_WITH(json::from_bson(incomplete_bson), "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); @@ -753,6 +765,7 @@ TEST_CASE("Unsupported BSON input") 0x00 // end marker }; + CHECK_THROWS_AS(json::from_bson(bson), json::parse_error&); CHECK_THROWS_WITH(json::from_bson(bson), "[json.exception.parse_error.114] parse error at byte 5: Unsupported BSON record type 0xFF"); CHECK(json::from_bson(bson, true, false).is_discarded()); From e2c5913a5067c1868f732184944d14d747c68bbe Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 24 Oct 2018 15:43:37 +0200 Subject: [PATCH 28/36] :construction: some changes to the BSON code - added fuzz testers - added some reference files - made an exception text more clear --- Makefile | 9 ++ include/nlohmann/detail/meta/type_traits.hpp | 12 +- .../nlohmann/detail/output/binary_writer.hpp | 20 +++- single_include/nlohmann/json.hpp | 32 ++++-- test/Makefile | 6 +- test/data/json.org/1.json.bson | Bin 0 -> 393 bytes test/data/json.org/2.json.bson | Bin 0 -> 216 bytes test/data/json.org/3.json.bson | Bin 0 -> 406 bytes test/data/json.org/4.json.bson | Bin 0 -> 2786 bytes test/data/json.org/5.json.bson | Bin 0 -> 730 bytes test/data/json_tests/pass3.json.bson | Bin 0 -> 123 bytes test/src/fuzzer-parse_bson.cpp | 68 ++++++++++++ test/src/unit-bson.cpp | 103 ++++++++++++++++-- 13 files changed, 218 insertions(+), 32 deletions(-) create mode 100644 test/data/json.org/1.json.bson create mode 100644 test/data/json.org/2.json.bson create mode 100644 test/data/json.org/3.json.bson create mode 100644 test/data/json.org/4.json.bson create mode 100644 test/data/json.org/5.json.bson create mode 100644 test/data/json_tests/pass3.json.bson create mode 100644 test/src/fuzzer-parse_bson.cpp diff --git a/Makefile b/Makefile index 135db65a8..b67841510 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,7 @@ all: @echo "cppcheck - analyze code with cppcheck" @echo "doctest - compile example files and check their output" @echo "fuzz_testing - prepare fuzz testing of the JSON parser" + @echo "fuzz_testing_bson - prepare fuzz testing of the BSON parser" @echo "fuzz_testing_cbor - prepare fuzz testing of the CBOR parser" @echo "fuzz_testing_msgpack - prepare fuzz testing of the MessagePack parser" @echo "fuzz_testing_ubjson - prepare fuzz testing of the UBJSON parser" @@ -220,6 +221,14 @@ fuzz_testing: find test/data/json_tests -size -5k -name *json | xargs -I{} cp "{}" fuzz-testing/testcases @echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer" +fuzz_testing_bson: + rm -fr fuzz-testing + mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out + $(MAKE) parse_bson_fuzzer -C test CXX=afl-clang++ + mv test/parse_bson_fuzzer fuzz-testing/fuzzer + find test/data -size -5k -name *.bson | xargs -I{} cp "{}" fuzz-testing/testcases + @echo "Execute: afl-fuzz -i fuzz-testing/testcases -o fuzz-testing/out fuzz-testing/fuzzer" + fuzz_testing_cbor: rm -fr fuzz-testing mkdir -p fuzz-testing fuzz-testing/testcases fuzz-testing/out diff --git a/include/nlohmann/detail/meta/type_traits.hpp b/include/nlohmann/detail/meta/type_traits.hpp index efe878f62..4c4c4d3dd 100644 --- a/include/nlohmann/detail/meta/type_traits.hpp +++ b/include/nlohmann/detail/meta/type_traits.hpp @@ -193,13 +193,13 @@ struct is_constructible_object_type_impl < static constexpr bool value = std::is_constructible::value and - std::is_same::value or - (has_from_json::value or - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); + (has_from_json::value or + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value)); }; template diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index bebfa9363..7f9f0be37 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -976,14 +976,22 @@ class binary_writer { switch (j.type()) { - default: - JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); - break; - case value_t::discarded: - break; case value_t::object: + { write_bson_object(*j.m_value.object); break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } } } @@ -1009,7 +1017,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian && !OutputIsLittleEndian) + if (is_little_endian and not OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 29f0cd0db..0664cc756 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -553,13 +553,13 @@ struct is_constructible_object_type_impl < static constexpr bool value = std::is_constructible::value and - std::is_same::value or - (has_from_json::value or - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); + (has_from_json::value or + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value)); }; template @@ -9234,14 +9234,22 @@ class binary_writer { switch (j.type()) { - default: - JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); - break; - case value_t::discarded: - break; case value_t::object: + { write_bson_object(*j.m_value.object); break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } } } @@ -9267,7 +9275,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian && !OutputIsLittleEndian) + if (is_little_endian and not OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); diff --git a/test/Makefile b/test/Makefile index afbb1ba5c..4f00cbc7a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -10,6 +10,7 @@ SOURCES = src/unit.cpp \ src/unit-algorithms.cpp \ src/unit-allocator.cpp \ src/unit-alt-string.cpp \ + src/unit-bson.cpp \ src/unit-capacity.cpp \ src/unit-cbor.cpp \ src/unit-class_const_iterator.cpp \ @@ -90,12 +91,15 @@ check: $(OBJECTS) $(TESTCASES) ############################################################################## FUZZER_ENGINE = src/fuzzer-driver_afl.cpp -FUZZERS = parse_afl_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer +FUZZERS = parse_afl_fuzzer parse_bson_fuzzer parse_cbor_fuzzer parse_msgpack_fuzzer parse_ubjson_fuzzer fuzzers: $(FUZZERS) parse_afl_fuzzer: $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_json.cpp -o $@ +parse_bson_fuzzer: + $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_bson.cpp -o $@ + parse_cbor_fuzzer: $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(FUZZER_ENGINE) src/fuzzer-parse_cbor.cpp -o $@ diff --git a/test/data/json.org/1.json.bson b/test/data/json.org/1.json.bson new file mode 100644 index 0000000000000000000000000000000000000000..e14e9b92304d8a37a803af2e6c870496679aefd6 GIT binary patch literal 393 zcmZ9IF;BxV5QPs@#8!!s?HJeqi4Ksm1qDTsQgsLe>oqw-R40yXr=ne$`PW>UHkDYc z``)wP)A{!d;JnnvIkH>W^%VCMRU252lvd8eY{a+5%jFwk6|Pp6H!uZ&BwJz-JkMVq z=fRUWxi!tUh6}N>c#$51`4fIbn(S*b=1@BW*PGk0NsuC6dCjtpQmu!qnj6S*&dD!MMN!TM69v*R`#?sxft=33008zvFbx0z literal 0 HcmV?d00001 diff --git a/test/data/json.org/3.json.bson b/test/data/json.org/3.json.bson new file mode 100644 index 0000000000000000000000000000000000000000..deb7c5391fc76590baf686212ee5a4407c404dc6 GIT binary patch literal 406 zcmZuuJx{|h5Ir0L14v|N@xTBb+H^#Si3NlZLsh|mgq--|SU7eRUnu1t@CW!8{5vKl z&Mgukb$jpb-Mg>%Er4ucwLyY6#zJE`7{~x1c*tz448j95p`10oMNhEHv|@lgZANkMQxLJQ;DFgxGKrJf;K!XekU#&afsF8l;3Ji9>>#uv1$&&oaJG7q zm)PT`wmYmyR@^uEn=nM2cQE2$nq?e1Rasifb9wxaPvo>HPUJ|Q7H5!0`-*UsMv1g^ z>s89&^zX=;lj&YwhXvd-&KtC*A^1$UTkXHk(Nm7JNwUDjBQ_uNr%tp>@OVdSYa-eaP03l zp`ggjNQYz9rJi5PI-L6Q!(bLJrh$S?2P@uHYi6<8^QkAq#7Hxe{FuI`?>wGHHr%GGP z&2pC~namF}E){zh>I<9wG}jY--Hmm&W-N2-xB?2P^8?4;hr{ViDxL^6pGp4p&gsFL z&s<*#Ba$aehPtz!p7QJew>btn_5ti7nc>aYC?@-EDyM5cK;mS4%H52$$pM~DMw=9AQNYnF)i3m{U76}UKexDeAT#NhuUDz;8+j=V>mU$n)6HaT zwAEABeVI(+u6?Db?S$!$o)1R;Y2{FbKSmSwaE0}KQ6U`Y^hhefzM?!9)>UiA4M*eMN8deuyw^wW6K)0J-cL0;q$=GxijEm)%yLM@lrNN{S{5V0 zK7@`NbV?=b&Kz2MWjv79mB!T2swN1QFYWxbAz{N>_`5wES+z5MjT#I@o}zXC-NHy> zJfuO%Fjb*hKLw6Pvz}EJtYrl^@$4{9WiA@PK7!j+n^8?~Lke}521L5)rVAOYyPbg* zob6Bic)mD%(m6gkii{U_lL>3N6+aC$=)i2Ev{sD7gkU+1)AKV&xbQicakMOk3xNjt-A1S&?_C1ACWO1UR1~ zM%4`j$Mf}OKyMd-0;dDgh6L9m9?%+iO+;@)Np6VPEIeVPx4k*wJAgQ?X%X?8R zq66A~H9a+YF+c9JJ8-%?5qNo?TiKSURnVVr7EcJog&O3ewp7?X8ywrJfQW|@-u;c?U}2*kb<Nx)@}vLm*`L#Hy)Tn};vRA>MkKgD zBy_j9mvb0G6vuKGJ3?PbZM4?VJBVLv=nV5!CcU|3%C7c>f%~=%GZ+~_SKzE|f)cJ?2`17T^*ofA z&D_{kwnXtke-v@w)v}aRy@tfEtu0;i1-BE$+Y(=FJ7>VS@29pAhGx!HifP=rst5_; zi2r`SkIPpo>nTph)~(vcw4pUi+Q~Jjr@pAz0_>j!;MyWy2~K34p*9T9i@_vYEXB|| zQXr$!YwZIiduiRy-Z>Z_C^h*Ki+8;2+21Wu0I literal 0 HcmV?d00001 diff --git a/test/data/json_tests/pass3.json.bson b/test/data/json_tests/pass3.json.bson new file mode 100644 index 0000000000000000000000000000000000000000..a9c07cf31a86c952d6916dc358cb3f545c8b3473 GIT binary patch literal 123 zcmYL>F%E-35Cd1_1@tNW0-C%6iV!KZDHgO6Ah?U}_K;9MA9s}YjQtalTlemt7%V$p z>TW6Et2sa9Ls+!|J}((gE;XCh!KeXoTy643s$n+7!r>nIap+b|?lkwpINIeRCTL1} GHl%-k10(+c literal 0 HcmV?d00001 diff --git a/test/src/fuzzer-parse_bson.cpp b/test/src/fuzzer-parse_bson.cpp new file mode 100644 index 000000000..4ba20dffd --- /dev/null +++ b/test/src/fuzzer-parse_bson.cpp @@ -0,0 +1,68 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (fuzz test support) +| | |__ | | | | | | version 3.3.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +This file implements a parser test suitable for fuzz testing. Given a byte +array data, it performs the following steps: + +- j1 = from_bson(data) +- vec = to_bson(j1) +- j2 = from_bson(vec) +- assert(j1 == j2) + +The provided function `LLVMFuzzerTestOneInput` can be used in different fuzzer +drivers. + +Licensed under the MIT License . +*/ + +#include +#include +#include + +using json = nlohmann::json; + +// see http://llvm.org/docs/LibFuzzer.html +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) +{ + try + { + // step 1: parse input + std::vector vec1(data, data + size); + json j1 = json::from_bson(vec1); + + try + { + // step 2: round trip + std::vector vec2 = json::to_bson(j1); + + // parse serialization + json j2 = json::from_bson(vec2); + + // serializations must match + assert(json::to_bson(j2) == vec2); + } + catch (const json::parse_error&) + { + // parsing a CBOR serialization must not fail + assert(false); + } + } + catch (const json::parse_error&) + { + // parse errors are ok, because input may be random bytes + } + catch (const json::type_error&) + { + // type errors can occur during parsing, too + } + catch (const json::out_of_range&) + { + // out of range errors can occur during parsing, too + } + + // return 0 - non-zero return values are reserved for future use + return 0; +} diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 3449b698e..ef94a807b 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -49,7 +49,7 @@ TEST_CASE("BSON") { json j = nullptr; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 0 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null"); } SECTION("boolean") @@ -58,14 +58,14 @@ TEST_CASE("BSON") { json j = true; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } SECTION("false") { json j = false; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } } @@ -73,28 +73,28 @@ TEST_CASE("BSON") { json j = 42; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 5 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("float") { json j = 4.2; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 7 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("string") { json j = "not supported"; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 3 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); } SECTION("array") { json j = std::vector {1, 2, 3, 4, 5, 6, 7}; REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 2 cannot be serialized to requested format"); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); } } @@ -1126,3 +1126,92 @@ TEST_CASE("BSON numerical data") } } } + +TEST_CASE("BSON roundtrips", "[hide]") +{ + SECTION("reference files") + { + for (std::string filename : + { + "test/data/json.org/1.json", + "test/data/json.org/2.json", + "test/data/json.org/3.json", + "test/data/json.org/4.json", + "test/data/json.org/5.json" + }) + { + CAPTURE(filename); + + SECTION(filename + ": std::vector") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + json j2; + CHECK_NOTHROW(j2 = json::from_bson(packed)); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": std::ifstream") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + json j2; + CHECK_NOTHROW(j2 = json::from_bson(f_bson)); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": uint8_t* and size") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + json j2; + CHECK_NOTHROW(j2 = json::from_bson({packed.data(), packed.size()})); + + // compare parsed JSON values + CHECK(j1 == j2); + } + + SECTION(filename + ": output to output adapters") + { + // parse JSON file + std::ifstream f_json(filename); + json j1 = json::parse(f_json); + + // parse BSON file + std::ifstream f_bson(filename + ".bson", std::ios::binary); + std::vector packed( + (std::istreambuf_iterator(f_bson)), + std::istreambuf_iterator()); + + SECTION(filename + ": output adapters: std::vector") + { + std::vector vec; + json::to_bson(j1, vec); + CHECK(vec == packed); + } + } + } + } +} From 4d1eaace8cc1166f82b867f61650f25f32018919 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 24 Oct 2018 18:55:08 +0200 Subject: [PATCH 29/36] :hammer: fixed fuzz code to avoid false positives in case of discarded values --- test/src/fuzzer-parse_bson.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/src/fuzzer-parse_bson.cpp b/test/src/fuzzer-parse_bson.cpp index 4ba20dffd..1d9337678 100644 --- a/test/src/fuzzer-parse_bson.cpp +++ b/test/src/fuzzer-parse_bson.cpp @@ -33,6 +33,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) std::vector vec1(data, data + size); json j1 = json::from_bson(vec1); + if (j1.is_discarded()) + { + return 0; + } + try { // step 2: round trip From 1968e5c7939ad63262de6649a7ee8c6f50aecaff Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Wed, 24 Oct 2018 23:39:30 +0200 Subject: [PATCH 30/36] :art: clean up binary formats --- .../nlohmann/detail/input/binary_reader.hpp | 790 ++++++------ .../nlohmann/detail/output/binary_writer.hpp | 312 +++-- single_include/nlohmann/json.hpp | 1102 +++++++++-------- test/src/unit-bson.cpp | 184 +-- test/src/unit-regression.cpp | 8 +- 5 files changed, 1274 insertions(+), 1122 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index c213d8558..615ab73cf 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -125,14 +125,38 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } /*! @brief Parses a C-style string from the BSON input. - @param [out] result A reference to the string variable where the read string - is to be stored. - @return `true` if the \x00-byte indicating the end of the - string was encountered before the EOF. - `false` indicates an unexpected EOF. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -155,107 +179,107 @@ class binary_reader } /*! - @brief Parses a zero-terminated string of length @a len from the BSON input. - @param [in] len The length (including the zero-byte at the end) of the string to be read. - @param [out] result A reference to the string variable where the read string - is to be stored. + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. @tparam NumberType The type of the length @a len @pre len > 0 @return `true` if the string was successfully parsed */ - template + template bool get_bson_string(const NumberType len, string_t& result) { - return get_string(input_format_t::bson, len - static_cast(1), result) - && get() != std::char_traits::eof(); - } - - /*! - @return A hexadecimal string representation of the given @a byte - @param byte The byte to convert to a string - */ - static std::string byte_hexstring(unsigned char byte) - { - char cr[3]; - snprintf(cr, sizeof(cr), "%02hhX", byte); - return std::string{cr}; + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } /*! @brief Read a BSON document element of the given @a element_type. - @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param element_type_parse_position The position in the input stream, where the `element_type` was read. - @warning Not all BSON element types are supported yet. An unsupported @a element_type will - give rise to a parse_error.114: Unsupported BSON record type 0x... + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) { switch (element_type) { case 0x01: // double { double number; - return get_number(input_format_t::bson, number) - && sax->number_float(static_cast(number), ""); + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); } + case 0x02: // string { std::int32_t len; string_t value; - return get_number(input_format_t::bson, len) - && get_bson_string(len, value) - && sax->string(value); - } - case 0x08: // boolean - { - return sax->boolean(static_cast(get())); - } - case 0x10: // int32 - { - std::int32_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x12: // int64 - { - std::int64_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x0A: // null - { - return sax->null(); + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); } + case 0x03: // object { return parse_bson_internal(); } + case 0x04: // array { return parse_bson_array(); } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + default: // anything else not supported (yet) { - auto element_type_str = byte_hexstring(element_type); - return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); } } } /*! - @brief Read a BSON element list (as specified in the BSON-spec) from the input - and passes it to the SAX-parser. - The same binary layout is used for objects and arrays, hence it must - be indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - @param is_array Determines if the element list being read is to be treated as - an object (@a is_array == false), or as an array (@a is_array == true). + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_list(bool is_array) + bool parse_bson_element_list(const bool is_array) { - while (auto element_type = get()) + string_t key; + while (int element_type = get()) { if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) { @@ -263,13 +287,12 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; - string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } - if (!is_array) + if (not is_array) { sax->key(key); } @@ -278,7 +301,11 @@ class binary_reader { return false; } + + // get_bson_cstr only appends + key.clear(); } + return true; } @@ -291,7 +318,7 @@ class binary_reader std::int32_t documentSize; get_number(input_format_t::bson, documentSize); - if (JSON_UNLIKELY(not sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) { return false; } @@ -304,27 +331,9 @@ class binary_reader return sax->end_array(); } - /*! - @brief Reads in a BSON-object and pass it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t documentSize; - get_number(input_format_t::bson, documentSize); - - if (JSON_UNLIKELY(not sax->start_object(-1))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } + ////////// + // CBOR // + ////////// /*! @param[in] get_char whether a new character should be retrieved from the @@ -664,6 +673,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -1026,301 +1220,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian && !InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -1455,6 +1354,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -1869,6 +1784,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -1894,7 +1916,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -1934,6 +1955,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 7f9f0be37..64a02aac8 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -35,7 +35,34 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -279,7 +306,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -678,13 +705,19 @@ class binary_writer } } + private: + ////////// + // BSON // + ////////// + /*! - @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { const auto it = name.find(static_cast(0)); - if (it != BasicJsonType::string_t::npos) + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); @@ -696,7 +729,8 @@ class binary_writer /*! @brief Writes the given @a element_type and @a name to the output adapter */ - void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) { oa->write_character(static_cast(element_type)); // boolean oa->write_characters( @@ -707,7 +741,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and boolean value @a value */ - void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) { write_bson_entry_header(name, 0x08); oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); @@ -716,7 +751,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and double value @a value */ - void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) { write_bson_entry_header(name, 0x01); write_number(value); @@ -733,7 +769,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and string value @a value */ - void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) { write_bson_entry_header(name, 0x02); @@ -769,7 +806,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and integer @a value */ - void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) { if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { @@ -783,7 +821,6 @@ class binary_writer } } - /*! @return The size of the BSON-encoded unsigned integer in @a j */ @@ -802,7 +839,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and unsigned @a value */ - void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) { if (value <= static_cast((std::numeric_limits::max)())) { @@ -824,13 +862,13 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and object @a value */ - void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) { write_bson_entry_header(name, 0x03); // object write_bson_object(value); } - /*! @return The size of the BSON-encoded array @a value */ @@ -849,10 +887,11 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and array @a value */ - void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) { write_bson_entry_header(name, 0x04); // array - write_number(calc_bson_array_size(value)); + write_number(static_cast(calc_bson_array_size(value))); for (const auto& el : value) { @@ -862,75 +901,95 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. */ - static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { const auto header_size = calc_bson_entry_header_size(name); switch (j.type()) { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + // LCOV_EXCL_START default: assert(false); return 0ul; - // LCOV_EXCL_STOP - case value_t::discarded: - return 0ul; - case value_t::object: - return header_size + calc_bson_object_size(*j.m_value.object); - case value_t::array: - return header_size + calc_bson_array_size(*j.m_value.array); - case value_t::boolean: - return header_size + 1ul; - case value_t::number_float: - return header_size + 8ul; - case value_t::number_integer: - return header_size + calc_bson_integer_size(j.m_value.number_integer); - case value_t::number_unsigned: - return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); - case value_t::string: - return header_size + calc_bson_string_size(*j.m_value.string); - case value_t::null: - return header_size + 0ul; + // LCOV_EXCL_STOP }; } - /*! - @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. - @param name The name to associate with the JSON entity @a j within the current BSON document - @return The size of the bson entry + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry */ - void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { switch (j.type()) { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + // LCOV_EXCL_START default: assert(false); return; - // LCOV_EXCL_STOP - case value_t::discarded: - return; - case value_t::object: - return write_bson_object_entry(name, *j.m_value.object); - case value_t::array: - return write_bson_array(name, *j.m_value.array); - case value_t::boolean: - return write_bson_boolean(name, j.m_value.boolean); - case value_t::number_float: - return write_bson_double(name, j.m_value.number_float); - case value_t::number_integer: - return write_bson_integer(name, j.m_value.number_integer); - case value_t::number_unsigned: - return write_bson_unsigned(name, j.m_value.number_unsigned); - case value_t::string: - return write_bson_string(name, *j.m_value.string); - case value_t::null: - return write_bson_null(name); + // LCOV_EXCL_STOP }; } @@ -958,7 +1017,7 @@ class binary_writer */ void write_bson_object(const typename BasicJsonType::object_t& value) { - write_number(calc_bson_object_size(value)); + write_number(static_cast(calc_bson_object_size(value))); for (const auto& el : value) { @@ -968,64 +1027,38 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! - @param[in] j JSON value to serialize - @pre j.type() == value_t::object - */ - void write_bson(const BasicJsonType& j) + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) { - switch (j.type()) - { - case value_t::object: - { - write_bson_object(*j.m_value.object); - break; - } - - case value_t::discarded: - { - break; - } - - default: - { - JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; - } - } + return static_cast(0xFA); // Single-Precision Float } - - private: - /* - @brief write a number to output input - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. - */ - template - void write_number(const NumberType n) + static constexpr CharType get_cbor_float_prefix(double /*unused*/) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian and not OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); + return static_cast(0xFB); // Double-Precision Float } + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -1226,26 +1259,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -1256,6 +1269,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 0664cc756..cdd4e680e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6426,14 +6426,38 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } /*! @brief Parses a C-style string from the BSON input. - @param [out] result A reference to the string variable where the read string - is to be stored. - @return `true` if the \x00-byte indicating the end of the - string was encountered before the EOF. - `false` indicates an unexpected EOF. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. */ bool get_bson_cstr(string_t& result) { @@ -6456,107 +6480,107 @@ class binary_reader } /*! - @brief Parses a zero-terminated string of length @a len from the BSON input. - @param [in] len The length (including the zero-byte at the end) of the string to be read. - @param [out] result A reference to the string variable where the read string - is to be stored. + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. @tparam NumberType The type of the length @a len @pre len > 0 @return `true` if the string was successfully parsed */ - template + template bool get_bson_string(const NumberType len, string_t& result) { - return get_string(input_format_t::bson, len - static_cast(1), result) - && get() != std::char_traits::eof(); - } - - /*! - @return A hexadecimal string representation of the given @a byte - @param byte The byte to convert to a string - */ - static std::string byte_hexstring(unsigned char byte) - { - char cr[3]; - snprintf(cr, sizeof(cr), "%02hhX", byte); - return std::string{cr}; + return get_string(input_format_t::bson, len - static_cast(1), result) and get() != std::char_traits::eof(); } /*! @brief Read a BSON document element of the given @a element_type. - @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param element_type_parse_position The position in the input stream, where the `element_type` was read. - @warning Not all BSON element types are supported yet. An unsupported @a element_type will - give rise to a parse_error.114: Unsupported BSON record type 0x... + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) { switch (element_type) { case 0x01: // double { double number; - return get_number(input_format_t::bson, number) - && sax->number_float(static_cast(number), ""); + return get_number(input_format_t::bson, number) and sax->number_float(static_cast(number), ""); } + case 0x02: // string { std::int32_t len; string_t value; - return get_number(input_format_t::bson, len) - && get_bson_string(len, value) - && sax->string(value); - } - case 0x08: // boolean - { - return sax->boolean(static_cast(get())); - } - case 0x10: // int32 - { - std::int32_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x12: // int64 - { - std::int64_t value; - return get_number(input_format_t::bson, value) - && sax->number_integer(static_cast(value)); - } - case 0x0A: // null - { - return sax->null(); + return get_number(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); } + case 0x03: // object { return parse_bson_internal(); } + case 0x04: // array { return parse_bson_array(); } + + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + } + default: // anything else not supported (yet) { - auto element_type_str = byte_hexstring(element_type); - return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + char cr[3]; + snprintf(cr, sizeof(cr), "%.2hhX", static_cast(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); } } } /*! - @brief Read a BSON element list (as specified in the BSON-spec) from the input - and passes it to the SAX-parser. - The same binary layout is used for objects and arrays, hence it must - be indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - @param is_array Determines if the element list being read is to be treated as - an object (@a is_array == false), or as an array (@a is_array == true). + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). @return whether a valid BSON-object/array was passed to the SAX parser */ - bool parse_bson_element_list(bool is_array) + bool parse_bson_element_list(const bool is_array) { - while (auto element_type = get()) + string_t key; + while (int element_type = get()) { if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) { @@ -6564,13 +6588,12 @@ class binary_reader } const std::size_t element_type_parse_position = chars_read; - string_t key; if (JSON_UNLIKELY(not get_bson_cstr(key))) { return false; } - if (!is_array) + if (not is_array) { sax->key(key); } @@ -6579,7 +6602,11 @@ class binary_reader { return false; } + + // get_bson_cstr only appends + key.clear(); } + return true; } @@ -6592,7 +6619,7 @@ class binary_reader std::int32_t documentSize; get_number(input_format_t::bson, documentSize); - if (JSON_UNLIKELY(not sax->start_array(-1))) + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) { return false; } @@ -6605,27 +6632,9 @@ class binary_reader return sax->end_array(); } - /*! - @brief Reads in a BSON-object and pass it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t documentSize; - get_number(input_format_t::bson, documentSize); - - if (JSON_UNLIKELY(not sax->start_object(-1))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } + ////////// + // CBOR // + ////////// /*! @param[in] get_char whether a new character should be retrieved from the @@ -6965,6 +6974,191 @@ class binary_reader } } + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + /*! @return whether a valid MessagePack value was passed to the SAX parser */ @@ -7327,301 +7521,6 @@ class binary_reader } } - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian && !InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast(current); - } - else - { - vec[i] = static_cast(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template - bool get_string(const input_format_t format, const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - /*! @brief reads a MessagePack string @@ -7756,6 +7655,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -8170,6 +8085,113 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast(current); + } + else + { + vec[i] = static_cast(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast(current); + }); + return success; + } + /*! @param[in] format the current format (for diagnostics) @param[in] context further context information (for diagnostics) @@ -8195,7 +8217,6 @@ class binary_reader return std::string{cr}; } - private: /*! @param[in] format the current format @param[in] detail a detailed error message @@ -8235,6 +8256,7 @@ class binary_reader return error_msg + " " + context + ": " + detail; } + private: /// input adapter input_adapter_t ia = nullptr; @@ -8293,7 +8315,34 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + case value_t::discarded: + { + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + break; + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -8537,7 +8586,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -8936,13 +8985,19 @@ class binary_writer } } + private: + ////////// + // BSON // + ////////// + /*! - @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). */ static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) { const auto it = name.find(static_cast(0)); - if (it != BasicJsonType::string_t::npos) + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) { JSON_THROW(out_of_range::create(409, "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); @@ -8954,7 +9009,8 @@ class binary_writer /*! @brief Writes the given @a element_type and @a name to the output adapter */ - void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + void write_bson_entry_header(const typename BasicJsonType::string_t& name, + std::uint8_t element_type) { oa->write_character(static_cast(element_type)); // boolean oa->write_characters( @@ -8965,7 +9021,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and boolean value @a value */ - void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + void write_bson_boolean(const typename BasicJsonType::string_t& name, + const bool value) { write_bson_entry_header(name, 0x08); oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); @@ -8974,7 +9031,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and double value @a value */ - void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + void write_bson_double(const typename BasicJsonType::string_t& name, + const double value) { write_bson_entry_header(name, 0x01); write_number(value); @@ -8991,7 +9049,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and string value @a value */ - void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + void write_bson_string(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::string_t& value) { write_bson_entry_header(name, 0x02); @@ -9027,7 +9086,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and integer @a value */ - void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + void write_bson_integer(const typename BasicJsonType::string_t& name, + const std::int64_t value) { if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { @@ -9041,7 +9101,6 @@ class binary_writer } } - /*! @return The size of the BSON-encoded unsigned integer in @a j */ @@ -9060,7 +9119,8 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and unsigned @a value */ - void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + void write_bson_unsigned(const typename BasicJsonType::string_t& name, + const std::uint64_t value) { if (value <= static_cast((std::numeric_limits::max)())) { @@ -9082,13 +9142,13 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and object @a value */ - void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + void write_bson_object_entry(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::object_t& value) { write_bson_entry_header(name, 0x03); // object write_bson_object(value); } - /*! @return The size of the BSON-encoded array @a value */ @@ -9107,10 +9167,11 @@ class binary_writer /*! @brief Writes a BSON element with key @a name and array @a value */ - void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + void write_bson_array(const typename BasicJsonType::string_t& name, + const typename BasicJsonType::array_t& value) { write_bson_entry_header(name, 0x04); // array - write_number(calc_bson_array_size(value)); + write_number(static_cast(calc_bson_array_size(value))); for (const auto& el : value) { @@ -9120,75 +9181,95 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! @brief Calculates the size necessary to serialize the JSON value @a j with its @a name @return The calculated size for the BSON document entry for @a j with the given @a name. */ - static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { const auto header_size = calc_bson_entry_header_size(name); switch (j.type()) { + case value_t::discarded: + return 0ul; + + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + // LCOV_EXCL_START default: assert(false); return 0ul; - // LCOV_EXCL_STOP - case value_t::discarded: - return 0ul; - case value_t::object: - return header_size + calc_bson_object_size(*j.m_value.object); - case value_t::array: - return header_size + calc_bson_array_size(*j.m_value.array); - case value_t::boolean: - return header_size + 1ul; - case value_t::number_float: - return header_size + 8ul; - case value_t::number_integer: - return header_size + calc_bson_integer_size(j.m_value.number_integer); - case value_t::number_unsigned: - return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); - case value_t::string: - return header_size + calc_bson_string_size(*j.m_value.string); - case value_t::null: - return header_size + 0ul; + // LCOV_EXCL_STOP }; } - /*! - @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. - @param name The name to associate with the JSON entity @a j within the current BSON document - @return The size of the bson entry + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry */ - void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + void write_bson_element(const typename BasicJsonType::string_t& name, + const BasicJsonType& j) { switch (j.type()) { + case value_t::discarded: + return; + + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + // LCOV_EXCL_START default: assert(false); return; - // LCOV_EXCL_STOP - case value_t::discarded: - return; - case value_t::object: - return write_bson_object_entry(name, *j.m_value.object); - case value_t::array: - return write_bson_array(name, *j.m_value.array); - case value_t::boolean: - return write_bson_boolean(name, j.m_value.boolean); - case value_t::number_float: - return write_bson_double(name, j.m_value.number_float); - case value_t::number_integer: - return write_bson_integer(name, j.m_value.number_integer); - case value_t::number_unsigned: - return write_bson_unsigned(name, j.m_value.number_unsigned); - case value_t::string: - return write_bson_string(name, *j.m_value.string); - case value_t::null: - return write_bson_null(name); + // LCOV_EXCL_STOP }; } @@ -9216,7 +9297,7 @@ class binary_writer */ void write_bson_object(const typename BasicJsonType::object_t& value) { - write_number(calc_bson_object_size(value)); + write_number(static_cast(calc_bson_object_size(value))); for (const auto& el : value) { @@ -9226,64 +9307,38 @@ class binary_writer oa->write_character(static_cast(0x00)); } - /*! - @param[in] j JSON value to serialize - @pre j.type() == value_t::object - */ - void write_bson(const BasicJsonType& j) + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) { - switch (j.type()) - { - case value_t::object: - { - write_bson_object(*j.m_value.object); - break; - } - - case value_t::discarded: - { - break; - } - - default: - { - JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; - } - } + return static_cast(0xFA); // Single-Precision Float } - - private: - /* - @brief write a number to output input - - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number - @tparam OutputIsLittleEndian Set to true if output data is - required to be little endian - - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. - */ - template - void write_number(const NumberType n) + static constexpr CharType get_cbor_float_prefix(double /*unused*/) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); - - // step 2: write array to output (with possible reordering) - if (is_little_endian and not OutputIsLittleEndian) - { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); - } - - oa->write_characters(vec.data(), sizeof(NumberType)); + return static_cast(0xFB); // Double-Precision Float } + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return static_cast(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return static_cast(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -9484,26 +9539,6 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float /*unused*/) - { - return static_cast(0xFA); // Single-Precision Float - } - - static constexpr CharType get_cbor_float_prefix(double /*unused*/) - { - return static_cast(0xFB); // Double-Precision Float - } - - static constexpr CharType get_msgpack_float_prefix(float /*unused*/) - { - return static_cast(0xCA); // float 32 - } - - static constexpr CharType get_msgpack_float_prefix(double /*unused*/) - { - return static_cast(0xCB); // float 64 - } - static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { return 'd'; // float 32 @@ -9514,6 +9549,39 @@ class binary_writer return 'D'; // float 64 } + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + private: /// whether we can assume little endianess const bool is_little_endian = binary_reader::little_endianess(); diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index ef94a807b..711e8a64a 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -48,7 +48,7 @@ TEST_CASE("BSON") SECTION("null") { json j = nullptr; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is null"); } @@ -57,14 +57,14 @@ TEST_CASE("BSON") SECTION("true") { json j = true; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } SECTION("false") { json j = false; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is boolean"); } } @@ -72,29 +72,29 @@ TEST_CASE("BSON") SECTION("number") { json j = 42; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("float") { json j = 4.2; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is number"); } SECTION("string") { json j = "not supported"; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is string"); } SECTION("array") { json j = std::vector {1, 2, 3, 4, 5, 6, 7}; - REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); - CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); + CHECK_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] to serialize to BSON, top-level type must be object, but is array"); } } @@ -104,7 +104,7 @@ TEST_CASE("BSON") { { std::string("en\0try", 6), true } }; - REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)"); } @@ -541,6 +541,30 @@ TEST_CASE("BSON") CHECK(json::from_bson(result, true, false) == j); } } + + SECTION("Examples from http://bsonspec.org/faq.html") + { + SECTION("Example 1") + { + std::vector input = {0x16, 0x00, 0x00, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x06, 0x00, 0x00, 0x00, 'w', 'o', 'r', 'l', 'd', 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"hello", "world"}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + CHECK(dumped == input); + } + + SECTION("Example 2") + { + std::vector input = {0x31, 0x00, 0x00, 0x00, 0x04, 'B', 'S', 'O', 'N', 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x30, 0x00, 0x08, 0x00, 0x00, 0x00, 'a', 'w', 'e', 's', 'o', 'm', 'e', 0x00, 0x01, 0x31, 0x00, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x14, 0x40, 0x10, 0x32, 0x00, 0xc2, 0x07, 0x00, 0x00, 0x00, 0x00}; + json parsed = json::from_bson(input); + json expected = {{"BSON", {"awesome", 5.05, 1986}}}; + CHECK(parsed == expected); + auto dumped = json::to_bson(parsed); + //CHECK(dumped == input); // see https://github.com/nlohmann/json/pull/1254#issuecomment-432831216 + CHECK(json::from_bson(dumped) == expected); + } + } } TEST_CASE("BSON input/output_adapters") @@ -601,10 +625,6 @@ TEST_CASE("BSON input/output_adapters") } } - - - - class SaxCountdown { public: @@ -675,86 +695,84 @@ class SaxCountdown int events_left = 0; }; - -TEST_CASE("Incomplete BSON INPUT") +TEST_CASE("Incomplete BSON Input") { - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 1") { - 0x0D, 0x00, 0x00, 0x00, // size (little endian) - 0x08, // entry: boolean - 'e', 'n', 't' // unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't' // unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } -TEST_CASE("Incomplete BSON INPUT 2") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 2") { - 0x0D, 0x00, 0x00, 0x00, // size (little endian) - 0x08, // entry: boolean, unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean, unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } - -TEST_CASE("Incomplete BSON INPUT 3") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 3") { - 0x41, 0x00, 0x00, 0x00, // size (little endian) - 0x04, /// entry: embedded document - 'e', 'n', 't', 'r', 'y', '\x00', + std::vector incomplete_bson = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', - 0x35, 0x00, 0x00, 0x00, // size (little endian) - 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 - // missing input data... - }; + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 + // missing input data... + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(1); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); -} + SaxCountdown scp(1); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } - - -TEST_CASE("Incomplete BSON INPUT 4") -{ - std::vector incomplete_bson = + SECTION("Incomplete BSON Input 4") { - 0x0D, 0x00, // size (incomplete), unexpected EOF - }; + std::vector incomplete_bson = + { + 0x0D, 0x00, // size (incomplete), unexpected EOF + }; - CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); - CHECK_THROWS_WITH(json::from_bson(incomplete_bson), - "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); - CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); - SaxCountdown scp(0); - CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); + } } - TEST_CASE("Unsupported BSON input") { std::vector bson = @@ -774,8 +792,6 @@ TEST_CASE("Unsupported BSON input") CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson)); } - - TEST_CASE("BSON numerical data") { SECTION("number") @@ -1205,12 +1221,12 @@ TEST_CASE("BSON roundtrips", "[hide]") (std::istreambuf_iterator(f_bson)), std::istreambuf_iterator()); - SECTION(filename + ": output adapters: std::vector") - { - std::vector vec; - json::to_bson(j1, vec); - CHECK(vec == packed); - } + SECTION(filename + ": output adapters: std::vector") + { + std::vector vec; + json::to_bson(j1, vec); + CHECK(vec == packed); + } } } } diff --git a/test/src/unit-regression.cpp b/test/src/unit-regression.cpp index 2c21576d1..c7736b714 100644 --- a/test/src/unit-regression.cpp +++ b/test/src/unit-regression.cpp @@ -139,10 +139,10 @@ bool operator==(Data const& lhs, Data const& rhs) return lhs.a == rhs.a && lhs.b == rhs.b; } -bool operator!=(Data const& lhs, Data const& rhs) -{ - return !(lhs == rhs); -} +//bool operator!=(Data const& lhs, Data const& rhs) +//{ +// return !(lhs == rhs); +//} } ///////////////////////////////////////////////////////////////////// From 62126278a605f8ec097f2420dfa717d83912cc70 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 25 Oct 2018 13:01:18 +0200 Subject: [PATCH 31/36] :hammer: added fix for arrays --- README.md | 16 +++++++-- .../nlohmann/detail/output/binary_writer.hpp | 7 ++-- single_include/nlohmann/json.hpp | 7 ++-- test/data/json.org/1.json.bson | Bin 393 -> 395 bytes test/data/json.org/2.json.bson | Bin 216 -> 219 bytes test/data/json.org/3.json.bson | Bin 406 -> 406 bytes test/data/json.org/4.json.bson | Bin 2786 -> 2791 bytes test/data/json.org/5.json.bson | Bin 730 -> 764 bytes test/src/unit-bson.cpp | 32 +++++++++++------- 9 files changed, 43 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index eebf86df6..0572199f7 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ - [JSON Merge Patch](#json-merge-patch) - [Implicit conversions](#implicit-conversions) - [Conversions to/from arbitrary types](#arbitrary-types-conversions) - - [Binary formats (CBOR, MessagePack, and UBJSON)](#binary-formats-cbor-messagepack-and-ubjson) + - [Binary formats (CBOR, BSON, MessagePack, and UBJSON)](#binary-formats-bson-cbor-messagepack-and-ubjson) - [Supported compilers](#supported-compilers) - [License](#license) - [Contact](#contact) @@ -874,14 +874,22 @@ struct bad_serializer }; ``` -### Binary formats (CBOR, MessagePack, and UBJSON) +### Binary formats (CBOR, BSON, MessagePack, and UBJSON -Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supports [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors. +Though JSON is a ubiquitous data format, it is not a very compact format suitable for data exchange, for instance over a network. Hence, the library supportsĀ [BSON](http://bsonspec.org) (Binary JSON), [CBOR](http://cbor.io) (Concise Binary Object Representation), [MessagePack](http://msgpack.org), and [UBJSON](http://ubjson.org) (Universal Binary JSON Specification) to efficiently encode JSON values to byte vectors and to decode such vectors. ```cpp // create a JSON value json j = R"({"compact": true, "schema": 0})"_json; +// serialize to BSON +std::vector v_bson = json::to_bson(j); + +// 0x1B, 0x00, 0x00, 0x00, 0x08, 0x63, 0x6F, 0x6D, 0x70, 0x61, 0x63, 0x74, 0x00, 0x01, 0x10, 0x73, 0x63, 0x68, 0x65, 0x6D, 0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + +// roundtrip +json j_from_bson = json::from_bson(v_bson); + // serialize to CBOR std::vector v_cbor = json::to_cbor(j); @@ -1138,6 +1146,8 @@ I deeply appreciate the help of the following people. - [Henry Schreiner](https://github.com/henryiii) added support for GCC 4.8. - [knilch](https://github.com/knilch0r) made sure the test suite does not stall when run in the wrong directory. - [Antonio Borondo](https://github.com/antonioborondo) fixed an MSVC 2017 warning. +- [efp](https://github.com/efp) added line and column information to parse errors. +- [julian-becker](https://github.com/julian-becker) added BSON support. Thanks a lot for helping out! Please [let me know](mailto:mail@nlohmann.me) if I forgot someone. diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 64a02aac8..27833ab6a 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -875,10 +875,11 @@ class binary_writer static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) { std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; for (const auto& el : value) { - embedded_document_size += calc_bson_element_size("", el); + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); } return sizeof(std::int32_t) + embedded_document_size + 1ul; @@ -893,9 +894,11 @@ class binary_writer write_bson_entry_header(name, 0x04); // array write_number(static_cast(calc_bson_array_size(value))); + std::size_t array_index = 0ul; + for (const auto& el : value) { - write_bson_element("", el); + write_bson_element(std::to_string(array_index++), el); } oa->write_character(static_cast(0x00)); diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index cdd4e680e..c9060781e 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -9155,10 +9155,11 @@ class binary_writer static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) { std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; for (const auto& el : value) { - embedded_document_size += calc_bson_element_size("", el); + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); } return sizeof(std::int32_t) + embedded_document_size + 1ul; @@ -9173,9 +9174,11 @@ class binary_writer write_bson_entry_header(name, 0x04); // array write_number(static_cast(calc_bson_array_size(value))); + std::size_t array_index = 0ul; + for (const auto& el : value) { - write_bson_element("", el); + write_bson_element(std::to_string(array_index++), el); } oa->write_character(static_cast(0x00)); diff --git a/test/data/json.org/1.json.bson b/test/data/json.org/1.json.bson index e14e9b92304d8a37a803af2e6c870496679aefd6..a2466af8367caa6a96e7881eabd1657453354e81 100644 GIT binary patch delta 196 zcmeBV?q(L~W@KPsPS43NE>0|}WT;`BoXIF>$?OgiaLFuV2n0$qm1LIWq%truFfar& zK%{&!i%S?xKw7~nUGqwSnw1$DCfhK|Pv&Ga6>v;0%FnCJWncwqa`*L_XeZ4BH8nMj zp^;(YyigYwu-@R*RL7j+dWMp7qPS43NE>0|}WT*o1+(A5-%rXXlIL{}uxP-w7&UMWz0V-BxWSDqb z+Lg&MxhOxcGM9msfq@~|-PZ?ZLTVaAJy3`RY+i6`s$))ZK7%Asgo%L#NP<*CSP{NH j6F-GcE?^X!*k(F0QClx3UG;*SV9FPV8K(#6# diff --git a/test/data/json.org/2.json.bson b/test/data/json.org/2.json.bson index 0f4356e16daadc3fd1e5d602ac5b01a9637b0747..743822c57eda0cce5a56fc1a738786be582d0016 100644 GIT binary patch literal 219 zcmcc3z`($qo0?b3Z~@3-%1mKk1=4AmIjIaxWr;bZsbIbvh|gS*Ur<`WupB7Q0@9IL zlA6mf1Ek-8!2qNgqMilF^h+&gV9L)+&dE&9X5a@3I2WZRmZSoOUGkGPG=W+S8FUa@ pKvwz}q~;;?a6v_2%8eNG5X#wrOy`{ZVxSXYc5uUlpc;T&1^{E^FsuLo literal 216 zcmcb?z`($qo0?b3a2Cj7%1mKk1=4AmIjIcH1^ES~1q@4od=`+3%#zexhN&R+3Qmu!qnj6S*&dD!MMN!TM69v*R`#?sxft=33008zvFbx0z diff --git a/test/data/json.org/3.json.bson b/test/data/json.org/3.json.bson index deb7c5391fc76590baf686212ee5a4407c404dc6..2c43e3518d29c74c0d79c3b3d32e04655251c97e 100644 GIT binary patch literal 406 zcmZ`!u};G<5Ir0L14v|N@xTBb+H^#Si3NmEhYG=fgq--|SU7eRUnu1d_yPWczhh$J zOgm5sw)gJ6d++Xj3m{uqZIIxNvCvo!1~LE$J}F*@1>7?gWXT~MGMuZnb11KauT?oX zMWL5>s|?{)i?Gk8R$2eJ-p{##8u;Xbyg_Rk`oWOdRvCnc6d(Hv4j5deQJl;NKkmGN zRp2%Yaqv@KpVf8bW50NkKmQ^^Ict22o?w@?iUIcg9N`PW2Tz1nq+pNHg|pR@yu=6l0*h>pbqDp%;anZkJO@6 z1}rwP0abu>8BUI5w3@^yr3yDO1t`P7T%MVil3&gc0b~ngq-LgPlrVf@WSHnLK2b}J OPoO+Ar6dC+00IE2Z#VV; diff --git a/test/data/json.org/4.json.bson b/test/data/json.org/4.json.bson index 31812125a1c55004e0cc4ca87c8e67ce553d59ba..aceba2d8ab6f741a77fac37908bc4fabf4612ce9 100644 GIT binary patch delta 570 zcmY*WJ#Q015S_cd5Bo%jm0%o2QBXdlASY)#9|$C*fJt0{ECIGDNQ2h7_2uw=akmbp zfiC@O3OYbYRQv`CC}p}Jdfbj_LIaqT_>R)RO~uCN$`F6*7w|y8ieL2^ ze6LU9MXijMwlbC*r0hof7Jzy`jd4?(CE0mTD+_-?++7rni9&}3u}?WmI|<$GgCl+~ zi|-TSuqfV4i`S*GZ7<=ET8ug%{o%acz(3kLy85~CZ1^_iw9TW0fr4k%WqhmE@I0Bv zJ@pd4B0aoD0(_^g;tO>O3#5c~b;)iQ3N=?NM+*)-w{+QId8@0|uug((`;a zs=|R;kz+QXrk)6NqR3D2Wq#wols_ndPx1@+EZ4$c<_?nlV{GMao{sw0!>uUfY{)^D zd@|=sncshkf!wASx^R%?WnIRCbp{_;(^u5ps7Hs#QXtn0Ri3F<%d(YI*Y~>6%#5Gh V?Lp#YiNkSIh5HUZwJJmce*tQkrPu%f delta 519 zcmY+A&ubGw9K~m6H?uz|ZD?5xLL(`Owv|n_1gaELf5d~LRGNxNy^PIR*X)n5JKDs9 zh-X2=I|zc{e-Qryz4o98T0{^py+{#nq8^=X+JlGr@#gX6z4?51cij0MhY)>J-12y* zL$;MLQjy#bgd|^W<&ZX0yC3<|>+pn!WP;%bt>6niiY+=mx*^hrpUB>d;4R|dpj(WmU8ZV}QS)ca3r{hwW2H)c_1$MDHGg)vsZc5V@CtmwA5DVBJea;~Qs3A)yk=FrHxG?m2)Q3*hMLBJO^uvwg?`i=vcx;HfV#=pzW>~;;(=Lo zl{`WJ=lem!XS;+~)*5>DGFGhRu^^7y7bC?&imoIY{#cJuwMy7EYbtsYf9=Wfy|lfO lb36;4o5iTJm|uPr|Jqo{MvFQ^1+HZ+;Ml2M<@&~)+*Mr|fTv&qa%K+1fw9+M%Hp~d8Q cCQTVbO9o~m0~F03sW}Be%|?ck=P}s;0KuUt)&Kwi delta 205 zcmeyvdW)6&783&lb8c#0DZ}}R+=`NGKwJhP1_lPE%oGMzAnjj}ng zF3_HM#?%F expected = { - 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x49, 0x00, 0x00, 0x00, // size (little endian) 0x04, /// entry: embedded document 'e', 'n', 't', 'r', 'y', '\x00', - 0x35, 0x00, 0x00, 0x00, // size (little endian) - 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x3D, 0x00, 0x00, 0x00, // size (little endian) + 0x10, '0', 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, '1', 0x00, 0x02, 0x00, 0x00, 0x00, + 0x10, '2', 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, '3', 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, '4', 0x00, 0x05, 0x00, 0x00, 0x00, + 0x10, '5', 0x00, 0x06, 0x00, 0x00, 0x00, + 0x10, '6', 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, '7', 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, // end marker (embedded document) 0x00 // end marker @@ -552,6 +552,7 @@ TEST_CASE("BSON") CHECK(parsed == expected); auto dumped = json::to_bson(parsed); CHECK(dumped == input); + CHECK(json::from_bson(dumped) == expected); } SECTION("Example 2") @@ -561,7 +562,7 @@ TEST_CASE("BSON") json expected = {{"BSON", {"awesome", 5.05, 1986}}}; CHECK(parsed == expected); auto dumped = json::to_bson(parsed); - //CHECK(dumped == input); // see https://github.com/nlohmann/json/pull/1254#issuecomment-432831216 + CHECK(dumped == input); CHECK(json::from_bson(dumped) == expected); } } @@ -1225,7 +1226,14 @@ TEST_CASE("BSON roundtrips", "[hide]") { std::vector vec; json::to_bson(j1, vec); - CHECK(vec == packed); + + if (vec != packed) + { + // the exact serializations may differ due to the order of + // object keys; in these cases, just compare whether both + // serializations create the same JSON value + CHECK(json::from_bson(vec) == json::from_bson(packed)); + } } } } From 19647e083c0fb6889a0dcd1cf459bc99d46a4ab0 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 25 Oct 2018 14:27:55 +0200 Subject: [PATCH 32/36] :rotating_light: fixed compiler warnings --- include/nlohmann/detail/input/binary_reader.hpp | 4 ++-- include/nlohmann/detail/output/serializer.hpp | 1 + single_include/nlohmann/json.hpp | 5 +++-- test/src/fuzzer-parse_json.cpp | 4 ---- test/src/unit-bson.cpp | 6 +++--- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 615ab73cf..7d2787360 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -246,13 +246,13 @@ class binary_reader case 0x10: // int32 { std::int32_t value; - return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + return get_number(input_format_t::bson, value) and sax->number_integer(value); } case 0x12: // int64 { std::int64_t value; - return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + return get_number(input_format_t::bson, value) and sax->number_integer(value); } default: // anything else not supported (yet) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 1d107ce04..8c48e36e9 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -463,6 +463,7 @@ class serializer continue; } } + break; } default: // decode found yet incomplete multi-byte code point diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c9060781e..9bb67ff16 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -6547,13 +6547,13 @@ class binary_reader case 0x10: // int32 { std::int32_t value; - return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + return get_number(input_format_t::bson, value) and sax->number_integer(value); } case 0x12: // int64 { std::int64_t value; - return get_number(input_format_t::bson, value) and sax->number_integer(static_cast(value)); + return get_number(input_format_t::bson, value) and sax->number_integer(value); } default: // anything else not supported (yet) @@ -11161,6 +11161,7 @@ class serializer continue; } } + break; } default: // decode found yet incomplete multi-byte code point diff --git a/test/src/fuzzer-parse_json.cpp b/test/src/fuzzer-parse_json.cpp index ed5863851..7aa3dc21f 100644 --- a/test/src/fuzzer-parse_json.cpp +++ b/test/src/fuzzer-parse_json.cpp @@ -60,10 +60,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) // parse errors are ok, because input may be random bytes } catch (const json::out_of_range&) - { - // parse errors are ok, because input may be random bytes - } - catch (const json::out_of_range&) { // out of range errors may happen if provided sizes are excessive } diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp index 5a16aefd1..aeb7b8987 100644 --- a/test/src/unit-bson.cpp +++ b/test/src/unit-bson.cpp @@ -1013,7 +1013,7 @@ TEST_CASE("BSON numerical data") { "entry", i } }; - std::uint64_t iu = *reinterpret_cast(&i); + auto iu = i; std::vector expected_bson = { 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) @@ -1068,7 +1068,7 @@ TEST_CASE("BSON numerical data") { "entry", i } }; - std::uint64_t iu = *reinterpret_cast(&i); + auto iu = i; std::vector expected_bson = { 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) @@ -1118,7 +1118,7 @@ TEST_CASE("BSON numerical data") { "entry", i } }; - std::uint64_t iu = *reinterpret_cast(&i); + auto iu = i; std::vector expected_bson = { 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) From 7ce720b7006727ee2ca948fa55be81f91dff4191 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 25 Oct 2018 18:21:50 +0200 Subject: [PATCH 33/36] :rotating_light: fixed coverage --- include/nlohmann/detail/output/serializer.hpp | 2 +- single_include/nlohmann/json.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 8c48e36e9..090f22b1d 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -463,7 +463,7 @@ class serializer continue; } } - break; + break; // LCOV_EXCL_LINE } default: // decode found yet incomplete multi-byte code point diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 9bb67ff16..c22953997 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -11161,7 +11161,7 @@ class serializer continue; } } - break; + break; // LCOV_EXCL_LINE } default: // decode found yet incomplete multi-byte code point From d97fa30795f83e57a3b1c62505248fec9f29a7f1 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Thu, 25 Oct 2018 22:29:27 +0200 Subject: [PATCH 34/36] :ok_hand: fixed comment #1320 --- test/src/fuzzer-parse_bson.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/fuzzer-parse_bson.cpp b/test/src/fuzzer-parse_bson.cpp index 1d9337678..86b9f176d 100644 --- a/test/src/fuzzer-parse_bson.cpp +++ b/test/src/fuzzer-parse_bson.cpp @@ -51,7 +51,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) } catch (const json::parse_error&) { - // parsing a CBOR serialization must not fail + // parsing a BSON serialization must not fail assert(false); } } From 544150d5a54058f91dbda330087cc93844ecd6aa Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 26 Oct 2018 11:10:49 +0200 Subject: [PATCH 35/36] :rotating_light: fixed another linter warning --- include/nlohmann/detail/output/serializer.hpp | 4 ++-- single_include/nlohmann/json.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/nlohmann/detail/output/serializer.hpp b/include/nlohmann/detail/output/serializer.hpp index 090f22b1d..41f248a29 100644 --- a/include/nlohmann/detail/output/serializer.hpp +++ b/include/nlohmann/detail/output/serializer.hpp @@ -460,10 +460,10 @@ class serializer // continue processing the string state = UTF8_ACCEPT; - continue; + break; } } - break; // LCOV_EXCL_LINE + break; } default: // decode found yet incomplete multi-byte code point diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c22953997..9b2636adb 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -11158,10 +11158,10 @@ class serializer // continue processing the string state = UTF8_ACCEPT; - continue; + break; } } - break; // LCOV_EXCL_LINE + break; } default: // decode found yet incomplete multi-byte code point From 6384fe28db241e1a112d7da1f8d4b8fdc5c60346 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Fri, 26 Oct 2018 23:12:41 +0200 Subject: [PATCH 36/36] :rotating_light: fixed another linter warning --- include/nlohmann/detail/output/binary_writer.hpp | 1 - single_include/nlohmann/json.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index 27833ab6a..7c0e6939b 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -56,7 +56,6 @@ class binary_writer default: { JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; } } } diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index 9b2636adb..79f74e4cf 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -8336,7 +8336,6 @@ class binary_writer default: { JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); - break; } } }