diff --git a/include/nlohmann/detail/exceptions.hpp b/include/nlohmann/detail/exceptions.hpp index dd0468040..1ac2606bd 100644 --- a/include/nlohmann/detail/exceptions.hpp +++ b/include/nlohmann/detail/exceptions.hpp @@ -93,6 +93,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -236,6 +237,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. 
| +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -278,8 +280,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. 
| +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index 57889644e..c213d8558 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -80,6 +80,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -121,6 +125,207 @@ class binary_reader } private: + + /*! + @brief Parses a C-style string from the BSON input. + @param [out] result A reference to the string variable where the read string + is to be stored. + @return `true` if the \x00-byte indicating the end of the + string was encountered before the EOF. + `false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON input. + @param [in] len The length (including the zero-byte at the end) of the string to be read. + @param [out] result A reference to the string variable where the read string + is to be stored. 
+ @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) + { + return get_string(input_format_t::bson, len - static_cast(1), result) + && get() != std::char_traits::eof(); + } + + /*! + @return A hexadecimal string representation of the given @a byte + @param byte The byte to convert to a string + */ + static std::string byte_hexstring(unsigned char byte) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%02hhX", byte); + return std::string{cr}; + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param element_type_parse_position The position in the input stream, where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported @a element_type will + give rise to a parse_error.114: Unsupported BSON record type 0x... 
+ @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number(input_format_t::bson, number) + && sax->number_float(static_cast(number), ""); + } + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(input_format_t::bson, len) + && get_bson_string(len, value) + && sax->string(value); + } + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) + && sax->number_integer(static_cast(value)); + } + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) + && sax->number_integer(static_cast(value)); + } + case 0x0A: // null + { + return sax->null(); + } + case 0x03: // object + { + return parse_bson_internal(); + } + case 0x04: // array + { + return parse_bson_array(); + } + default: // anything else not supported (yet) + { + auto element_type_str = byte_hexstring(element_type); + return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) from the input + and passes it to the SAX-parser. + The same binary layout is used for objects and arrays, hence it must + be indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + @param is_array Determines if the element list being read is to be treated as + an object (@a is_array == false), or as an array (@a is_array == true). 
+ @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(bool is_array) + { + while (auto element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + string_t key; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (!is_array) + { + sax->key(key); + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + } + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_array(-1))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + /*! + @brief Reads in a BSON-object and pass it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t documentSize; + get_number(input_format_t::bson, documentSize); + + if (JSON_UNLIKELY(not sax->start_object(-1))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read @@ -875,7 +1080,7 @@ class binary_reader bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. 
*/ - template + template bool get_number(const input_format_t format, NumberType& result) { // step 1: read input into array with system's byte order @@ -889,7 +1094,7 @@ class binary_reader } // reverse byte order prior to conversion if necessary - if (is_little_endian) + if (is_little_endian && !InputIsLittleEndian) { vec[sizeof(NumberType) - i - 1] = static_cast(current); } @@ -904,6 +1109,7 @@ class binary_reader return true; } + /*! @brief create a string by reading characters from the input @@ -1715,6 +1921,10 @@ class binary_reader error_msg += "UBJSON"; break; + case input_format_t::bson: + error_msg += "BSON"; + break; + // LCOV_EXCL_START default: assert(false); diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index 706c5c5b9..8b7852b81 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -18,7 +18,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // diff --git a/include/nlohmann/detail/output/binary_writer.hpp b/include/nlohmann/detail/output/binary_writer.hpp index d4b5e98f9..bebfa9363 100644 --- a/include/nlohmann/detail/output/binary_writer.hpp +++ b/include/nlohmann/detail/output/binary_writer.hpp @@ -678,18 +678,330 @@ class binary_writer } } + /*! + @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). 
+ */ + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) + { + const auto it = name.find(static_cast(0)); + if (it != BasicJsonType::string_t::npos) + { + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); + } + + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + { + write_bson_entry_header(name, 0x01); + write_number(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! 
+ @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + + } + + /*! 
+ @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size("", el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(calc_bson_array_size(value)); + + for (const auto& el : value) + { + write_bson_element("", el); + } + + oa->write_character(static_cast(0x00)); + } + + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. 
+ */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + case value_t::discarded: + return 0ul; + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::boolean: + return header_size + 1ul; + case value_t::number_float: + return header_size + 8ul; + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + case value_t::null: + return header_size + 0ul; + }; + } + + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. 
+ @param name The name to associate with the JSON entity @a j within the current BSON document + @return The size of the bson entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + case value_t::discarded: + return; + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + case value_t::array: + return write_bson_array(name, *j.m_value.array); + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + case value_t::string: + return write_bson_string(name, *j.m_value.string); + case value_t::null: + return write_bson_null(name); + }; + } + + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number(calc_bson_object_size(value)); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(static_cast(0x00)); + } + + /*! 
+ @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); + break; + case value_t::discarded: + break; + case value_t::object: + write_bson_object(*j.m_value.object); + break; + } + } + + private: /* @brief write a number to output input @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian @note This function needs to respect the system's endianess, because bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template void write_number(const NumberType n) { // step 1: write number to array of length NumberType @@ -697,7 +1009,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian) + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); diff --git a/include/nlohmann/json.hpp b/include/nlohmann/json.hpp index a5f7d47d0..0a46d0c27 100644 --- a/include/nlohmann/json.hpp +++ b/include/nlohmann/json.hpp @@ -6627,6 +6627,87 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). 
+ + The library uses the following mapping from JSON value types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized as + zero-terminated c-strings. + + @throw out_of_range.407 if `j.is_number_unsigned() && j.get<std::uint64_t>() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. 
+ + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! @brief create a JSON value from an input in CBOR format @@ -6821,6 +6902,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -6906,6 +6989,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ @@ -6934,6 +7019,91 @@ class basic_json return res ? 
result : basic_json(value_t::discarded); } + + + + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code w/ scope | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. 
+ + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? 
result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index b132e850d..29f0cd0db 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -835,6 +835,7 @@ json.exception.parse_error.109 | parse error: array index 'one' is not a number json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). @note For an input with n bytes, 1 is the index of the first character and n+1 is the index of the terminating null byte or the end of file. This also @@ -978,6 +979,7 @@ json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. 
| +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | @liveexample{The following code shows how a `type_error` exception can be caught.,type_error} @@ -1020,8 +1022,9 @@ json.exception.out_of_range.403 | key 'foo' not found | The provided key was not json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. -json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON only supports integers numbers up to 9223372036854775807. | +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. 
| +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | @liveexample{The following code shows how an `out_of_range` exception can be caught.,out_of_range} @@ -2038,7 +2041,7 @@ namespace nlohmann namespace detail { /// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson }; +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; //////////////////// // input adapters // @@ -6378,6 +6381,10 @@ class binary_reader result = parse_ubjson_internal(); break; + case input_format_t::bson: + result = parse_bson_internal(); + break; + // LCOV_EXCL_START default: assert(false); @@ -6419,6 +6426,207 @@ class binary_reader } private: + + /*! + @brief Parses a C-style string from the BSON input. + @param [out] result A reference to the string variable where the read string + is to be stored. + @return `true` if the \x00-byte indicating the end of the + string was encountered before the EOF. + `false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON input. + @param [in] len The length (including the zero-byte at the end) of the string to be read. + @param [out] result A reference to the string variable where the read string + is to be stored. 
+ @tparam NumberType The type of the length @a len + @pre len > 0 + @return `true` if the string was successfully parsed + */ + template + bool get_bson_string(const NumberType len, string_t& result) + { + return get_string(input_format_t::bson, len - static_cast(1), result) + && get() != std::char_traits::eof(); + } + + /*! + @return A hexadecimal string representation of the given @a byte + @param byte The byte to convert to a string + */ + static std::string byte_hexstring(unsigned char byte) + { + char cr[3]; + snprintf(cr, sizeof(cr), "%02hhX", byte); + return std::string{cr}; + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param element_type_parse_position The position in the input stream, where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported @a element_type will + give rise to a parse_error.114: Unsupported BSON record type 0x... 
+ @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(int element_type, std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number(input_format_t::bson, number) + && sax->number_float(static_cast(number), ""); + } + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number(input_format_t::bson, len) + && get_bson_string(len, value) + && sax->string(value); + } + case 0x08: // boolean + { + return sax->boolean(static_cast(get())); + } + case 0x10: // int32 + { + std::int32_t value; + return get_number(input_format_t::bson, value) + && sax->number_integer(static_cast(value)); + } + case 0x12: // int64 + { + std::int64_t value; + return get_number(input_format_t::bson, value) + && sax->number_integer(static_cast(value)); + } + case 0x0A: // null + { + return sax->null(); + } + case 0x03: // object + { + return parse_bson_internal(); + } + case 0x04: // array + { + return parse_bson_array(); + } + default: // anything else not supported (yet) + { + auto element_type_str = byte_hexstring(element_type); + return sax->parse_error(element_type_parse_position, element_type_str, parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + element_type_str)); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) from the input + and passes it to the SAX-parser. + The same binary layout is used for objects and arrays, hence it must + be indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + @param is_array Determines if the element list being read is to be treated as + an object (@a is_array == false), or as an array (@a is_array == true). 
+    @return whether a valid BSON-object/array was passed to the SAX parser
+    */
+    bool parse_bson_element_list(bool is_array)
+    {
+        // a zero type byte ends the list and terminates the loop
+        while (auto element_type = get())
+        {
+            if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list")))
+            {
+                return false;
+            }
+
+            const std::size_t element_type_parse_position = chars_read;
+            string_t key;
+            if (JSON_UNLIKELY(not get_bson_cstr(key)))
+            {
+                return false;
+            }
+
+            // array element names are read but not forwarded; for objects
+            // the SAX consumer may stop parsing by rejecting the key
+            if (not is_array and JSON_UNLIKELY(not sax->key(key)))
+            {
+                return false;
+            }
+
+            if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position)))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /*!
+    @brief Reads an array from the BSON input and passes it to the SAX-parser.
+    @return whether a valid BSON-array was passed to the SAX parser
+    */
+    bool parse_bson_array()
+    {
+        // the size prefix is consumed but its value is not used
+        std::int32_t documentSize;
+        if (JSON_UNLIKELY(not get_number<std::int32_t, true>(input_format_t::bson, documentSize)))
+        {
+            return false;
+        }
+
+        if (JSON_UNLIKELY(not sax->start_array(-1)))
+        {
+            return false;
+        }
+
+        if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true)))
+        {
+            return false;
+        }
+
+        return sax->end_array();
+    }
+
+    /*!
+    @brief Reads in a BSON-object and pass it to the SAX-parser.
+    @return whether a valid BSON-value was passed to the SAX parser
+    */
+    bool parse_bson_internal()
+    {
+        // the size prefix is consumed but its value is not used
+        std::int32_t documentSize;
+        if (JSON_UNLIKELY(not get_number<std::int32_t, true>(input_format_t::bson, documentSize)))
+        {
+            return false;
+        }
+
+        if (JSON_UNLIKELY(not sax->start_object(-1)))
+        {
+            return false;
+        }
+
+        if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false)))
+        {
+            return false;
+        }
+
+        return sax->end_object();
+    }
+
     /*!
     @param[in] get_char  whether a new character should be retrieved from the
                          input (true, default) or whether the last read
@@ -7173,7 +7381,7 @@ class binary_reader
           bytes in CBOR, MessagePack, and UBJSON are stored in network order
           (big endian) and therefore need reordering on little endian systems.
*/ - template + template bool get_number(const input_format_t format, NumberType& result) { // step 1: read input into array with system's byte order @@ -7187,7 +7395,7 @@ class binary_reader } // reverse byte order prior to conversion if necessary - if (is_little_endian) + if (is_little_endian && !InputIsLittleEndian) { vec[sizeof(NumberType) - i - 1] = static_cast(current); } @@ -7202,6 +7410,7 @@ class binary_reader return true; } + /*! @brief create a string by reading characters from the input @@ -8013,6 +8222,10 @@ class binary_reader error_msg += "UBJSON"; break; + case input_format_t::bson: + error_msg += "BSON"; + break; + // LCOV_EXCL_START default: assert(false); @@ -8723,18 +8936,330 @@ class binary_writer } } + /*! + @return The size of a BSON document entry header, including the id marker and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const typename BasicJsonType::string_t& name) + { + const auto it = name.find(static_cast(0)); + if (it != BasicJsonType::string_t::npos) + { + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); + } + + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const typename BasicJsonType::string_t& name, std::uint8_t element_type) + { + oa->write_character(static_cast(element_type)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const typename BasicJsonType::string_t& name, const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? static_cast(0x01) : static_cast(0x00)); + } + + /*! 
+ @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const typename BasicJsonType::string_t& name, const double value) + { + write_bson_entry_header(name, 0x01); + write_number(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const typename BasicJsonType::string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const typename BasicJsonType::string_t& name, const typename BasicJsonType::string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const typename BasicJsonType::string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const typename BasicJsonType::string_t& name, const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + + /*! 
+ @return The size of the BSON-encoded unsigned integer in @a j + */ + static std::size_t calc_bson_unsigned_size(const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const typename BasicJsonType::string_t& name, const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(value))); + } + + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const typename BasicJsonType::string_t& name, const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size("", el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const typename BasicJsonType::string_t& name, const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(calc_bson_array_size(value)); + + for (const auto& el : value) + { + write_bson_element("", el); + } + + oa->write_character(static_cast(0x00)); + } + + + /*! 
+ @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + case value_t::discarded: + return 0ul; + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + case value_t::boolean: + return header_size + 1ul; + case value_t::number_float: + return header_size + 8ul; + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + case value_t::null: + return header_size + 0ul; + }; + } + + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the key @a name. 
+ @param name The name to associate with the JSON entity @a j within the current BSON document + @return The size of the bson entry + */ + void write_bson_element(const typename BasicJsonType::string_t& name, const BasicJsonType& j) + { + switch (j.type()) + { + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + case value_t::discarded: + return; + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + case value_t::array: + return write_bson_array(name, *j.m_value.array); + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + case value_t::string: + return write_bson_string(name, *j.m_value.string); + case value_t::null: + return write_bson_null(name); + }; + } + + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = 0; + + for (const auto& el : value) + { + document_size += calc_bson_element_size(el.first, el.second); + } + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number(calc_bson_object_size(value)); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(static_cast(0x00)); + } + + /*! 
+ @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + default: + JSON_THROW(type_error::create(317, "JSON value of type " + std::to_string(static_cast(j.type())) + " cannot be serialized to requested format")); + break; + case value_t::discarded: + break; + case value_t::object: + write_bson_object(*j.m_value.object); + break; + } + } + + private: /* @brief write a number to output input @param[in] n number of type @a NumberType @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian @note This function needs to respect the system's endianess, because bytes in CBOR, MessagePack, and UBJSON are stored in network order (big endian) and therefore need reordering on little endian systems. */ - template + template void write_number(const NumberType n) { // step 1: write number to array of length NumberType @@ -8742,7 +9267,7 @@ class binary_writer std::memcpy(vec.data(), &n, sizeof(NumberType)); // step 2: write array to output (with possible reordering) - if (is_little_endian) + if (is_little_endian && !OutputIsLittleEndian) { // reverse byte order prior to conversion if necessary std::reverse(vec.begin(), vec.end()); @@ -18216,6 +18741,87 @@ class basic_json binary_writer(o).write_ubjson(j, use_size, use_type); } + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). 
+
+    The library uses the following mapping from JSON values types to BSON types:
+
+    JSON value type | value/range                       | BSON type   | marker
+    --------------- | --------------------------------- | ----------- | ------
+    null            | `null`                            | null        | 0x0A
+    boolean         | `true`, `false`                   | boolean     | 0x08
+    number_integer  | -9223372036854775808..-2147483649 | int64       | 0x12
+    number_integer  | -2147483648..2147483647           | int32       | 0x10
+    number_integer  | 2147483648..9223372036854775807   | int64       | 0x12
+    number_unsigned | 0..2147483647                     | int32       | 0x10
+    number_unsigned | 2147483648..9223372036854775807   | int64       | 0x12
+    number_unsigned | 9223372036854775808..18446744073709551615| --   | --
+    number_float    | *any value*                       | double      | 0x01
+    string          | *any value*                       | string      | 0x02
+    array           | *any value*                       | document    | 0x04
+    object          | *any value*                       | document    | 0x03
+
+    @warning The mapping is **incomplete**, since only JSON-objects (and things
+    contained therein) can be serialized to BSON.
+    Also, integers larger than 9223372036854775807 cannot be serialized to BSON,
+    and the keys may not contain U+0000, since they are serialized as
+    zero-terminated c-strings.
+
+    @throw out_of_range.407  if `j.is_number_unsigned() && j.get<std::uint64_t>() > 9223372036854775807`
+    @throw out_of_range.409  if a key in `j` contains a NULL (U+0000)
+    @throw type_error.317    if `!j.is_object()`
+
+    @pre The input `j` is required to be an object: `j.is_object() == true`.
+
+    @note Any BSON output created via @ref to_bson can be successfully parsed
+          by @ref from_bson.
+
+    @param[in] j  JSON value to serialize
+    @return BSON serialization as byte vector
+
+    @complexity Linear in the size of the JSON value @a j.
+ + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&) for the related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector to_bson(const basic_json& j) + { + std::vector result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter) + */ + static void to_bson(const basic_json& j, detail::output_adapter o) + { + binary_writer(o).write_bson(j); + } + + /*! 
@brief create a JSON value from an input in CBOR format @@ -18410,6 +19016,8 @@ class basic_json related CBOR format @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the related UBJSON format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 2.0.9; parameter @a start_index since 2.1.1; changed to consume input adapters, removed start_index parameter, and added @@ -18495,6 +19103,8 @@ class basic_json related CBOR format @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the related MessagePack format + @sa @ref from_bson(detail::input_adapter, const bool, const bool) for + the related BSON format @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 */ @@ -18523,6 +19133,91 @@ class basic_json return res ? result : basic_json(value_t::discarded); } + + + + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. 
| 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter, const bool, const bool) for the + related UBJSON format + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! 
+ @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward(a1), std::forward(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + /// @} ////////////////////////// diff --git a/test/src/unit-bson.cpp b/test/src/unit-bson.cpp new file mode 100644 index 000000000..3449b698e --- /dev/null +++ b/test/src/unit-bson.cpp @@ -0,0 +1,1128 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ (test suite) +| | |__ | | | | | | version 3.2.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License . +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann . + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "catch.hpp" + +#include +using nlohmann::json; +#include + +TEST_CASE("BSON") +{ + SECTION("individual values not supported") + { + SECTION("discarded") + { + // discarded values are not serialized + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("null") + { + json j = nullptr; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 0 cannot be serialized to requested format"); + } + + SECTION("boolean") + { + SECTION("true") + { + json j = true; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); + } + + SECTION("false") + { + json j = false; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 4 cannot be serialized to requested format"); + } + } + + SECTION("number") + { + json j = 42; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 5 cannot be serialized to requested format"); + } + + SECTION("float") + { + json j = 4.2; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 7 cannot be serialized to requested format"); + } + + SECTION("string") + { + json j = "not supported"; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON 
value of type 3 cannot be serialized to requested format"); + } + + SECTION("array") + { + json j = std::vector {1, 2, 3, 4, 5, 6, 7}; + REQUIRE_THROWS_AS(json::to_bson(j), json::type_error&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.type_error.317] JSON value of type 2 cannot be serialized to requested format"); + } + } + + SECTION("keys containing code-point U+0000 cannot be serialized to BSON") + { + json j = + { + { std::string("en\0try", 6), true } + }; + REQUIRE_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.409] BSON key cannot contain code point U+0000 (at byte 2)"); + } + + SECTION("objects") + { + SECTION("empty object") + { + json j = json::object(); + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with bool") + { + json j = + { + { "entry", true } + }; + + std::vector expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, // value = true + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with bool") + { + json j = + { + { "entry", false } + }; + + std::vector expected = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00, // value = false + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + 
SECTION("non-empty object with double") + { + json j = + { + { "entry", 4.2 } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x01, /// entry: double + 'e', 'n', 't', 'r', 'y', '\x00', + 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with string") + { + json j = + { + { "entry", "bsonstr" } + }; + + std::vector expected = + { + 0x18, 0x00, 0x00, 0x00, // size (little endian) + 0x02, /// entry: string (UTF-8) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x08, 0x00, 0x00, 0x00, 'b', 's', 'o', 'n', 's', 't', 'r', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with null member") + { + json j = + { + { "entry", nullptr } + }; + + std::vector expected = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0x0A, /// entry: null + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{0x12345678} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with integer 
(64-bit) member") + { + json j = + { + { "entry", std::int64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with negative integer (32-bit) member") + { + json j = + { + { "entry", std::int32_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with negative integer (64-bit) member") + { + json j = + { + { "entry", std::int64_t{-1} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0xFF, 0xFF, 0xFF, 0xFF, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with unsigned integer (64-bit) member") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + { "entry", std::uint64_t{0x1234567804030201} } + }; + + std::vector expected = + { + 0x14, 0x00, 0x00, 0x00, // size (little endian) + 0x12, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x01, 0x02, 0x03, 0x04, 0x78, 0x56, 0x34, 0x12, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == 
expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with small unsigned integer member") + { + json j = + { + { "entry", std::uint64_t{0x42} } + }; + + std::vector expected = + { + 0x10, 0x00, 0x00, 0x00, // size (little endian) + 0x10, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + 0x42, 0x00, 0x00, 0x00, + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("discarded values are not serialized") + { + json j = json::value_t::discarded; + const auto result = json::to_bson(j); + CHECK(result.empty()); + } + + SECTION("discarded members are not serialized") + { + json j = + { + { "entry", json::value_t::discarded } + }; + + std::vector expected = + { + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + } + + + SECTION("non-empty object with object member") + { + json j = + { + { "entry", json::object() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x03, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with array member") + { + json j = + { + { "entry", json::array() } + }; + + std::vector expected = + { + 0x11, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x05, 0x00, 0x00, 0x00, // size (little endian) + // no 
entries + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("non-empty object with non-empty array member") + { + json j = + { + { "entry", json::array({1, 2, 3, 4, 5, 6, 7, 8}) } + }; + + std::vector expected = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, // end marker (embedded document) + + 0x00 // end marker + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + + SECTION("Some more complex document") + { + // directly encoding uint64 is not supported in bson (only for timestamp values) + json j = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector expected = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + 
/*obj-term*/ 0x00 + }; + + const auto result = json::to_bson(j); + CHECK(result == expected); + + // roundtrip + CHECK(json::from_bson(result) == j); + CHECK(json::from_bson(result, true, false) == j); + } + } +} + +TEST_CASE("BSON input/output_adapters") +{ + json json_representation = + { + {"double", 42.5}, + {"entry", 4.2}, + {"number", 12345}, + {"object", {{ "string", "value" }}} + }; + + std::vector bson_representation = + { + /*size */ 0x4f, 0x00, 0x00, 0x00, + /*entry*/ 0x01, 'd', 'o', 'u', 'b', 'l', 'e', 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x45, 0x40, + /*entry*/ 0x01, 'e', 'n', 't', 'r', 'y', 0x00, 0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, + /*entry*/ 0x10, 'n', 'u', 'm', 'b', 'e', 'r', 0x00, 0x39, 0x30, 0x00, 0x00, + /*entry*/ 0x03, 'o', 'b', 'j', 'e', 'c', 't', 0x00, + /*entry: obj-size */ 0x17, 0x00, 0x00, 0x00, + /*entry: obj-entry*/0x02, 's', 't', 'r', 'i', 'n', 'g', 0x00, 0x06, 0x00, 0x00, 0x00, 'v', 'a', 'l', 'u', 'e', 0, + /*entry: obj-term.*/0x00, + /*obj-term*/ 0x00 + }; + + json j2; + CHECK_NOTHROW(j2 = json::from_bson(bson_representation)); + + // compare parsed JSON values + CHECK(json_representation == j2); + + SECTION("roundtrips") + { + SECTION("std::ostringstream") + { + std::ostringstream ss; + json::to_bson(json_representation, ss); + std::istringstream iss(ss.str()); + json j3 = json::from_bson(iss); + CHECK(json_representation == j3); + } + + SECTION("std::string") + { + std::string s; + json::to_bson(json_representation, s); + json j3 = json::from_bson(s); + CHECK(json_representation == j3); + } + + SECTION("std::vector") + { + std::vector v; + json::to_bson(json_representation, v); + json j3 = json::from_bson(v); + CHECK(json_representation == j3); + } + } +} + + + + + +class SaxCountdown +{ + public: + explicit SaxCountdown(const int count) : events_left(count) + {} + + bool null() + { + return events_left-- > 0; + } + + bool boolean(bool) + { + return events_left-- > 0; + } + + bool 
number_integer(json::number_integer_t) + { + return events_left-- > 0; + } + + bool number_unsigned(json::number_unsigned_t) + { + return events_left-- > 0; + } + + bool number_float(json::number_float_t, const std::string&) + { + return events_left-- > 0; + } + + bool string(std::string&) + { + return events_left-- > 0; + } + + bool start_object(std::size_t) + { + return events_left-- > 0; + } + + bool key(std::string&) + { + return events_left-- > 0; + } + + bool end_object() + { + return events_left-- > 0; + } + + bool start_array(std::size_t) + { + return events_left-- > 0; + } + + bool end_array() + { + return events_left-- > 0; + } + + bool parse_error(std::size_t, const std::string&, const json::exception&) + { + return false; + } + + private: + int events_left = 0; +}; + + +TEST_CASE("Incomplete BSON INPUT") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean + 'e', 'n', 't' // unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 9: syntax error while parsing BSON cstring: unexpected end of input"); + + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + +TEST_CASE("Incomplete BSON INPUT 2") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, 0x00, 0x00, // size (little endian) + 0x08, // entry: boolean, unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 6: syntax error while parsing BSON cstring: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, 
json::input_format_t::bson)); +} + + +TEST_CASE("Incomplete BSON INPUT 3") +{ + std::vector incomplete_bson = + { + 0x41, 0x00, 0x00, 0x00, // size (little endian) + 0x04, /// entry: embedded document + 'e', 'n', 't', 'r', 'y', '\x00', + + 0x35, 0x00, 0x00, 0x00, // size (little endian) + 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x02, 0x00, 0x00, 0x00 + // missing input data... + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 28: syntax error while parsing BSON element list: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(1); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + + + +TEST_CASE("Incomplete BSON INPUT 4") +{ + std::vector incomplete_bson = + { + 0x0D, 0x00, // size (incomplete), unexpected EOF + }; + + CHECK_THROWS_AS(json::from_bson(incomplete_bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(incomplete_bson), + "[json.exception.parse_error.110] parse error at byte 3: syntax error while parsing BSON number: unexpected end of input"); + CHECK(json::from_bson(incomplete_bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(incomplete_bson, &scp, json::input_format_t::bson)); +} + + +TEST_CASE("Unsupported BSON input") +{ + std::vector bson = + { + 0x0C, 0x00, 0x00, 0x00, // size (little endian) + 0xFF, // entry type: Min key (not supported yet) + 'e', 'n', 't', 'r', 'y', '\x00', + 0x00 // end marker + }; + + CHECK_THROWS_AS(json::from_bson(bson), json::parse_error&); + CHECK_THROWS_WITH(json::from_bson(bson), + "[json.exception.parse_error.114] parse error at byte 5: Unsupported BSON record type 0xFF"); + CHECK(json::from_bson(bson, true, false).is_discarded()); + + SaxCountdown scp(0); + CHECK(not json::sax_parse(bson, &scp, json::input_format_t::bson)); 
+} + + + +TEST_CASE("BSON numerical data") +{ + SECTION("number") + { + SECTION("signed") + { + SECTION("std::int64_t: INT64_MIN .. INT32_MIN-1") + { + std::vector numbers + { + INT64_MIN, + -1000000000000000000LL, + -100000000000000000LL, + -10000000000000000LL, + -1000000000000000LL, + -100000000000000LL, + -10000000000000LL, + -1000000000000LL, + -100000000000LL, + -10000000000LL, + static_cast(INT32_MIN) - 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + + SECTION("signed std::int32_t: INT32_MIN .. 
INT32_MAX") + { + std::vector numbers + { + INT32_MIN, + -2147483647L, + -1000000000L, + -100000000L, + -10000000L, + -1000000L, + -100000L, + -10000L, + -1000L, + -100L, + -10L, + -1L, + 0L, + 1L, + 10L, + 100L, + 1000L, + 10000L, + 100000L, + 1000000L, + 10000000L, + 100000000L, + 1000000000L, + 2147483646L, + INT32_MAX + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint32_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("signed std::int64_t: INT32_MAX+1 .. 
INT64_MAX") + { + std::vector numbers + { + INT64_MAX, + 1000000000000000000LL, + 100000000000000000LL, + 10000000000000000LL, + 1000000000000000LL, + 100000000000000LL, + 10000000000000LL, + 1000000000000LL, + 100000000000LL, + 10000000000LL, + static_cast(INT32_MAX) + 1, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + CHECK(j.at("entry").is_number_integer()); + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + } + + SECTION("unsigned") + { + SECTION("unsigned std::uint64_t: 0 .. 
INT32_MAX") + { + std::vector numbers + { + 0ULL, + 1ULL, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 2147483646ULL, + static_cast(INT32_MAX) + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x10u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x10u, /// entry: int32 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + + } + } + + SECTION("unsigned std::uint64_t: INT32_MAX+1 .. 
INT64_MAX") + { + std::vector numbers + { + static_cast(INT32_MAX) + 1, + 4000000000ULL, + static_cast(UINT32_MAX), + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL, + static_cast(INT64_MAX), + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + const auto bson = json::to_bson(j); + CHECK(bson == expected_bson); + + auto j_roundtrip = json::from_bson(bson); + + CHECK(j.at("entry").is_number_unsigned()); + CHECK(j_roundtrip.at("entry").is_number_integer()); + CHECK(j_roundtrip == j); + CHECK(json::from_bson(bson, true, false) == j); + } + } + + SECTION("unsigned std::uint64_t: INT64_MAX+1 .. 
UINT64_MAX") + { + std::vector numbers + { + static_cast(INT64_MAX) + 1ULL, + 10000000000000000000ULL, + 18000000000000000000ULL, + UINT64_MAX - 1ULL, + UINT64_MAX, + }; + + for (auto i : numbers) + { + + CAPTURE(i); + + json j = + { + { "entry", i } + }; + + std::uint64_t iu = *reinterpret_cast(&i); + std::vector expected_bson = + { + 0x14u, 0x00u, 0x00u, 0x00u, // size (little endian) + 0x12u, /// entry: int64 + 'e', 'n', 't', 'r', 'y', '\x00', + static_cast((iu >> (8u * 0u)) & 0xffu), + static_cast((iu >> (8u * 1u)) & 0xffu), + static_cast((iu >> (8u * 2u)) & 0xffu), + static_cast((iu >> (8u * 3u)) & 0xffu), + static_cast((iu >> (8u * 4u)) & 0xffu), + static_cast((iu >> (8u * 5u)) & 0xffu), + static_cast((iu >> (8u * 6u)) & 0xffu), + static_cast((iu >> (8u * 7u)) & 0xffu), + 0x00u // end marker + }; + + CHECK_THROWS_AS(json::to_bson(j), json::out_of_range&); + CHECK_THROWS_WITH(json::to_bson(j), "[json.exception.out_of_range.407] number overflow serializing " + std::to_string(i)); + } + } + + } + } +}