From 32242022f74083f3c7bd4ead638cbea33d978f8f Mon Sep 17 00:00:00 2001 From: Florian Albrechtskirchinger Date: Wed, 3 Aug 2022 09:15:37 +0200 Subject: [PATCH] Minor BJData fixes (#3637) * Replace vector/map LUTs in binary_reader with arrays * Replace string_t::npos in binary_reader --- .../nlohmann/detail/input/binary_reader.hpp | 73 ++++++++++++---- include/nlohmann/detail/meta/cpp_future.hpp | 13 ++- single_include/nlohmann/json.hpp | 86 ++++++++++++++----- tests/src/unit-bjdata.cpp | 17 +++- 4 files changed, 143 insertions(+), 46 deletions(-) diff --git a/include/nlohmann/detail/input/binary_reader.hpp b/include/nlohmann/detail/input/binary_reader.hpp index f4134efdc..b394f29f2 100644 --- a/include/nlohmann/detail/input/binary_reader.hpp +++ b/include/nlohmann/detail/input/binary_reader.hpp @@ -20,7 +20,6 @@ #include // char_traits, string #include // make_pair, move #include // vector -#include // map #include #include @@ -1953,7 +1952,7 @@ class binary_reader return false; } - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (size_and_type.second != 0) { @@ -2186,7 +2185,7 @@ class binary_reader for (auto i : dim) { result *= i; - if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type() + if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type() { return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); } @@ -2232,7 +2231,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result, bool inside_ndarray = false) { - result.first = string_t::npos; // size + result.first = npos; // size result.second = 0; // type bool is_ndarray = false; @@ -2240,10 +2239,9 @@ class binary_reader if (current == '$') { - std::vector bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type - result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )) + if (input_format == input_format_t::bjdata + && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second))) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, @@ -2492,23 +2490,23 @@ class binary_reader // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) { - std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, - {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} - }; - size_and_type.second &= ~(static_cast(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker - + auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t) + { + return p.first < t; + }); string_t key = "_ArrayType_"; - if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) + if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second)) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) + string_t type = it->second; // sax->string() takes a reference + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) { return false; } @@ -2535,7 +2533,7 @@ class binary_reader return (sax->end_array() && sax->end_object()); } - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) { @@ -2598,7 +2596,7 @@ class binary_reader } // do not accept ND-array size in objects in BJData - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, @@ -2606,7 +2604,7 @@ class binary_reader } string_t key; - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) { @@ -2950,6 +2948,8 @@ class binary_reader } private: + static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast(-1); + /// input adapter InputAdapterType ia; @@ -2967,7 +2967,44 @@ class binary_reader /// the SAX parser json_sax_t* sax = nullptr; + + // excluded markers in bjdata optimized type +#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \ + make_array('F', 'H', 'N', 'S', 'T', 'Z', '[', '{') + +#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \ + make_array( \ + bjd_type{'C', "char"}, \ + bjd_type{'D', "double"}, \ + bjd_type{'I', "int16"}, \ + bjd_type{'L', "int64"}, \ + bjd_type{'M', "uint64"}, \ + bjd_type{'U', "uint8"}, \ + bjd_type{'d', "single"}, \ + bjd_type{'i', "int8"}, \ + bjd_type{'l', "int32"}, \ + bjd_type{'m', "uint32"}, \ + bjd_type{'u', "uint16"}) + + JSON_PRIVATE_UNLESS_TESTED: + // lookup tables + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers = + JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_; + + using bjd_type = std::pair; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map = + JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_; + +#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ +#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ }; +#ifndef JSON_HAS_CPP_17 + template + constexpr std::size_t binary_reader::npos; +#endif + } // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/include/nlohmann/detail/meta/cpp_future.hpp b/include/nlohmann/detail/meta/cpp_future.hpp index d04ec8e05..ec984dc37 100644 --- a/include/nlohmann/detail/meta/cpp_future.hpp +++ b/include/nlohmann/detail/meta/cpp_future.hpp @@ -9,6 +9,7 @@ #pragma once +#include // array #include // size_t #include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type #include // index_sequence, make_index_sequence, index_sequence_for @@ -152,15 +153,19 @@ template<> struct priority_tag<0> {}; template struct static_const { - static constexpr T value{}; + static JSON_INLINE_VARIABLE constexpr T value{}; }; #ifndef JSON_HAS_CPP_17 - template - constexpr T static_const::value; // NOLINT(readability-redundant-declaration) - + constexpr T static_const::value; #endif +template +inline constexpr std::array make_array(Args&& ... args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + } // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index e9bf47551..4d86493e1 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -3028,6 +3028,7 @@ NLOHMANN_JSON_NAMESPACE_END +#include // array #include // size_t #include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type #include // index_sequence, make_index_sequence, index_sequence_for @@ -3172,16 +3173,20 @@ template<> struct priority_tag<0> {}; template struct static_const { - static constexpr T value{}; + static JSON_INLINE_VARIABLE constexpr T value{}; }; #ifndef JSON_HAS_CPP_17 - template - constexpr T static_const::value; // NOLINT(readability-redundant-declaration) - + constexpr T static_const::value; #endif +template +inline constexpr std::array make_array(Args&& ... args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + } // namespace detail NLOHMANN_JSON_NAMESPACE_END @@ -5977,7 +5982,6 @@ NLOHMANN_JSON_NAMESPACE_END #include // char_traits, string #include // make_pair, move #include // vector -#include // map // #include @@ -10943,7 +10947,7 @@ class binary_reader return false; } - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (size_and_type.second != 0) { @@ -11176,7 +11180,7 @@ class binary_reader for (auto i : dim) { result *= i; - if (result == 0 || result == string_t::npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be string_t::npos as it is used to initialize size in get_ubjson_size_type() + if (result == 0 || result == npos) // because dim elements shall not have zeros, result = 0 means overflow happened; it also can't be npos as it is used to initialize size in get_ubjson_size_type() { return sax->parse_error(chars_read, get_token_string(), out_of_range::create(408, exception_message(input_format, "excessive ndarray size caused overflow", "size"), nullptr)); } @@ -11222,7 +11226,7 @@ class binary_reader */ bool get_ubjson_size_type(std::pair& result, bool inside_ndarray = false) { - result.first = string_t::npos; // size + result.first = npos; // size result.second = 0; // type bool is_ndarray = false; @@ -11230,10 +11234,9 @@ class binary_reader if (current == '$') { - std::vector bjdx = {'[', '{', 'S', 'H', 'T', 'F', 'N', 'Z'}; // excluded markers in bjdata optimized type - result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_HEDLEY_UNLIKELY( input_format == input_format_t::bjdata && std::find(bjdx.begin(), bjdx.end(), result.second) != bjdx.end() )) + if (input_format == input_format_t::bjdata + && JSON_HEDLEY_UNLIKELY(std::binary_search(bjd_optimized_type_markers.begin(), bjd_optimized_type_markers.end(), result.second))) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, @@ -11482,23 +11485,23 @@ class binary_reader // if bit-8 of size_and_type.second is set to 1, encode bjdata ndarray as an object in JData annotated array format (https://github.com/NeuroJSON/jdata): // {"_ArrayType_" : "typeid", "_ArraySize_" : [n1, n2, ...], "_ArrayData_" : [v1, v2, ...]} - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) { - std::map bjdtype = {{'U', "uint8"}, {'i', "int8"}, {'u', "uint16"}, {'I', "int16"}, - {'m', "uint32"}, {'l', "int32"}, {'M', "uint64"}, {'L', "int64"}, {'d', "single"}, {'D', "double"}, {'C', "char"} - }; - size_and_type.second &= ~(static_cast(1) << 8); // use bit 8 to indicate ndarray, here we remove the bit to restore the type marker - + auto it = std::lower_bound(bjd_types_map.begin(), bjd_types_map.end(), size_and_type.second, [](const bjd_type & p, char_int_type t) + { + return p.first < t; + }); string_t key = "_ArrayType_"; - if (JSON_HEDLEY_UNLIKELY(bjdtype.count(size_and_type.second) == 0)) + if (JSON_HEDLEY_UNLIKELY(it == bjd_types_map.end() || it->first != size_and_type.second)) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format, "invalid byte: 0x" + last_token, "type"), nullptr)); } - if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(bjdtype[size_and_type.second]) )) + string_t type = it->second; // sax->string() takes a reference + if (JSON_HEDLEY_UNLIKELY(!sax->key(key) || !sax->string(type))) { return false; } @@ -11525,7 +11528,7 @@ class binary_reader return (sax->end_array() && sax->end_object()); } - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_array(size_and_type.first))) { @@ -11588,7 +11591,7 @@ class binary_reader } // do not accept ND-array size in objects in BJData - if (input_format == input_format_t::bjdata && size_and_type.first != string_t::npos && (size_and_type.second & (1 << 8)) != 0) + if (input_format == input_format_t::bjdata && size_and_type.first != npos && (size_and_type.second & (1 << 8)) != 0) { auto last_token = get_token_string(); return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, @@ -11596,7 +11599,7 @@ class binary_reader } string_t key; - if (size_and_type.first != string_t::npos) + if (size_and_type.first != npos) { if (JSON_HEDLEY_UNLIKELY(!sax->start_object(size_and_type.first))) { @@ -11940,6 +11943,8 @@ class binary_reader } private: + static JSON_INLINE_VARIABLE constexpr std::size_t npos = static_cast(-1); + /// input adapter InputAdapterType ia; @@ -11957,8 +11962,45 @@ class binary_reader /// the SAX parser json_sax_t* sax = nullptr; + + // excluded markers in bjdata optimized type +#define JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ \ + make_array('F', 'H', 'N', 'S', 'T', 'Z', '[', '{') + +#define JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ \ + make_array( \ + bjd_type{'C', "char"}, \ + bjd_type{'D', "double"}, \ + bjd_type{'I', "int16"}, \ + bjd_type{'L', "int64"}, \ + bjd_type{'M', "uint64"}, \ + bjd_type{'U', "uint8"}, \ + bjd_type{'d', "single"}, \ + bjd_type{'i', "int8"}, \ + bjd_type{'l', "int32"}, \ + bjd_type{'m', "uint32"}, \ + bjd_type{'u', "uint16"}) + + JSON_PRIVATE_UNLESS_TESTED: + // lookup tables + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_) bjd_optimized_type_markers = + JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_; + + using bjd_type = std::pair; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const decltype(JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_) bjd_types_map = + JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_; + +#undef JSON_BINARY_READER_MAKE_BJD_OPTIMIZED_TYPE_MARKERS_ +#undef JSON_BINARY_READER_MAKE_BJD_TYPES_MAP_ }; +#ifndef JSON_HAS_CPP_17 + template + constexpr std::size_t binary_reader::npos; +#endif + } // namespace detail NLOHMANN_JSON_NAMESPACE_END diff --git a/tests/src/unit-bjdata.cpp b/tests/src/unit-bjdata.cpp index 74e331f75..c245e87e9 100644 --- a/tests/src/unit-bjdata.cpp +++ b/tests/src/unit-bjdata.cpp @@ -8,11 +8,13 @@ #include "doctest_compatibility.h" -#include -#include +#define JSON_TESTS_PRIVATE #include using nlohmann::json; +#include +#include +#include #include #include #include @@ -205,6 +207,17 @@ TEST_CASE_TEMPLATE_INVOKE(value_in_range_of_test, \ TEST_CASE("BJData") { + SECTION("binary_reader BJData LUT arrays are sorted") + { + std::vector data; + auto ia = nlohmann::detail::input_adapter(data); + // NOLINTNEXTLINE(hicpp-move-const-arg,performance-move-const-arg) + nlohmann::detail::binary_reader br{std::move(ia), json::input_format_t::bjdata}; + + CHECK(std::is_sorted(br.bjd_optimized_type_markers.begin(), br.bjd_optimized_type_markers.end())); + CHECK(std::is_sorted(br.bjd_types_map.begin(), br.bjd_types_map.end())); + } + SECTION("individual values") { SECTION("discarded")