From 869f4c68d4fa071a66326b22feb148bb4bcb3bec Mon Sep 17 00:00:00 2001 From: Niels Date: Tue, 22 Nov 2016 07:25:40 +0100 Subject: [PATCH 1/4] :memo: updated thanks section --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e899ac21..d1d1f866d 100644 --- a/README.md +++ b/README.md @@ -500,7 +500,7 @@ I deeply appreciate the help of the following people. - [Vladimir Petrigo](https://github.com/vpetrigo) made a SFINAE hack more readable. - [Denis Andrejew](https://github.com/seeekr) fixed a grammar issue in the README file. - [Pierre-Antoine Lacaze](https://github.com/palacaze) found a subtle bug in the `dump()` function. -- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling. +- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling and found some nice performance improvements in the parser. Thanks a lot for helping out! From f620d74919956105d880c47865ce5f12872b61e3 Mon Sep 17 00:00:00 2001 From: Niels Date: Tue, 22 Nov 2016 07:26:11 +0100 Subject: [PATCH 2/4] :zap: added performance fixes (#365) --- src/json.hpp | 37 ++++++++++++++++++++++--------------- src/json.hpp.re2c | 37 ++++++++++++++++++++++--------------- 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index e71ffc4a2..a4a3fef57 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8734,10 +8734,10 @@ basic_json_parser_66: { // copy unprocessed characters to line buffer m_line_buffer.clear(); - for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) - { - m_line_buffer.append(1, static_cast(*m_cursor)); - } + m_line_buffer.append( + reinterpret_cast(m_start), + static_cast(m_limit - m_start)); + m_cursor = m_limit; } // append n characters to make sure that there is sufficient @@ -8750,10 +8750,12 @@ basic_json_parser_66: // delete processed characters from line buffer m_line_buffer.erase(0, static_cast(offset_start)); // read next line from input stream - std::string line; - std::getline(*m_stream, line, '\n'); + m_line_buffer_tmp.clear(); + std::getline(*m_stream, m_line_buffer_tmp, '\n'); + // add line with newline symbol to the line buffer - m_line_buffer += line + "\n"; + m_line_buffer += m_line_buffer_tmp; + m_line_buffer.push_back('\n'); } // set pointers @@ -8840,9 +8842,18 @@ basic_json_parser_66: // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // process escaped characters - if (*i == '\\') + // number of non-escaped characters + const size_t n = static_cast(std::find(i, m_cursor - 1, '\\') - i); + + if (n != 0) { + result.append(reinterpret_cast(i), n); + i += n - 1; // -1 because will ++i + } + else + { + // processing escaped character + // read next character ++i; @@ -8929,12 +8940,6 @@ basic_json_parser_66: } } } - else - { - // all other characters are just copied to the end of the - // string - result.append(1, static_cast(*i)); - } } return result; @@ -9118,6 +9123,8 @@ basic_json_parser_66: std::istream* m_stream = nullptr; /// line buffer buffer for m_stream string_t m_line_buffer {}; + /// used for filling m_line_buffer + string_t m_line_buffer_tmp {}; /// the buffer pointer const lexer_char_t* m_content = nullptr; /// pointer to the beginning of the current symbol diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 9eccc144f..cb70535af 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7883,10 +7883,10 @@ class basic_json { // copy unprocessed characters to line buffer m_line_buffer.clear(); - for (m_cursor = m_start; m_cursor != m_limit; ++m_cursor) - { - m_line_buffer.append(1, static_cast(*m_cursor)); - } + m_line_buffer.append( + reinterpret_cast(m_start), + static_cast(m_limit - m_start)); + m_cursor = m_limit; } // append n characters to make sure that there is sufficient @@ -7899,10 +7899,12 @@ class basic_json // delete processed characters from line buffer m_line_buffer.erase(0, static_cast(offset_start)); // read next line from input stream - std::string line; - std::getline(*m_stream, line, '\n'); + m_line_buffer_tmp.clear(); + std::getline(*m_stream, m_line_buffer_tmp, '\n'); + // add line with newline symbol to the line buffer - m_line_buffer += line + "\n"; + m_line_buffer += m_line_buffer_tmp; + m_line_buffer.push_back('\n'); } // set pointers @@ -7989,9 +7991,18 @@ class basic_json // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // process escaped characters - if (*i == '\\') + // number of non-escaped characters + const size_t n = static_cast(std::find(i, m_cursor - 1, '\\') - i); + + if (n != 0) { + result.append(reinterpret_cast(i), n); + i += n - 1; // -1 because will ++i + } + else + { + // processing escaped character + // read next character ++i; @@ -8078,12 +8089,6 @@ class basic_json } } } - else - { - // all other characters are just copied to the end of the - // string - result.append(1, static_cast(*i)); - } } return result; @@ -8267,6 +8272,8 @@ class basic_json std::istream* m_stream = nullptr; /// line buffer buffer for m_stream string_t m_line_buffer {}; + /// used for filling m_line_buffer + string_t m_line_buffer_tmp {}; /// the buffer pointer const lexer_char_t* m_content = nullptr; /// pointer to the beginning of the current symbol From 6cc2d58d69a48e06ab77d78cc4adc9b64f38f485 Mon Sep 17 00:00:00 2001 From: Niels Date: Tue, 22 Nov 2016 20:13:47 +0100 Subject: [PATCH 3/4] :bug: hopefully fixing the crashes on Linux (#365) --- src/json.hpp | 17 ++++++----------- src/json.hpp.re2c | 17 ++++++----------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/src/json.hpp b/src/json.hpp index a4a3fef57..8704134a0 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8733,10 +8733,7 @@ basic_json_parser_66: if (m_start != reinterpret_cast(m_line_buffer.data())) { // copy unprocessed characters to line buffer - m_line_buffer.clear(); - m_line_buffer.append( - reinterpret_cast(m_start), - static_cast(m_limit - m_start)); + m_line_buffer.assign(m_start, m_limit); m_cursor = m_limit; } @@ -8842,18 +8839,16 @@ basic_json_parser_66: // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // number of non-escaped characters - const size_t n = static_cast(std::find(i, m_cursor - 1, '\\') - i); - - if (n != 0) + // find next escape character + auto e = std::find(i, m_cursor - 1, '\\'); + if (e != i) { - result.append(reinterpret_cast(i), n); - i += n - 1; // -1 because will ++i + result.append(i, e); + i = e - 1; // -1 because of ++i } else { // processing escaped character - // read next character ++i; diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index cb70535af..389adbc6c 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7882,10 +7882,7 @@ class basic_json if (m_start != reinterpret_cast(m_line_buffer.data())) { // copy unprocessed characters to line buffer - m_line_buffer.clear(); - m_line_buffer.append( - reinterpret_cast(m_start), - static_cast(m_limit - m_start)); + m_line_buffer.assign(m_start, m_limit); m_cursor = m_limit; } @@ -7991,18 +7988,16 @@ class basic_json // iterate the result between the quotes for (const lexer_char_t* i = m_start + 1; i < m_cursor - 1; ++i) { - // number of non-escaped characters - const size_t n = static_cast(std::find(i, m_cursor - 1, '\\') - i); - - if (n != 0) + // find next escape character + auto e = std::find(i, m_cursor - 1, '\\'); + if (e != i) { - result.append(reinterpret_cast(i), n); - i += n - 1; // -1 because will ++i + result.append(i, e); + i = e - 1; // -1 because of ++i } else { // processing escaped character - // read next character ++i; From 2773038cf9e3de7f043d0e03c1ee79fe1d49e667 Mon Sep 17 00:00:00 2001 From: Niels Lohmann Date: Sat, 26 Nov 2016 12:43:23 +0100 Subject: [PATCH 4/4] :zap: added improvements (#365) --- README.md | 2 +- benchmarks/benchmarks.cpp | 2 +- src/json.hpp | 42 ++++++++++++++++++++++++++++----------- src/json.hpp.re2c | 42 ++++++++++++++++++++++++++++----------- 4 files changed, 62 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d1d1f866d..ff17f3d47 100644 --- a/README.md +++ b/README.md @@ -500,7 +500,7 @@ I deeply appreciate the help of the following people. - [Vladimir Petrigo](https://github.com/vpetrigo) made a SFINAE hack more readable. - [Denis Andrejew](https://github.com/seeekr) fixed a grammar issue in the README file. - [Pierre-Antoine Lacaze](https://github.com/palacaze) found a subtle bug in the `dump()` function. -- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling and found some nice performance improvements in the parser. +- [TurpentineDistillery](https://github.com/TurpentineDistillery) pointed to [`std::locale::classic()`](http://en.cppreference.com/w/cpp/locale/locale/classic) to avoid too much locale joggling, found some nice performance improvements in the parser and improved the benchmarking code. Thanks a lot for helping out! diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp index efb26cf20..745123c92 100644 --- a/benchmarks/benchmarks.cpp +++ b/benchmarks/benchmarks.cpp @@ -58,7 +58,7 @@ static void bench(benchpress::context& ctx, for (size_t i = 0; i < ctx.num_iterations(); ++i) { - // clear flags and rewind + // clear flags and rewind istr.clear(); istr.seekg(0); json j; diff --git a/src/json.hpp b/src/json.hpp index 8704134a0..0f6c3c9c3 100644 --- a/src/json.hpp +++ b/src/json.hpp @@ -8719,8 +8719,22 @@ basic_json_parser_66: */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) @@ -8729,23 +8743,23 @@ basic_json_parser_66: // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.assign(m_start, m_limit); - m_cursor = m_limit; - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standand library to do the right + // thing. See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit m_line_buffer.append(1, '\x00'); - m_line_buffer.append(n - 1, '\x01'); + if (n > 0) + { + m_line_buffer.append(n - 1, '\x01'); + } } else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream m_line_buffer_tmp.clear(); std::getline(*m_stream, m_line_buffer_tmp, '\n'); @@ -8756,7 +8770,7 @@ basic_json_parser_66: } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -8843,7 +8857,11 @@ basic_json_parser_66: auto e = std::find(i, m_cursor - 1, '\\'); if (e != i) { - result.append(i, e); + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 + for (auto k = i; k < e; k++) + { + result.push_back(static_cast(*k)); + } i = e - 1; // -1 because of ++i } else diff --git a/src/json.hpp.re2c b/src/json.hpp.re2c index 389adbc6c..c83cd4360 100644 --- a/src/json.hpp.re2c +++ b/src/json.hpp.re2c @@ -7868,8 +7868,22 @@ class basic_json */ void fill_line_buffer(size_t n = 0) { + // if line buffer is used, m_content points to its data + assert(m_line_buffer.empty() + or m_content == reinterpret_cast(m_line_buffer.data())); + + // if line buffer is used, m_limit is set past the end of its data + assert(m_line_buffer.empty() + or m_limit == m_content + m_line_buffer.size()); + + // pointer relationships + assert(m_content <= m_start); + assert(m_start <= m_cursor); + assert(m_cursor <= m_limit); + assert(m_marker == nullptr or m_marker <= m_limit); + // number of processed characters (p) - const auto offset_start = m_start - m_content; + const size_t num_processed_chars = static_cast(m_start - m_content); // offset for m_marker wrt. to m_start const auto offset_marker = (m_marker == nullptr) ? 0 : m_marker - m_start; // number of unprocessed characters (u) @@ -7878,23 +7892,23 @@ class basic_json // no stream is used or end of file is reached if (m_stream == nullptr or m_stream->eof()) { - // skip this part if we are already using the line buffer - if (m_start != reinterpret_cast(m_line_buffer.data())) - { - // copy unprocessed characters to line buffer - m_line_buffer.assign(m_start, m_limit); - m_cursor = m_limit; - } + // m_start may or may not be pointing into m_line_buffer at + // this point. We trust the standand library to do the right + // thing. See http://stackoverflow.com/q/28142011/266378 + m_line_buffer.assign(m_start, m_limit); // append n characters to make sure that there is sufficient // space between m_cursor and m_limit m_line_buffer.append(1, '\x00'); - m_line_buffer.append(n - 1, '\x01'); + if (n > 0) + { + m_line_buffer.append(n - 1, '\x01'); + } } else { // delete processed characters from line buffer - m_line_buffer.erase(0, static_cast(offset_start)); + m_line_buffer.erase(0, num_processed_chars); // read next line from input stream m_line_buffer_tmp.clear(); std::getline(*m_stream, m_line_buffer_tmp, '\n'); @@ -7905,7 +7919,7 @@ class basic_json } // set pointers - m_content = reinterpret_cast(m_line_buffer.c_str()); + m_content = reinterpret_cast(m_line_buffer.data()); assert(m_content != nullptr); m_start = m_content; m_marker = m_start + offset_marker; @@ -7992,7 +8006,11 @@ class basic_json auto e = std::find(i, m_cursor - 1, '\\'); if (e != i) { - result.append(i, e); + // see https://github.com/nlohmann/json/issues/365#issuecomment-262874705 + for (auto k = i; k < e; k++) + { + result.push_back(static_cast(*k)); + } i = e - 1; // -1 because of ++i } else