Skip to content

Commit

Permalink
🔨 small parser performance improvements
Browse files Browse the repository at this point in the history
- replaced list of pairs by flat list in next_byte_in_range
- implemented early exit in case of parse errors
- reused memory for object keys
- direct calls to embedded objects/arrays for insertions
  • Loading branch information
nlohmann committed Jul 30, 2017
1 parent c819a2d commit 7737a29
Showing 1 changed file with 41 additions and 27 deletions.
68 changes: 41 additions & 27 deletions src/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1675,14 +1675,15 @@ class lexer
@return true iff no range violation was detected
*/
bool next_byte_in_range(std::initializer_list<std::pair<int, int>> ranges)
bool next_byte_in_range(std::initializer_list<int> ranges)
{
assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6);
add(current);

for (const auto& range : ranges)
for (auto range = ranges.begin(); range != ranges.end(); ++range)
{
get();
if (JSON_LIKELY(range.first <= current and current <= range.second))
if (JSON_LIKELY(*range <= current and current <= *(++range)))
{
add(current);
}
Expand Down Expand Up @@ -2053,7 +2054,7 @@ class lexer
case 0xde:
case 0xdf:
{
if (JSON_UNLIKELY(not next_byte_in_range({{0x80, 0xBF}})))
if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF})))
{
return token_type::parse_error;
}
Expand All @@ -2063,7 +2064,7 @@ class lexer
// U+0800..U+0FFF: bytes E0 A0..BF 80..BF
case 0xe0:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0xA0, 0xBF}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand All @@ -2087,7 +2088,7 @@ class lexer
case 0xee:
case 0xef:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand All @@ -2097,7 +2098,7 @@ class lexer
// U+D000..U+D7FF: bytes ED 80..9F 80..BF
case 0xed:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x9F}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand All @@ -2107,7 +2108,7 @@ class lexer
// U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
case 0xf0:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0x90, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand All @@ -2119,7 +2120,7 @@ class lexer
case 0xf2:
case 0xf3:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0xBF}, {0x80, 0xBF}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand All @@ -2129,7 +2130,7 @@ class lexer
// U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
case 0xf4:
{
if (JSON_UNLIKELY(not (next_byte_in_range({{0x80, 0x8F}, {0x80, 0xBF}, {0x80, 0xBF}}))))
if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
{
return token_type::parse_error;
}
Expand Down Expand Up @@ -2166,12 +2167,11 @@ class lexer
This function scans a string according to Sect. 6 of RFC 7159.
The function is realized with a deterministic finite state machine
derived from the grammar described in RFC 7159. Starting in state
"init", the input is read and used to determined the next state. Only
state "done" accepts the number. State "error" is a trap state to model
errors. In the table below, "anything" means any character but the ones
listed before.
The function is realized with a deterministic finite state machine derived
from the grammar described in RFC 7159. Starting in state "init", the
input is read and used to determined the next state. Only state "done"
accepts the number. State "error" is a trap state to model errors. In the
table below, "anything" means any character but the ones listed before.
state | 0 | 1-9 | e E | + | - | . | anything
---------|----------|----------|----------|---------|---------|----------|-----------
Expand Down Expand Up @@ -2486,8 +2486,8 @@ class lexer
}

scan_number_done:
// unget the character after the number (we only read it to know that we
// are done scanning a number)
// unget the character after the number (we only read it to know that
// we are done scanning a number)
--chars_read;
next_unget = true;

Expand Down Expand Up @@ -2886,6 +2886,9 @@ class parser
*/
void parse_internal(bool keep, BasicJsonType& result)
{
// never parse after a parse error was detected
assert(not errored);

// start with a discarded value
if (not result.is_discarded())
{
Expand Down Expand Up @@ -2919,6 +2922,7 @@ class parser
}

// parse values
std::string key;
BasicJsonType value;
while (true)
{
Expand All @@ -2927,7 +2931,7 @@ class parser
{
return;
}
const auto key = m_lexer.get_string();
key = m_lexer.get_string();

bool keep_tag = false;
if (keep)
Expand Down Expand Up @@ -2955,9 +2959,15 @@ class parser
value.m_value.destroy(value.m_type);
value.m_type = value_t::discarded;
parse_internal(keep, value);

if (JSON_UNLIKELY(errored))
{
return;
}

if (keep and keep_tag and not value.is_discarded())
{
result[key] = std::move(value);
result.m_value.object->operator[](std::move(key)) = std::move(value);
}

// comma -> next value
Expand Down Expand Up @@ -3015,9 +3025,15 @@ class parser
value.m_value.destroy(value.m_type);
value.m_type = value_t::discarded;
parse_internal(keep, value);

if (JSON_UNLIKELY(errored))
{
return;
}

if (keep and not value.is_discarded())
{
result.push_back(std::move(value));
result.m_value.array->push_back(std::move(value));
}

// comma -> next value
Expand Down Expand Up @@ -4405,8 +4421,7 @@ class binary_reader
@param[in] adapter input adapter to read from
*/
explicit binary_reader(input_adapter_t adapter)
: ia(adapter), is_little_endian(little_endianess())
explicit binary_reader(input_adapter_t adapter) : ia(adapter)
{
assert(ia);
}
Expand Down Expand Up @@ -5526,7 +5541,7 @@ class binary_reader
std::size_t chars_read = 0;

/// whether we can assume little endianess
const bool is_little_endian = true;
const bool is_little_endian = little_endianess();
};

/*!
Expand All @@ -5541,8 +5556,7 @@ class binary_writer
@param[in] adapter output adapter to write to
*/
explicit binary_writer(output_adapter_t<CharType> adapter)
: is_little_endian(binary_reader<BasicJsonType>::little_endianess()), oa(adapter)
explicit binary_writer(output_adapter_t<CharType> adapter) : oa(adapter)
{
assert(oa);
}
Expand Down Expand Up @@ -6067,7 +6081,7 @@ class binary_writer

private:
/// whether we can assume little endianess
const bool is_little_endian = true;
const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess();

/// the output
output_adapter_t<CharType> oa = nullptr;
Expand Down

0 comments on commit 7737a29

Please sign in to comment.