diff --git a/deps/simdjson/simdjson.cpp b/deps/simdjson/simdjson.cpp index 2fd974c1e92429..b5dd7243853217 100644 --- a/deps/simdjson/simdjson.cpp +++ b/deps/simdjson/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-06-11 14:08:20 -0400. Do not edit! */ +/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -40,6 +40,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -224,6 +234,11 @@ using std::size_t; #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif +#if defined(__clang__) || defined(__GNUC__) +#define simdjson_pure [[gnu::pure]] +#else +#define simdjson_pure +#endif #if defined(__clang__) || defined(__GNUC__) #if defined(__has_feature) @@ -5949,7 +5964,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -5966,7 +5981,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. 
In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -6020,14 +6035,14 @@ class dom_parser_implementation { * * @return Current capacity, in bytes. */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -6068,11 +6083,11 @@ simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } -simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -12477,7 +12492,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. 
* * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -13352,7 +13367,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -18696,7 +18711,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -19571,7 +19586,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. 
*/ @@ -24908,7 +24923,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -25783,7 +25798,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -31391,7 +31406,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -32266,7 +32281,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. 
It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -38448,7 +38463,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -39323,7 +39338,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -44472,7 +44487,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -45347,7 +45362,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. 
There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -50487,7 +50502,7 @@ simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementa } parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); /*** - * The On Demand API requires special padding. + * The On-Demand API requires special padding. * * This is related to https://github.com/simdjson/simdjson/issues/906 * Basically, we want to make sure that if the parsing continues beyond the last (valid) @@ -51362,7 +51377,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ @@ -54563,7 +54578,7 @@ simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. 
It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. */ diff --git a/deps/simdjson/simdjson.h b/deps/simdjson/simdjson.h index 4b7a0695d323b9..ddb6f2e4e0a6ed 100644 --- a/deps/simdjson/simdjson.h +++ b/deps/simdjson/simdjson.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-06-11 14:08:20 -0400. Do not edit! */ +/* auto-generated on 2024-08-01 09:31:50 -0400. Do not edit! */ /* including simdjson.h: */ /* begin file simdjson.h */ #ifndef SIMDJSON_H @@ -60,6 +60,16 @@ #endif #endif +// C++ 23 +#if !defined(SIMDJSON_CPLUSPLUS23) && (SIMDJSON_CPLUSPLUS >= 202302L) +#define SIMDJSON_CPLUSPLUS23 1 +#endif + +// C++ 20 +#if !defined(SIMDJSON_CPLUSPLUS20) && (SIMDJSON_CPLUSPLUS >= 202002L) +#define SIMDJSON_CPLUSPLUS20 1 +#endif + // C++ 17 #if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) #define SIMDJSON_CPLUSPLUS17 1 @@ -244,6 +254,11 @@ using std::size_t; #define SIMDJSON_NO_SANITIZE_UNDEFINED #endif +#if defined(__clang__) || defined(__GNUC__) +#define simdjson_pure [[gnu::pure]] +#else +#define simdjson_pure +#endif #if defined(__clang__) || defined(__GNUC__) #if defined(__has_feature) @@ -2351,7 +2366,7 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.9.4" +#define SIMDJSON_VERSION "3.10.0" namespace simdjson { enum { @@ -2362,11 +2377,11 @@ enum { /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 9, + SIMDJSON_VERSION_MINOR = 10, /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 4 + SIMDJSON_VERSION_REVISION = 0 }; } // namespace simdjson @@ -3094,7 +3109,7 @@ class dom_parser_implementation { * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. 
It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -3111,7 +3126,7 @@ class dom_parser_implementation { * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There * must be an unescaped quote terminating the string. It returns the final output * position as pointer. In case of error (e.g., the string has bad escaped codes), - * then null_nullptrptr is returned. It is assumed that the output buffer is large + * then null_ptr is returned. It is assumed that the output buffer is large * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + * SIMDJSON_PADDING bytes. * @@ -3165,14 +3180,14 @@ class dom_parser_implementation { * * @return Current capacity, in bytes. */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** * The maximum level of nested object and arrays supported by this parser. * * @return Maximum depth, in bytes. 
*/ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -3213,11 +3228,11 @@ simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; -simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { return _capacity; } -simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { return _max_depth; } @@ -3923,6 +3938,7 @@ inline padded_string::padded_string(std::string_view sv_) noexcept inline padded_string::padded_string(padded_string &&o) noexcept : viable_size(o.viable_size), data_ptr(o.data_ptr) { o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; } inline padded_string &padded_string::operator=(padded_string &&o) noexcept { @@ -4273,6 +4289,11 @@ class array { */ inline simdjson_result at(size_t index) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline array(const internal::tape_ref &tape) noexcept; internal::tape_ref tape; @@ -4954,7 +4975,7 @@ class parser { * * @return Maximum depth, in bytes. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Set max_capacity. This is the largest document this parser can automatically support. 
@@ -6154,6 +6175,11 @@ class object { */ inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + /** + * Implicitly convert object to element + */ + inline operator element() const noexcept; + private: simdjson_inline object(const internal::tape_ref &tape) noexcept; @@ -6765,6 +6791,10 @@ inline simdjson_result array::at(size_t index) const noexcept { return INDEX_OUT_OF_BOUNDS; } +inline array::operator element() const noexcept { + return element(tape); +} + // // array::iterator inline implementation // @@ -6973,6 +7003,10 @@ inline simdjson_result object::at_key_case_insensitive(std::string_view return NO_SUCH_FIELD; } +inline object::operator element() const noexcept { + return element(tape); +} + // // object::iterator inline implementation // @@ -7757,7 +7791,7 @@ simdjson_inline size_t parser::capacity() const noexcept { simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return implementation ? implementation->max_depth() : DEFAULT_MAX_DEPTH; } @@ -33653,8 +33687,8 @@ namespace arm64 { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -33885,9 +33919,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). 
*/ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -33895,7 +33929,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -33967,6 +34001,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -35614,21 +35659,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). 
*/ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. 
*/ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -35662,6 +35723,8 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -35836,8 +35899,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -37889,6 +37952,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
return first; @@ -37956,6 +38027,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -38323,11 +38400,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -39214,6 +39303,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -39322,13 
+39416,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -44059,8 +44153,8 @@ namespace fallback { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -44291,9 +44385,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -44301,7 +44395,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. 
*/ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -44373,6 +44467,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * the string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and usafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -46020,21 +46125,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". 
+ * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -46068,6 +46189,8 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -46242,8 +46365,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. 
Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -48295,6 +48418,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -48362,6 +48493,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -48729,11 +48866,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + 
SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -49620,6 +49769,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -49728,13 +49882,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -54957,8 +55111,8 @@ namespace haswell { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. 
*/ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -55189,9 +55343,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -55199,7 +55353,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -55271,6 +55425,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer.
+ */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -56918,21 +57083,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. 
This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -56966,6 +57147,8 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -57140,8 +57323,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. 
* * @returns true if the object contains some elements (not empty) @@ -59193,6 +59376,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -59260,6 +59451,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -59627,11 +59824,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -60518,6 
+60727,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -60626,13 +60840,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -65854,8 +66068,8 @@ namespace icelake { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -66086,9 +66300,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). 
*/ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -66096,7 +66310,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -66168,6 +66382,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -67815,21 +68040,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key().
*/ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. 
*/ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -67863,6 +68104,8 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -68037,8 +68280,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -70090,6 +70333,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
return first; @@ -70157,6 +70408,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -70524,11 +70781,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -71415,6 +71684,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -71523,13 
+71797,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -76866,8 +77140,8 @@ namespace ppc64 { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -77098,9 +77372,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -77108,7 +77382,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. 
*/ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -77180,6 +77454,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -78827,21 +79112,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test".
+ * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -78875,6 +79176,8 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -79049,8 +79352,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. 
Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -81102,6 +81405,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -81169,6 +81480,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -81536,11 +81853,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + 
SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -82427,6 +82756,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -82535,13 +82869,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -88201,8 +88535,8 @@ namespace westmere { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. 
*/ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -88433,9 +88767,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -88443,7 +88777,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -88515,6 +88849,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer.
+ */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -90162,21 +90507,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. 
This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. */ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -90210,6 +90571,8 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -90384,8 +90747,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. 
* * @returns true if the object contains some elements (not empty) @@ -92437,6 +92800,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. return first; @@ -92504,6 +92875,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -92871,11 +93248,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -93762,6 
+94151,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ -93870,13 +94264,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -99007,8 +99401,8 @@ namespace lsx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -99239,9 +99633,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). 
*/ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). @@ -99249,7 +99643,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -99321,6 +99715,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -100968,21 +101373,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key().
*/ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. 
*/ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -101016,6 +101437,8 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -101190,8 +101613,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -103243,6 +103666,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
return first; @@ -103310,6 +103741,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -103677,11 +104114,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -104568,6 +105017,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ 
-104676,13 +105130,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; } @@ -109826,8 +110280,8 @@ namespace lasx { namespace ondemand { /** - * The default batch size for document_stream instances for this On Demand kernel. - * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * The default batch size for document_stream instances for this On-Demand kernel. + * Note that different On-Demand kernel may use a different DEFAULT_BATCH_SIZE value * in the future. */ static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; @@ -110058,9 +110512,9 @@ class parser { simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; /** The capacity of this parser (the largest document it can process). */ - simdjson_inline size_t capacity() const noexcept; + simdjson_pure simdjson_inline size_t capacity() const noexcept; /** The maximum capacity of this parser (the largest document it is allowed to process). */ - simdjson_inline size_t max_capacity() const noexcept; + simdjson_pure simdjson_inline size_t max_capacity() const noexcept; simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; /** * The maximum depth of this parser (the most deeply nested objects and arrays it can process). 
@@ -110068,7 +110522,7 @@ class parser { * The document's instance current_depth() method should be used to monitor the parsing * depth and limit it if desired. */ - simdjson_inline size_t max_depth() const noexcept; + simdjson_pure simdjson_inline size_t max_depth() const noexcept; /** * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length @@ -110140,6 +110594,17 @@ class parser { */ simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + /** + * Returns true if string_buf_loc is outside of the allocated range for the + * string buffer. When true, it indicates that the string buffer has overflowed. + * This is a development-time check that is not needed in production. It can be + * used to detect buffer overflows in the string buffer and unsafe usage of the + * string buffer. + */ + bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; +#endif + private: /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; @@ -111787,21 +112252,37 @@ class field : public std::pair { * This consumes the key: once you have called unescaped_key(), you cannot * call it again nor can you call key(). */ - simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. The content is stored in the receiver. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key().
+ */ + template + simdjson_inline simdjson_warn_unused error_code unescaped_key(string_type& receiver, bool allow_replacement = false) noexcept; /** * Get the key as a raw_json_string. Can be used for direct comparison with - * an unescaped C string: e.g., key() == "test". + * an unescaped C string: e.g., key() == "test". This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key() for a similar function which returns + * a more convenient std::string_view result. */ simdjson_inline raw_json_string key() const noexcept; /** * Get the unprocessed key as a string_view. This includes the quotes and may include - * some spaces after the last quote. + * some spaces after the last quote. This does not count as + * consumption of the content: you can safely call it repeatedly. + * See escaped_key(). */ simdjson_inline std::string_view key_raw_json_token() const noexcept; /** * Get the key as a string_view. This does not include the quotes and * the string is unprocessed key so it may contain escape characters - * (e.g., \uXXXX or \n). Use unescaped_key() to get the unescaped key. + * (e.g., \uXXXX or \n). It does not count as a consumption of the content: + * you can safely call it repeatedly. Use unescaped_key() to get the unescaped key. 
*/ simdjson_inline std::string_view escaped_key() const noexcept; /** @@ -111835,6 +112316,8 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result() noexcept = default; simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + template + simdjson_inline error_code unescaped_key(string_type &receiver, bool allow_replacement = false) noexcept; simdjson_inline simdjson_result key() noexcept; simdjson_inline simdjson_result key_raw_json_token() noexcept; simdjson_inline simdjson_result escaped_key() noexcept; @@ -112009,8 +112492,8 @@ class object { /** * Reset the iterator so that we are pointing back at the * beginning of the object. You should still consume values only once even if you - * can iterate through the object more than once. If you unescape a string within - * the object more than once, you have unsafe code. Note that rewinding an object + * can iterate through the object more than once. If you unescape a string or a key + * within the object more than once, you have unsafe code. Note that rewinding an object * means that you may need to reparse it anew: it is not a free operation. * * @returns true if the object contains some elements (not empty) @@ -114062,6 +114545,14 @@ simdjson_inline simdjson_warn_unused simdjson_result field::un return answer; } +template +simdjson_inline simdjson_warn_unused error_code field::unescaped_key(string_type& receiver, bool allow_replacement) noexcept { + std::string_view key; + SIMDJSON_TRY( unescaped_key(allow_replacement).get(key) ); + receiver = key; + return SUCCESS; +} + simdjson_inline raw_json_string field::key() const noexcept { SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
return first; @@ -114129,6 +114620,12 @@ simdjson_inline simdjson_result simdjson_result +simdjson_inline error_code simdjson_result::unescaped_key(string_type &receiver, bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(receiver, allow_replacement); +} + simdjson_inline simdjson_result simdjson_result::value() noexcept { if (error()) { return error(); } return std::move(first.value()); @@ -114496,11 +114993,23 @@ simdjson_inline token_position json_iterator::position() const noexcept { } simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape(in, _string_buf_loc, allow_replacement); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape(in, _string_buf_loc, allow_replacement); +#endif } simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + auto result = parser->unescape_wobbly(in, _string_buf_loc); + SIMDJSON_ASSUME(!parser->string_buffer_overflow(_string_buf_loc)); + return result; +#else return parser->unescape_wobbly(in, _string_buf_loc); +#endif } simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { @@ -115387,6 +115896,11 @@ simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capa _max_depth = new_max_depth; return SUCCESS; } +#if SIMDJSON_DEVELOPMENT_CHECKS +simdjson_inline simdjson_warn_unused bool parser::string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept { + return (string_buf_loc < string_buf.get()) || (size_t(string_buf_loc - string_buf.get()) >= capacity()); +} +#endif simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } @@ 
-115495,13 +116009,13 @@ inline simdjson_result parser::iterate_many(const padded_string return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } -simdjson_inline size_t parser::capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } -simdjson_inline size_t parser::max_capacity() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_capacity() const noexcept { return _max_capacity; } -simdjson_inline size_t parser::max_depth() const noexcept { +simdjson_pure simdjson_inline size_t parser::max_depth() const noexcept { return _max_depth; }