From 89e0a070841b794b44dc104ac4de47eabad40c0b Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 11 Feb 2026 12:59:52 -0400 Subject: [PATCH 1/4] Refactor JSON parser on a parser + construct pair of phases Signed-off-by: Juan Cruz Viotti --- src/core/json/construct.h | 637 ++++++++ .../json/include/sourcemeta/core/json_array.h | 5 + .../json/include/sourcemeta/core/json_hash.h | 86 +- .../include/sourcemeta/core/json_object.h | 21 +- .../json/include/sourcemeta/core/json_value.h | 4 + src/core/json/json.cc | 49 +- src/core/json/json_value.cc | 6 + src/core/json/parser.h | 1442 ++++++----------- test/json/json_value_test.cc | 2 +- 9 files changed, 1276 insertions(+), 976 deletions(-) create mode 100644 src/core/json/construct.h diff --git a/src/core/json/construct.h b/src/core/json/construct.h new file mode 100644 index 0000000000..5d775850a8 --- /dev/null +++ b/src/core/json/construct.h @@ -0,0 +1,637 @@ +#ifndef SOURCEMETA_CORE_JSON_CONSTRUCT_H_ +#define SOURCEMETA_CORE_JSON_CONSTRUCT_H_ + +#include +#include + +#include +#include + +#include "parser.h" + +#include // assert +#include // std::size_t +#include // std::uint64_t, std::uint32_t +#include // std::memchr +#include // std::reference_wrapper +#include // std::optional +#include // std::invalid_argument +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace internal { + +inline auto unescape_string(const char *data, const std::uint32_t length) + -> typename JSON::String { + typename JSON::String result; + const char *cursor{data}; + const char *string_end{data + length}; + + if (!std::memchr(data, '\\', length)) { + result.append(data, length); + return result; + } + + result.reserve(length); + while (cursor < string_end) { + const char *scan{cursor}; + while (scan < string_end && *scan != '\\') { + scan++; + } + + if (scan > cursor) { + result.append(cursor, static_cast(scan - cursor)); + cursor = scan; + } + + if (cursor >= string_end) { + break; + } + + assert(*cursor == '\\'); + cursor++; + assert(cursor < string_end); + + switch (*cursor++) { + case '"': + result.push_back('"'); + break; + case '\\': + result.push_back('\\'); + break; + case '/': + result.push_back('/'); + break; + case 'b': + result.push_back('\b'); + break; + case 'f': + result.push_back('\f'); + break; + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 't': + result.push_back('\t'); + break; + case 'u': { + auto parse_hex4 = [](const char *&position) -> unsigned long { + unsigned long value{0}; + for (std::size_t index = 0; index < 4; index++) { + const char hex_char{*position++}; + unsigned long digit; + if (hex_char >= '0' && hex_char <= '9') { + digit = static_cast(hex_char - '0'); + } else if (hex_char >= 'a' && hex_char <= 'f') { + digit = static_cast(hex_char - 'a') + 10; + } else if (hex_char >= 'A' && hex_char <= 'F') { + digit = static_cast(hex_char - 'A') + 10; + } else { + digit = 0; + } + value = (value << 4) | digit; + } + return value; + }; + + auto code_point{parse_hex4(cursor)}; + if (code_point >= 0xD800 && code_point <= 0xDBFF) { + assert(cursor + 6 <= string_end); + cursor += 2; + const auto low{parse_hex4(cursor)}; + code_point = + 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00); + } + + sourcemeta::core::codepoint_to_utf8( + static_cast(code_point), result); + break; + } + default: + break; + } + } + + return result; +} + +inline auto construct_number(const char *data, const std::uint32_t length) + -> JSON { + const bool has_dot{ + std::memchr(data, '.', length) != nullptr}; + const bool has_exponent{ + std::memchr(data, 'e', length) != nullptr || + std::memchr(data, 'E', length) != nullptr}; + + if (has_exponent) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + if (has_dot) { + std::size_t first_nonzero_position{JSON::String::npos}; + const auto decimal_position{static_cast( + static_cast(std::memchr(data, '.', length)) - data)}; + for (std::size_t index = 0; index < length; index++) { + if (index != decimal_position && data[index] != '0' && + data[index] != '-') { + first_nonzero_position = index; + break; + } + } + + if (first_nonzero_position == JSON::String::npos) { + first_nonzero_position = 0; + } + + const auto decimal_after_first_nonzero{ + decimal_position > first_nonzero_position}; + const auto significant_digits{ + length - first_nonzero_position - + (decimal_after_first_nonzero ? 1 : 0)}; + constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; + if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + const typename JSON::String string_value{data, length}; + const auto double_result{sourcemeta::core::to_double(string_value)}; + if (double_result.has_value()) { + return JSON{double_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + const typename JSON::String string_value{data, length}; + const auto int_result{sourcemeta::core::to_int64_t(string_value)}; + if (int_result.has_value()) { + return JSON{int_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } +} + +inline auto post_column_for(const TapeEntry &entry) -> std::uint64_t { + switch (entry.type) { + case TapeType::True: + return entry.column + 3; + case TapeType::False: + return entry.column + 4; + case TapeType::Null: + return entry.column + 3; + case TapeType::String: + case TapeType::Key: + return entry.column + entry.length + 1; + case TapeType::Number: + return entry.column + entry.length - 1; + default: + return entry.column; + } +} + +} // namespace internal + +// NOLINTBEGIN(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + +#define CALLBACK_PRE(value_type, entry_ref, context, index, property) \ + if (callback) { \ + callback(JSON::ParsePhase::Pre, JSON::Type::value_type, \ + (entry_ref).line, (entry_ref).column, context, index, property); \ + } + +#define CALLBACK_POST(value_type, post_line, post_column) \ + if (callback) { \ + callback(JSON::ParsePhase::Post, JSON::Type::value_type, \ + post_line, post_column, \ + JSON::ParseContext::Root, 0, JSON::StringView{}); \ + } + +inline auto construct_json(const char *buffer, + const std::vector &tape, + const JSON::ParseCallback &callback) -> JSON { + using Result = JSON; + enum class Container : std::uint8_t { Array, Object }; + std::vector levels; + std::vector> frames; + levels.reserve(32); + frames.reserve(32); + std::optional result; + typename Result::String key; + typename Result::Object::hash_type key_hash; + std::uint64_t key_line{0}; + std::uint64_t key_column{0}; + std::size_t tape_index{0}; + + if (tape.empty()) { + throw JSONParseError(1, 1); + } + + const auto &entry{tape[tape_index]}; + switch (entry.type) { + case TapeType::True: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + return JSON{true}; + case TapeType::False: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + return JSON{false}; + case TapeType::Null: + CALLBACK_PRE(Null, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Null, entry.line, internal::post_column_for(entry)); + return JSON{nullptr}; + case TapeType::String: { + CALLBACK_PRE(String, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + auto value{Result{internal::unescape_string(buffer + entry.offset, + entry.length)}}; + CALLBACK_POST(String, entry.line, internal::post_column_for(entry)); + return value; + } + case TapeType::Number: { + auto value{ + internal::construct_number(buffer + entry.offset, entry.length)}; + if (value.is_integer()) { + CALLBACK_PRE(Integer, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Integer, entry.line, internal::post_column_for(entry)); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Decimal, entry.line, internal::post_column_for(entry)); + } else { + CALLBACK_PRE(Real, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + CALLBACK_POST(Real, entry.line, internal::post_column_for(entry)); + } + return value; + } + case TapeType::ArrayStart: + CALLBACK_PRE(Array, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, entry, JSON::ParseContext::Root, 0, + JSON::StringView{}); + goto do_construct_object; + default: + throw JSONParseError(1, 1); + } + + /* + * Construct an array + */ + +do_construct_array : { + const auto &array_entry{tape[tape_index]}; + assert(array_entry.type == TapeType::ArrayStart); + const auto child_count{array_entry.count}; + tape_index++; + + if (levels.empty()) { + assert(!result.has_value()); + levels.push_back(Container::Array); + result = std::make_optional(Result::make_array()); + frames.emplace_back(result.value()); + } else if (levels.back() == Container::Array) { + levels.push_back(Container::Array); + frames.back().get().push_back(Result::make_array()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Array); + frames.back().get().assign(key, Result::make_array()); + frames.emplace_back(frames.back().get().at(key)); + } + + frames.back().get().as_array().reserve(child_count); + + if (child_count == 0) { + assert(tape[tape_index].type == TapeType::ArrayEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; +} + +do_construct_array_item: { + assert(!levels.empty()); + assert(levels.back() == Container::Array); + const auto &item_entry{tape[tape_index]}; + + switch (item_entry.type) { + case TapeType::ArrayStart: + CALLBACK_PRE(Array, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + goto do_construct_object; + case TapeType::True: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + frames.back().get().push_back(JSON{true}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::False: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + frames.back().get().push_back(JSON{false}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Null: + CALLBACK_PRE(Null, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + frames.back().get().push_back(JSON{nullptr}); + tape_index++; + CALLBACK_POST(Null, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::String: + CALLBACK_PRE(String, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), JSON::StringView{}); + frames.back().get().push_back(Result{internal::unescape_string( + buffer + item_entry.offset, item_entry.length)}); + tape_index++; + CALLBACK_POST(String, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Number: { + const auto current_index{frames.back().get().size()}; + auto value{internal::construct_number(buffer + item_entry.offset, + item_entry.length)}; + if (value.is_integer()) { + CALLBACK_PRE(Integer, item_entry, JSON::ParseContext::Index, + current_index, JSON::StringView{}); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, item_entry, JSON::ParseContext::Index, + current_index, JSON::StringView{}); + } else { + CALLBACK_PRE(Real, item_entry, JSON::ParseContext::Index, + current_index, JSON::StringView{}); + } + const auto value_type{value.type()}; + frames.back().get().push_back(std::move(value)); + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, item_entry.line, + internal::post_column_for(item_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, item_entry.line, + internal::post_column_for(item_entry)); + } else { + CALLBACK_POST(Real, item_entry.line, + internal::post_column_for(item_entry)); + } + goto do_construct_array_item_separator; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_array_item_separator: + if (tape[tape_index].type == TapeType::ArrayEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; + + /* + * Construct an object + */ + +do_construct_object : { + const auto &object_entry{tape[tape_index]}; + assert(object_entry.type == TapeType::ObjectStart); + const auto property_count{object_entry.count}; + tape_index++; + + if (levels.empty()) { + assert(!result.has_value()); + levels.push_back(Container::Object); + result = std::make_optional(Result::make_object()); + frames.emplace_back(result.value()); + } else if (levels.back() == Container::Array) { + levels.push_back(Container::Object); + frames.back().get().push_back(Result::make_object()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Object); + frames.back().get().assign(key, Result::make_object()); + frames.emplace_back(frames.back().get().at(key)); + } + + frames.back().get().as_object().reserve(property_count); + + if (property_count == 0) { + assert(tape[tape_index].type == TapeType::ObjectEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; +} + +do_construct_object_key: { + assert(!levels.empty()); + assert(levels.back() == Container::Object); + const auto &key_entry{tape[tape_index]}; + assert(key_entry.type == TapeType::Key); + const char *key_data{buffer + key_entry.offset}; + const auto key_length{key_entry.length}; + if (std::memchr(key_data, '\\', key_length)) { + key = internal::unescape_string(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key); + } else { + key.assign(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key_data, key_length); + } + key_line = key_entry.line; + key_column = key_entry.column; + tape_index++; + goto do_construct_object_value; +} + +do_construct_object_value: { + const auto &value_entry{tape[tape_index]}; + + switch (value_entry.type) { + case TapeType::ArrayStart: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Array, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + goto do_construct_array; + case TapeType::ObjectStart: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Object, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + goto do_construct_object; + case TapeType::True: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + frames.back().get().assign_assume_new(std::move(key), JSON{true}, + key_hash); + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::False: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + frames.back().get().assign_assume_new(std::move(key), JSON{false}, + key_hash); + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Null: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + frames.back().get().assign_assume_new(std::move(key), JSON{nullptr}, + key_hash); + tape_index++; + CALLBACK_POST(Null, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::String: + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::String, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + frames.back().get().assign_assume_new( + std::move(key), + Result{internal::unescape_string(buffer + value_entry.offset, + value_entry.length)}, + key_hash); + tape_index++; + CALLBACK_POST(String, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Number: { + auto value{internal::construct_number(buffer + value_entry.offset, + value_entry.length)}; + const auto value_type{value.type()}; + if (value_type == JSON::Type::Integer) { + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Integer, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + } else if (value_type == JSON::Type::Decimal) { + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Decimal, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + } else { + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Real, key_line, + key_column, JSON::ParseContext::Property, 0, key); + } + } + frames.back().get().assign_assume_new(std::move(key), std::move(value), + key_hash); + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, value_entry.line, + internal::post_column_for(value_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, value_entry.line, + internal::post_column_for(value_entry)); + } else { + CALLBACK_POST(Real, value_entry.line, + internal::post_column_for(value_entry)); + } + goto do_construct_object_property_end; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_object_property_end: + if (tape[tape_index].type == TapeType::ObjectEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; + + /* + * Finish constructing a container + */ + +do_construct_container_end: + assert(!levels.empty()); + if (levels.size() == 1) { + return result.value(); + } + + frames.pop_back(); + levels.pop_back(); + if (levels.back() == Container::Array) { + goto do_construct_array_item_separator; + } else { + goto do_construct_object_property_end; + } +} + +// NOLINTEND(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + +#undef CALLBACK_PRE +#undef CALLBACK_POST + +} // namespace sourcemeta::core + +#endif diff --git a/src/core/json/include/sourcemeta/core/json_array.h b/src/core/json/include/sourcemeta/core/json_array.h index 808cafbc7e..f719449220 100644 --- a/src/core/json/include/sourcemeta/core/json_array.h +++ b/src/core/json/include/sourcemeta/core/json_array.h @@ -97,6 +97,11 @@ template class JSONArray { return this->data.size(); } + /// Reserve capacity for a given number of elements + auto reserve(const size_type capacity) -> void { + this->data.reserve(capacity); + } + private: friend Value; // Exporting symbols that depends on the standard C++ library is considered diff --git a/src/core/json/include/sourcemeta/core/json_hash.h b/src/core/json/include/sourcemeta/core/json_hash.h index 49ee106624..b008947233 100644 --- a/src/core/json/include/sourcemeta/core/json_hash.h +++ b/src/core/json/include/sourcemeta/core/json_hash.h @@ -57,11 +57,95 @@ template struct PropertyHashJSON { -> hash_type { hash_type result; assert(!value.empty()); - // Copy starting a byte 2 std::memcpy(reinterpret_cast(&result) + 1, value.data(), size); return result; } + [[nodiscard]] + inline auto perfect(const char *data, const std::size_t size) const noexcept + -> hash_type { + hash_type result; + assert(size > 0); + std::memcpy(reinterpret_cast(&result) + 1, data, size); + return result; + } + + inline auto operator()(const char *data, + const std::size_t size) const noexcept -> hash_type { + switch (size) { + case 0: + return {}; + case 1: + return this->perfect(data, 1); + case 2: + return this->perfect(data, 2); + case 3: + return this->perfect(data, 3); + case 4: + return this->perfect(data, 4); + case 5: + return this->perfect(data, 5); + case 6: + return this->perfect(data, 6); + case 7: + return this->perfect(data, 7); + case 8: + return this->perfect(data, 8); + case 9: + return this->perfect(data, 9); + case 10: + return this->perfect(data, 10); + case 11: + return this->perfect(data, 11); + case 12: + return this->perfect(data, 12); + case 13: + return this->perfect(data, 13); + case 14: + return this->perfect(data, 14); + case 15: + return this->perfect(data, 15); + case 16: + return this->perfect(data, 16); + case 17: + return this->perfect(data, 17); + case 18: + return this->perfect(data, 18); + case 19: + return this->perfect(data, 19); + case 20: + return this->perfect(data, 20); + case 21: + return this->perfect(data, 21); + case 22: + return this->perfect(data, 22); + case 23: + return this->perfect(data, 23); + case 24: + return this->perfect(data, 24); + case 25: + return this->perfect(data, 25); + case 26: + return this->perfect(data, 26); + case 27: + return this->perfect(data, 27); + case 28: + return this->perfect(data, 28); + case 29: + return this->perfect(data, 29); + case 30: + return this->perfect(data, 30); + case 31: + return this->perfect(data, 31); + default: + auto hash = this->perfect(data, 31); + hash.a |= 1 + (size + static_cast(data[0]) + + static_cast(data[size - 1])) % + 255; + return hash; + } + } + inline auto operator()(const T &value) const noexcept -> hash_type { const auto size{value.size()}; switch (size) { diff --git a/src/core/json/include/sourcemeta/core/json_object.h b/src/core/json/include/sourcemeta/core/json_object.h index ab9006476b..ccb12d5bbd 100644 --- a/src/core/json/include/sourcemeta/core/json_object.h +++ b/src/core/json/include/sourcemeta/core/json_object.h @@ -124,6 +124,13 @@ template class JSONObject { return this->hasher(key); } + /// Compute a hash from raw data + [[nodiscard]] inline auto hash(const char *raw_data, + const std::size_t raw_size) const noexcept + -> hash_type { + return this->hasher(raw_data, raw_size); + } + /// Attempt to find an entry by key [[nodiscard]] inline auto find(const Key &key) const -> const_iterator { const auto key_hash{this->hash(key)}; @@ -370,6 +377,18 @@ template class JSONObject { return key_hash; } + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(Key &&key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({std::move(key), std::move(value), key_hash}); + } + + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(const Key &key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({key, std::move(value), key_hash}); + } + /// Remove every property in the object inline auto clear() noexcept -> void { this->data.clear(); } @@ -442,7 +461,7 @@ template class JSONObject { #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif - Hash hasher; + static constexpr Hash hasher{}; underlying_type data; #if defined(_MSC_VER) #pragma warning(default : 4251) diff --git a/src/core/json/include/sourcemeta/core/json_value.h b/src/core/json/include/sourcemeta/core/json_value.h index 92689ae7f2..95aa47086a 100644 --- a/src/core/json/include/sourcemeta/core/json_value.h +++ b/src/core/json/include/sourcemeta/core/json_value.h @@ -1438,6 +1438,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// ``` auto assign_assume_new(String &&key, JSON &&value) -> void; + /// This method sets an object key with a pre-computed hash + auto assign_assume_new(String &&key, JSON &&value, Object::hash_type hash) + -> void; + /// This method deletes an object key. For example: /// /// ```cpp diff --git a/src/core/json/json.cc b/src/core/json/json.cc index 84342d518e..39f0a4095a 100644 --- a/src/core/json/json.cc +++ b/src/core/json/json.cc @@ -4,6 +4,7 @@ #include #include +#include "construct.h" #include "parser.h" #include "stringify.h" @@ -15,9 +16,33 @@ #include // std::basic_ostream #include // std::basic_ostringstream #include // std::make_error_code, std::errc +#include // std::vector namespace sourcemeta::core { +static auto internal_parse_json(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column, + const JSON::ParseCallback &callback, + const bool track_positions) -> JSON { + const char *buffer_start{cursor}; + std::vector tape; + tape.reserve(static_cast(end - cursor) / 8); + if (callback || track_positions) { + scan_json(cursor, end, buffer_start, line, column, tape); + } else { + try { + scan_json(cursor, end, buffer_start, line, column, tape); + } catch (const JSONParseError &) { + cursor = buffer_start; + tape.clear(); + line = 1; + column = 0; + scan_json(cursor, end, buffer_start, line, column, tape); + } + } + return construct_json(buffer_start, tape, callback); +} + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, std::uint64_t &line, std::uint64_t &column, @@ -28,7 +53,7 @@ auto parse_json(std::basic_istream &stream, const auto input{buffer.str()}; const char *cursor{input.data()}; const char *end{input.data() + input.size()}; - auto result{internal_parse_json(cursor, end, line, column, callback)}; + auto result{internal_parse_json(cursor, end, line, column, callback, true)}; if (start_position != static_cast(-1)) { const auto consumed{static_cast(cursor - input.data())}; stream.clear(); @@ -41,22 +66,38 @@ auto parse_json(std::basic_istream &stream, auto parse_json(const std::basic_string &input, std::uint64_t &line, std::uint64_t &column, const JSON::ParseCallback &callback) -> JSON { - return internal_parse_json(input, line, column, callback); + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, true); } // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, const JSON::ParseCallback &callback) -> JSON { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; std::uint64_t line{1}; std::uint64_t column{0}; - return parse_json(stream, line, column, callback); + auto result{internal_parse_json(cursor, end, line, column, callback, false)}; + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } + return result; } auto parse_json(const std::basic_string &input, const JSON::ParseCallback &callback) -> JSON { std::uint64_t line{1}; std::uint64_t column{0}; - return parse_json(input, line, column, callback); + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, false); } auto read_json(const std::filesystem::path &path, diff --git a/src/core/json/json_value.cc b/src/core/json/json_value.cc index 6140dc0081..9f70cc0bf1 100644 --- a/src/core/json/json_value.cc +++ b/src/core/json/json_value.cc @@ -977,6 +977,12 @@ auto JSON::assign_assume_new(JSON::String &&key, JSON &&value) -> void { this->data_object.emplace_assume_new(std::move(key), std::move(value)); } +auto JSON::assign_assume_new(JSON::String &&key, JSON &&value, + Object::hash_type hash) -> void { + assert(this->is_object()); + this->data_object.emplace_assume_new(std::move(key), std::move(value), hash); +} + auto JSON::erase(const JSON::String &key) -> typename Object::size_type { assert(this->is_object()); return this->data_object.erase(key); diff --git a/src/core/json/parser.h b/src/core/json/parser.h index baa91082fd..d18efd0010 100644 --- a/src/core/json/parser.h +++ b/src/core/json/parser.h @@ -4,23 +4,39 @@ #include #include -#include -#include - #include "grammar.h" -#include // assert -#include // std::size_t -#include // std::uint64_t -#include // std::reference_wrapper -#include // std::optional -#include // std::invalid_argument -#include // std::basic_string -#include // std::move -#include // std::vector +#include // assert +#include // std::uint64_t, std::uint32_t +#include // std::vector -namespace sourcemeta::core::internal { +namespace sourcemeta::core { +enum class TapeType : std::uint8_t { + ObjectStart, + ObjectEnd, + ArrayStart, + ArrayEnd, + Key, + String, + Number, + Null, + True, + False +}; + +struct TapeEntry { + TapeType type; + std::uint32_t offset; + std::uint32_t length; + std::uint32_t count; + std::uint64_t line; + std::uint64_t column; +}; + +namespace internal { + +template inline auto skip_whitespace(const char *&cursor, const char *end, std::uint64_t &line, std::uint64_t &column) -> void { @@ -29,12 +45,16 @@ inline auto skip_whitespace(const char *&cursor, const char *end, case internal::token_whitespace_space: case internal::token_whitespace_tabulation: case internal::token_whitespace_carriage_return: - column += 1; + if constexpr (TrackPositions) { + column += 1; + } cursor++; continue; case internal::token_whitespace_line_feed: - line += 1; - column = 0; + if constexpr (TrackPositions) { + line += 1; + column = 0; + } cursor++; continue; default: @@ -43,13 +63,16 @@ inline auto skip_whitespace(const char *&cursor, const char *end, } } -inline auto parse_null(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end) -> JSON { +template +inline auto scan_null(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_null.substr( 1)) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -58,17 +81,18 @@ inline auto parse_null(const std::uint64_t line, std::uint64_t &column, } cursor++; } - - return JSON{nullptr}; } -inline auto parse_boolean_true(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end) -> JSON { +template +inline auto scan_true(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_true.substr( 1)) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -77,17 +101,18 @@ inline auto parse_boolean_true(const std::uint64_t line, std::uint64_t &column, } cursor++; } - - return JSON{true}; } -inline auto parse_boolean_false(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end) -> JSON { +template +inline auto scan_false(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_false.substr( 1)) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -96,17 +121,18 @@ inline auto parse_boolean_false(const std::uint64_t line, std::uint64_t &column, } cursor++; } - - return JSON{false}; } -inline auto parse_string_unicode_code_point(const std::uint64_t line, - std::uint64_t &column, - const char *&cursor, - const char *end) -> unsigned long { +template +inline auto scan_string_unicode_code_point(const std::uint64_t line, + std::uint64_t &column, + const char *&cursor, const char *end) + -> unsigned long { unsigned long result{0}; for (std::size_t index = 0; index < 4; index++) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -128,20 +154,21 @@ inline auto parse_string_unicode_code_point(const std::uint64_t line, return result; } -inline auto parse_string_unicode(const std::uint64_t line, - std::uint64_t &column, const char *&cursor, - const char *end, typename JSON::String &result) - -> void { - auto code_point{parse_string_unicode_code_point(line, column, cursor, end)}; +template +inline auto scan_string_unicode(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + auto code_point{scan_string_unicode_code_point(line, column, + cursor, end)}; using CharT = typename JSON::Char; - // Lone low surrogate without a preceding high surrogate if (code_point >= 0xDC00 && code_point <= 0xDFFF) { throw JSONParseError(line, column); } if (code_point >= 0xD800 && code_point <= 0xDBFF) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -150,7 +177,9 @@ inline auto parse_string_unicode(const std::uint64_t line, } cursor++; - column += 1; + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } @@ -159,80 +188,47 @@ inline auto parse_string_unicode(const std::uint64_t line, } cursor++; - const auto low_code_point{ - parse_string_unicode_code_point(line, column, cursor, end)}; + const auto low_code_point{scan_string_unicode_code_point( + line, column, cursor, end)}; // See // https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF - if (low_code_point >= 0xDC00 && low_code_point <= 0xDFFF) { - code_point = - 0x10000 + ((code_point - 0xD800) << 10) + (low_code_point - 0xDC00); - } else { + if (low_code_point < 0xDC00 || low_code_point > 0xDFFF) { throw JSONParseError(line, column); } } - - sourcemeta::core::codepoint_to_utf8(static_cast(code_point), - result); } -inline auto parse_string_escape(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end, - typename JSON::String &result) -> void { - column += 1; +template +inline auto scan_string_escape(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + if constexpr (TrackPositions) { + column += 1; + } if (cursor >= end) { throw JSONParseError(line, column); } switch (*cursor++) { case internal::token_string_quote: - result.push_back(internal::token_string_quote); - return; case internal::token_string_escape: - result.push_back(internal::token_string_escape); - return; case internal::token_string_solidus: - result.push_back(internal::token_string_solidus); - return; case internal::token_string_escape_backspace: - result.push_back('\b'); - return; case internal::token_string_escape_form_feed: - result.push_back('\f'); - return; case internal::token_string_escape_line_feed: - result.push_back('\n'); - return; case internal::token_string_escape_carriage_return: - result.push_back('\r'); - return; case internal::token_string_escape_tabulation: - result.push_back('\t'); return; - - // Any code point may be represented as a hexadecimal escape sequence. - // The meaning of such a hexadecimal number is determined by ISO/IEC - // 10646. If the code point is in the Basic Multilingual Plane (U+0000 - // through U+FFFF), then it may be represented as a six-character - // sequence: a reverse solidus, followed by the lowercase letter u, - // followed by four hexadecimal digits that encode the code point. - // Hexadecimal digits can be digits (U+0030 through U+0039) or the - // hexadecimal letters A through F in uppercase (U+0041 through U+0046) - // or lowercase (U+0061 through U+0066). - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf case internal::token_string_escape_unicode: - parse_string_unicode(line, column, cursor, end, result); + scan_string_unicode(line, column, cursor, end); return; - default: throw JSONParseError(line, column); } } -inline auto parse_string(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end) -> - typename JSON::String { - typename JSON::String result; +template +inline auto scan_string(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { while (cursor < end) { const char *scan{cursor}; while (scan < end && *scan != '"' && *scan != '\\' && @@ -241,1015 +237,523 @@ inline auto parse_string(const std::uint64_t line, std::uint64_t &column, } if (scan > cursor) { - column += static_cast(scan - cursor); - result.append(cursor, static_cast(scan - cursor)); + if constexpr (TrackPositions) { + column += static_cast(scan - cursor); + } cursor = scan; } if (cursor >= end) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); } - column += 1; + if constexpr (TrackPositions) { + column += 1; + } const char character{*cursor++}; switch (character) { - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf case internal::token_string_quote: - return result; + return; case internal::token_string_escape: - parse_string_escape(line, column, cursor, end, result); + scan_string_escape(line, column, cursor, end); break; default: throw JSONParseError(line, column); } } - column += 1; - throw JSONParseError(line, column); -} - -template -auto parse_number_decimal(const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) - -> JSON { - try { - return JSON{Decimal{string}}; - } catch (const DecimalParseError &) { - throw JSONParseError(line, column); - } catch (const std::invalid_argument &) { - throw JSONParseError(line, column); - } -} - -template -auto parse_number_integer_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) -> JSON { - const auto result{sourcemeta::core::to_int64_t(string)}; - return result.has_value() ? JSON{result.value()} - : parse_number_decimal(line, column, string); -} - -template -auto parse_number_real_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string, - const std::size_t first_nonzero_position, - const std::size_t decimal_position) -> JSON { - // We are guaranteed to not be dealing with exponential numbers here - assert((string.find('e') == std::basic_string::npos)); - assert((string.find('E') == std::basic_string::npos)); - - // If the number has enough significant digits, then we risk completely losing - // precision of the fractional component, and thus incorrectly interpreting a - // fractional number as an integral value - const auto decimal_after_first_nonzero{ - decimal_position != std::basic_string::npos && - decimal_position > first_nonzero_position}; - const auto significant_digits{string.length() - first_nonzero_position - - (decimal_after_first_nonzero ? 1 : 0)}; - constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; - if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { - return parse_number_decimal(line, column, string); + if constexpr (TrackPositions) { + column += 1; } - - const auto result{sourcemeta::core::to_double(string)}; - return result.has_value() ? JSON{result.value()} - : parse_number_decimal(line, column, string); + throw JSONParseError(line, column); } -inline auto parse_number_exponent_rest(const std::uint64_t line, - std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result) -> JSON { - while (cursor < end) { - const char character{*cursor}; - switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.push_back(character); - cursor++; - column += 1; - break; - default: - // As a heuristic, if a number has exponential notation, it is almost - // always a big number for which `double` is typically a poor - // representation. If an exponent is encountered, we just always parse - // as a high-precision decimal - return parse_number_decimal(line, original_column, result); +template +inline auto scan_digits(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, + const bool at_least_one) -> void { + bool found{false}; + while (cursor < end && *cursor >= '0' && *cursor <= '9') { + found = true; + if constexpr (TrackPositions) { + column += 1; } + cursor++; } - - return parse_number_decimal(line, original_column, result); -} - -inline auto parse_number_exponent(const std::uint64_t line, - std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result) -> JSON { - if (cursor >= end) { - column += 1; + if (at_least_one && !found) { + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); } - const char character{*cursor++}; - column += 1; - switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.push_back(character); - return parse_number_exponent_rest(line, column, original_column, cursor, - end, result); - default: - throw JSONParseError(line, column); - } } -inline auto parse_number_exponent_first(const std::uint64_t line, - std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result) -> JSON { - if (cursor >= end) { - column += 1; - throw JSONParseError(line, column); - } - const char character{*cursor++}; - column += 1; - switch (character) { - case internal::token_number_plus: - return parse_number_exponent(line, column, original_column, cursor, end, - result); - case internal::token_number_minus: - result.push_back(character); - return parse_number_exponent(line, column, original_column, cursor, end, - result); - - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.push_back(character); - return parse_number_exponent_rest(line, column, original_column, cursor, - end, result); - default: +template +inline auto scan_number(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, const char first) + -> void { + if (first == '-') { + if (cursor >= end || *cursor < '0' || *cursor > '9') { + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); + } } -} -inline auto parse_number_fractional( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, const char *&cursor, const char *end, - typename JSON::String &result, std::size_t &first_nonzero_position, - const std::size_t decimal_position) -> JSON { - while (cursor < end) { - const char character{*cursor}; - switch (character) { - // [A number] may have an exponent, prefixed by e (U+0065) or E (U+0045) - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.push_back(character); - cursor++; - column += 1; - return parse_number_exponent_first(line, column, original_column, - cursor, end, result); - - case internal::token_number_zero: - result.push_back(character); - cursor++; - column += 1; - break; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.size(); - } - result.push_back(character); - cursor++; - column += 1; - break; - default: - return parse_number_real_maybe_decimal(line, original_column, result, - first_nonzero_position, - decimal_position); + const char int_start{first == '-' ? *cursor : first}; + if (first == '-') { + if constexpr (TrackPositions) { + column += 1; } + cursor++; } - return parse_number_real_maybe_decimal( - line, original_column, result, first_nonzero_position, decimal_position); -} - -inline auto parse_number_fractional_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, const char *&cursor, const char *end, - typename JSON::String &result, std::size_t &first_nonzero_position, - const std::size_t decimal_position) -> JSON { - if (cursor >= end) { - column += 1; - throw JSONParseError(line, column); - } - const char character{*cursor}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: - column += 1; - throw JSONParseError(line, column); - case internal::token_number_zero: - result.push_back(character); - cursor++; - column += 1; - return parse_number_fractional(line, column, original_column, cursor, end, - result, first_nonzero_position, - decimal_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.size(); + if (int_start == '0') { + if (cursor < end && *cursor >= '0' && *cursor <= '9') { + if constexpr (TrackPositions) { + column += 1; } - result.push_back(character); - cursor++; - column += 1; - return parse_number_fractional(line, column, original_column, cursor, end, - result, first_nonzero_position, - decimal_position); - default: - return parse_number_real_maybe_decimal(line, original_column, result, - first_nonzero_position, - decimal_position); + throw JSONParseError(line, column); + } + } else { + scan_digits(line, column, cursor, end, false); } -} -inline auto parse_number_maybe_fractional(const std::uint64_t line, - std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result, - std::size_t &first_nonzero_position) - -> JSON { - if (cursor >= end) { - return JSON{ - parse_number_integer_maybe_decimal(line, original_column, result)}; - } - const char character{*cursor}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.size()}; - result.push_back(character); - cursor++; + if (cursor < end && *cursor == '.') { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, cursor, end, result, - first_nonzero_position, decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.push_back(character); - cursor++; - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - cursor, end, result)}; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - column += 1; - throw JSONParseError(line, column); - default: - return JSON{ - parse_number_integer_maybe_decimal(line, original_column, result)}; + cursor++; + scan_digits(line, column, cursor, end, true); } -} -inline auto parse_number_any_rest(const std::uint64_t line, - std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result, - std::size_t &first_nonzero_position) -> JSON { - while (cursor < end) { - const char character{*cursor}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.size()}; - result.push_back(character); - cursor++; + if (cursor < end && (*cursor == 'e' || *cursor == 'E')) { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + if (cursor < end && (*cursor == '+' || *cursor == '-')) { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, cursor, end, result, - first_nonzero_position, decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.push_back(character); - cursor++; - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - cursor, end, result)}; - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.push_back(character); - cursor++; - column += 1; - break; - default: - return JSON{ - parse_number_integer_maybe_decimal(line, original_column, result)}; + cursor++; } - } - - return JSON{ - parse_number_integer_maybe_decimal(line, original_column, result)}; -} - -inline auto -parse_number_any_negative_first(const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - const char *&cursor, const char *end, - typename JSON::String &result, - std::size_t &first_nonzero_position) -> JSON { - if (cursor >= end) { - column += 1; - throw JSONParseError(line, column); - } - const char character{*cursor++}; - column += 1; - switch (character) { - // A number is a sequence of decimal digits with no superfluous leading - // zero. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_zero: - result.push_back(character); - return parse_number_maybe_fractional(line, column, original_column, - cursor, end, result, - first_nonzero_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - first_nonzero_position = result.size(); - result.push_back(character); - return parse_number_any_rest(line, column, original_column, cursor, end, - result, first_nonzero_position); - default: - throw JSONParseError(line, column); - } -} - -inline auto parse_number(const std::uint64_t line, std::uint64_t &column, - const char *&cursor, const char *end, const char first) - -> JSON { - typename JSON::String result; - result.push_back(first); - - std::size_t first_nonzero_position{ - std::basic_string::npos}; - - // A number is a sequence of decimal digits with no superfluous leading zero. - // It may have a preceding minus sign (U+002D). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (first) { - case internal::token_number_minus: - return parse_number_any_negative_first(line, column, column, cursor, end, - result, first_nonzero_position); - case internal::token_number_zero: - return parse_number_maybe_fractional(line, column, column, cursor, end, - result, first_nonzero_position); - default: - first_nonzero_position = 0; - return parse_number_any_rest(line, column, column, cursor, end, result, - first_nonzero_position); + scan_digits(line, column, cursor, end, true); } } -} // namespace sourcemeta::core::internal +} // namespace internal -// We use "goto" to avoid recursion // NOLINTBEGIN(cppcoreguidelines-avoid-goto) -#define CALLBACK_PRE(value_type, context, index, property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ - } - -#define CALLBACK_PRE_WITH_POSITION(value_type, line, column, context, index, \ - property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ - } - -#define CALLBACK_POST(value_type) \ - if (callback) { \ - callback(JSON::ParsePhase::Post, JSON::Type::value_type, line, column, \ - JSON::ParseContext::Root, 0, JSON::StringView{}); \ - } +template +inline auto scan_json(const char *&cursor, const char *end, + const char *buffer_start, std::uint64_t &line, + std::uint64_t &column, std::vector &tape) + -> void { + struct ContainerFrame { + std::size_t tape_index; + std::uint32_t child_count; + }; -namespace sourcemeta::core { -inline auto internal_parse_json(const char *&cursor, const char *end, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - using Result = JSON; - enum class Container : std::uint8_t { Array, Object }; - std::vector levels; - std::vector> frames; - levels.reserve(32); - frames.reserve(32); - std::optional result; - typename Result::String key{""}; - std::uint64_t key_line{0}; - std::uint64_t key_column{0}; char character = 0; + std::vector container_stack; + container_stack.reserve(32); - /* - * Parse any JSON document - */ - - internal::skip_whitespace(cursor, end, line, column); + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { - column += 1; + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); } - column += 1; + if constexpr (TrackPositions) { + column += 1; + } character = *cursor++; - // A JSON value can be an object, array, number, string, true, false, or null. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (character) { - case internal::constant_true.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{ - internal::parse_boolean_true(line, column, cursor, end)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_true(line, column, cursor, end); - } - case internal::constant_false.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{ - internal::parse_boolean_false(line, column, cursor, end)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_false(line, column, cursor, end); - } - case internal::constant_null.front(): - if (callback) { - CALLBACK_PRE(Null, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_null(line, column, cursor, end)}; - CALLBACK_POST(Null); - return value; - } else { - return internal::parse_null(line, column, cursor, end); - } - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - if (callback) { - CALLBACK_PRE(String, JSON::ParseContext::Root, 0, JSON::StringView{}); - const Result value{internal::parse_string(line, column, cursor, end)}; - CALLBACK_POST(String); - return value; - } else { - return Result{internal::parse_string(line, column, cursor, end)}; + { + const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case 't': + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + return; + case 'f': + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + return; + case 'n': + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + return; + case '"': { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + return; } - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_object; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto value{ - internal::parse_number(line, column, cursor, end, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Decimal); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Real); - } - - return value; + case '[': + goto do_scan_array; + case '{': + goto do_scan_object; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + return; } - - return internal::parse_number(line, column, cursor, end, character); - - default: - throw JSONParseError(line, column); + default: + throw JSONParseError(line, column); + } } /* - * Parse an array + * Scan an array */ -do_parse_array: - if (levels.empty()) { - assert(!result.has_value()); - levels.push_back(Container::Array); - result = std::make_optional(Result::make_array()); - frames.emplace_back(result.value()); - } else if (levels.back() == Container::Array) { - assert(result.has_value()); - levels.push_back(Container::Array); - assert(!frames.empty()); - assert(frames.back().get().is_array()); - frames.back().get().push_back(Result::make_array()); - frames.emplace_back(frames.back().get().back()); - } else if (levels.back() == Container::Object) { - assert(result.has_value()); - levels.push_back(Container::Array); - assert(!frames.empty()); - assert(frames.back().get().is_object()); - frames.back().get().assign(key, Result::make_array()); - frames.emplace_back(frames.back().get().at(key)); - } +do_scan_array: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ArrayStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); - // An array structure is a pair of square bracket tokens surrounding zero or - // more values. The values are separated by commas. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - -do_parse_array_item: - assert(levels.back() == Container::Array); - internal::skip_whitespace(cursor, end, line, column); + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { - column += 1; - goto error; + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - column += 1; - character = *cursor++; - switch (character) { - case internal::token_array_end: - if (frames.back().get().empty()) { - CALLBACK_POST(Array); - goto do_parse_container_end; - } else { - throw JSONParseError(line, column); - } - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Index, frames.back().get().size(), - JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Index, - frames.back().get().size(), JSON::StringView{}); - goto do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.back().get().size(), JSON::StringView{}); - frames.back().get().push_back( - internal::parse_boolean_true(line, column, cursor, end)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_false.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.back().get().size(), JSON::StringView{}); - frames.back().get().push_back( - internal::parse_boolean_false(line, column, cursor, end)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_null.front(): - CALLBACK_PRE(Null, JSON::ParseContext::Index, frames.back().get().size(), - JSON::StringView{}); - frames.back().get().push_back( - internal::parse_null(line, column, cursor, end)); - CALLBACK_POST(Null); - goto do_parse_array_item_separator; + if (*cursor == ']') { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE(String, JSON::ParseContext::Index, - frames.back().get().size(), JSON::StringView{}); - frames.back().get().push_back( - Result{internal::parse_string(line, column, cursor, end)}); - CALLBACK_POST(String); - goto do_parse_array_item_separator; + goto do_scan_array_item; +} - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto current_index{frames.back().get().size()}; - const auto value{ - internal::parse_number(line, column, cursor, end, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } +do_scan_array_item: + assert(!container_stack.empty()); + container_stack.back().child_count++; - frames.back().get().push_back(value); + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; - if (value.is_integer()) { - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.back().get().push_back( - internal::parse_number(line, column, cursor, end, character)); + { + const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case '[': + goto do_scan_array; + case '{': + goto do_scan_object; + case 't': + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case 'f': + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case 'n': + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case '"': { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; } - - goto do_parse_array_item_separator; - - default: - goto error; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; + } + default: + throw JSONParseError(line, column); + } } -do_parse_array_item_separator: - assert(levels.back() == Container::Array); - internal::skip_whitespace(cursor, end, line, column); +do_scan_array_item_separator: + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { column += 1; - goto error; } - column += 1; character = *cursor++; switch (character) { - case internal::token_array_delimiter: - goto do_parse_array_item; - case internal::token_array_end: - CALLBACK_POST(Array); - goto do_parse_container_end; - + case ',': + goto do_scan_array_item; + case ']': { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; + throw JSONParseError(line, column); } /* - * Parse an object + * Scan an object */ -do_parse_object: - if (levels.empty()) { - assert(levels.empty()); - assert(!result.has_value()); - levels.push_back(Container::Object); - result = std::make_optional(Result::make_object()); - frames.emplace_back(result.value()); - } else if (levels.back() == Container::Array) { - assert(result.has_value()); - levels.push_back(Container::Object); - assert(!frames.empty()); - assert(frames.back().get().is_array()); - frames.back().get().push_back(Result::make_object()); - frames.emplace_back(frames.back().get().back()); - } else if (levels.back() == Container::Object) { - assert(result.has_value()); - levels.push_back(Container::Object); - assert(!frames.empty()); - assert(frames.back().get().is_object()); - frames.back().get().assign(key, Result::make_object()); - frames.emplace_back(frames.back().get().at(key)); +do_scan_object: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ObjectStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); + + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - frames.back().get().as_object().reserve(8); + if (*cursor == '}') { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } + + goto do_scan_object_key; +} - // An object structure is represented as a pair of curly bracket tokens - // surrounding zero or more name/value pairs. A name is a string. A single - // colon token follows each name, separating the name from the value. A - // single comma token separates a value from a following name. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf +do_scan_object_key: + assert(!container_stack.empty()); + container_stack.back().child_count++; -do_parse_object_property_key: - assert(levels.back() == Container::Object); - internal::skip_whitespace(cursor, end, line, column); + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { column += 1; - goto error; } - column += 1; character = *cursor++; switch (character) { - case internal::token_object_end: - if (frames.back().get().empty()) { - CALLBACK_POST(Object); - goto do_parse_container_end; - } else { - goto error; - } - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - key_line = line; - key_column = column; - key = internal::parse_string(line, column, cursor, end); - goto do_parse_object_property_separator; - + case '"': { + const auto key_start{static_cast(cursor - buffer_start)}; + const auto key_line{line}; + const auto key_column{column}; + internal::scan_string(line, column, cursor, end); + const auto key_length{ + static_cast(cursor - buffer_start - key_start - 1)}; + tape.push_back( + {TapeType::Key, key_start, key_length, 0, key_line, key_column}); + goto do_scan_object_separator; + } default: - goto error; + throw JSONParseError(line, column); } -do_parse_object_property_separator: - assert(levels.back() == Container::Object); - internal::skip_whitespace(cursor, end, line, column); +do_scan_object_separator: + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { column += 1; - goto error; } - column += 1; character = *cursor++; switch (character) { - case internal::token_object_key_delimiter: - goto do_parse_object_property_value; - + case ':': + goto do_scan_object_value; default: - goto error; + throw JSONParseError(line, column); } -do_parse_object_property_value: - assert(levels.back() == Container::Object); - internal::skip_whitespace(cursor, end, line, column); +do_scan_object_value: + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { column += 1; - goto error; } - column += 1; character = *cursor++; - switch (character) { - case internal::token_array_begin: - CALLBACK_PRE_WITH_POSITION(Array, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE_WITH_POSITION(Object, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.back().get().assign_assume_new( - key, internal::parse_boolean_true(line, column, cursor, end)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_false.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.back().get().assign_assume_new( - key, internal::parse_boolean_false(line, column, cursor, end)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_null.front(): - CALLBACK_PRE_WITH_POSITION(Null, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.back().get().assign_assume_new( - key, internal::parse_null(line, column, cursor, end)); - CALLBACK_POST(Null); - goto do_parse_object_property_end; - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE_WITH_POSITION(String, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.back().get().assign_assume_new( - key, Result{internal::parse_string(line, column, cursor, end)}); - CALLBACK_POST(String); - goto do_parse_object_property_end; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - auto value{ - internal::parse_number(line, column, cursor, end, character)}; - const auto value_type{value.type()}; - if (value_type == JSON::Type::Integer) { - CALLBACK_PRE_WITH_POSITION(Integer, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else if (value_type == JSON::Type::Decimal) { - CALLBACK_PRE_WITH_POSITION(Decimal, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else { - CALLBACK_PRE_WITH_POSITION(Real, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } - - frames.back().get().assign_assume_new(key, std::move(value)); - - if (value_type == JSON::Type::Integer) { - CALLBACK_POST(Integer); - } else if (value_type == JSON::Type::Decimal) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.back().get().assign_assume_new( - key, internal::parse_number(line, column, cursor, end, character)); + { + const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case '[': + goto do_scan_array; + case '{': + goto do_scan_object; + case 't': + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case 'f': + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case 'n': + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case '"': { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; } - - goto do_parse_object_property_end; - - default: - goto error; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; + } + default: + throw JSONParseError(line, column); + } } -do_parse_object_property_end: - assert(levels.back() == Container::Object); - internal::skip_whitespace(cursor, end, line, column); +do_scan_object_property_end: + internal::skip_whitespace(cursor, end, line, column); if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { column += 1; - goto error; } - column += 1; character = *cursor++; switch (character) { - case internal::token_object_delimiter: - goto do_parse_object_property_key; - case internal::token_object_end: - CALLBACK_POST(Object); - goto do_parse_container_end; - + case ',': + goto do_scan_object_key; + case '}': { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; - } - - /* - * Finish parsing a container - */ - -error: - // For some strange reason, with certain AppleClang versions, - // the program crashes when de-allocating huge array/objects - // before throwing an error. The error goes away if we manually - // reset every frame of the resulting object. Compiler error? - // Seen on Apple clang version 14.0.3 (clang-1403.0.22.14.1) - while (!frames.empty()) { - frames.back().get().into(Result{nullptr}); - frames.pop_back(); + throw JSONParseError(line, column); } - throw JSONParseError(line, column); - -do_parse_container_end: - assert(!levels.empty()); - if (levels.size() == 1) { - return result.value(); +do_scan_container_end: + if (container_stack.empty()) { + return; } - frames.pop_back(); - levels.pop_back(); - if (levels.back() == Container::Array) { - goto do_parse_array_item_separator; + if (tape[container_stack.back().tape_index].type == TapeType::ArrayStart) { + goto do_scan_array_item_separator; } else { - goto do_parse_object_property_end; + goto do_scan_object_property_end; } } // NOLINTEND(cppcoreguidelines-avoid-goto) -inline auto internal_parse_json( - const std::basic_string - &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - const char *cursor{input.data()}; - return internal_parse_json(cursor, input.data() + input.size(), line, column, - callback); -} - } // namespace sourcemeta::core -#undef CALLBACK_PRE -#undef CALLBACK_PRE_WITH_POSITION -#undef CALLBACK_POST - #endif diff --git a/test/json/json_value_test.cc b/test/json/json_value_test.cc index 399796ef33..cc549bde91 100644 --- a/test/json/json_value_test.cc +++ b/test/json/json_value_test.cc @@ -16,7 +16,7 @@ TEST(JSON_value, general_traits) { // BIG WARNING! Increase this number will make projects like Blaze slower, // as it will affect cache lines when dealing with JSON documents -TEST(JSON_value, size) { EXPECT_EQ(sizeof(sourcemeta::core::JSON), 40); } +TEST(JSON_value, size) { EXPECT_EQ(sizeof(sourcemeta::core::JSON), 32); } TEST(JSON_value, copy_traits) { EXPECT_TRUE(std::is_copy_assignable::value); From b51b8ce006cc8143153595840b1d6b9c042eaf49 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 11 Feb 2026 13:49:30 -0400 Subject: [PATCH 2/4] Optimise Signed-off-by: Juan Cruz Viotti --- src/core/json/construct.h | 57 +++++----- .../json/include/sourcemeta/core/json_hash.h | 103 +++--------------- .../include/sourcemeta/core/json_object.h | 3 +- 3 files changed, 41 insertions(+), 122 deletions(-) diff --git a/src/core/json/construct.h b/src/core/json/construct.h index 5d775850a8..f6acfb2ec0 100644 --- a/src/core/json/construct.h +++ b/src/core/json/construct.h @@ -23,8 +23,8 @@ namespace sourcemeta::core { namespace internal { -inline auto unescape_string(const char *data, const std::uint32_t length) - -> typename JSON::String { +inline auto unescape_string(const char *data, const std::uint32_t length) -> + typename JSON::String { typename JSON::String result; const char *cursor{data}; const char *string_end{data + length}; @@ -104,12 +104,11 @@ inline auto unescape_string(const char *data, const std::uint32_t length) assert(cursor + 6 <= string_end); cursor += 2; const auto low{parse_hex4(cursor)}; - code_point = - 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00); + code_point = 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00); } - sourcemeta::core::codepoint_to_utf8( - static_cast(code_point), result); + sourcemeta::core::codepoint_to_utf8(static_cast(code_point), + result); break; } default: @@ -122,11 +121,9 @@ inline auto unescape_string(const char *data, const std::uint32_t length) inline auto construct_number(const char *data, const std::uint32_t length) -> JSON { - const bool has_dot{ - std::memchr(data, '.', length) != nullptr}; - const bool has_exponent{ - std::memchr(data, 'e', length) != nullptr || - std::memchr(data, 'E', length) != nullptr}; + const bool has_dot{std::memchr(data, '.', length) != nullptr}; + const bool has_exponent{std::memchr(data, 'e', length) != nullptr || + std::memchr(data, 'E', length) != nullptr}; if (has_exponent) { try { @@ -154,11 +151,10 @@ inline auto construct_number(const char *data, const std::uint32_t length) first_nonzero_position = 0; } - const auto decimal_after_first_nonzero{ - decimal_position > first_nonzero_position}; - const auto significant_digits{ - length - first_nonzero_position - - (decimal_after_first_nonzero ? 1 : 0)}; + const auto decimal_after_first_nonzero{decimal_position > + first_nonzero_position}; + const auto significant_digits{length - first_nonzero_position - + (decimal_after_first_nonzero ? 1 : 0)}; constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { try { @@ -222,15 +218,14 @@ inline auto post_column_for(const TapeEntry &entry) -> std::uint64_t { #define CALLBACK_PRE(value_type, entry_ref, context, index, property) \ if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, \ - (entry_ref).line, (entry_ref).column, context, index, property); \ + callback(JSON::ParsePhase::Pre, JSON::Type::value_type, (entry_ref).line, \ + (entry_ref).column, context, index, property); \ } #define CALLBACK_POST(value_type, post_line, post_column) \ if (callback) { \ - callback(JSON::ParsePhase::Post, JSON::Type::value_type, \ - post_line, post_column, \ - JSON::ParseContext::Root, 0, JSON::StringView{}); \ + callback(JSON::ParsePhase::Post, JSON::Type::value_type, post_line, \ + post_column, JSON::ParseContext::Root, 0, JSON::StringView{}); \ } inline auto construct_json(const char *buffer, @@ -273,8 +268,8 @@ inline auto construct_json(const char *buffer, case TapeType::String: { CALLBACK_PRE(String, entry, JSON::ParseContext::Root, 0, JSON::StringView{}); - auto value{Result{internal::unescape_string(buffer + entry.offset, - entry.length)}}; + auto value{Result{ + internal::unescape_string(buffer + entry.offset, entry.length)}}; CALLBACK_POST(String, entry.line, internal::post_column_for(entry)); return value; } @@ -312,7 +307,7 @@ inline auto construct_json(const char *buffer, * Construct an array */ -do_construct_array : { +do_construct_array: { const auto &array_entry{tape[tape_index]}; assert(array_entry.type == TapeType::ArrayStart); const auto child_count{array_entry.count}; @@ -404,8 +399,8 @@ do_construct_array_item: { CALLBACK_PRE(Decimal, item_entry, JSON::ParseContext::Index, current_index, JSON::StringView{}); } else { - CALLBACK_PRE(Real, item_entry, JSON::ParseContext::Index, - current_index, JSON::StringView{}); + CALLBACK_PRE(Real, item_entry, JSON::ParseContext::Index, current_index, + JSON::StringView{}); } const auto value_type{value.type()}; frames.back().get().push_back(std::move(value)); @@ -441,7 +436,7 @@ do_construct_array_item: { * Construct an object */ -do_construct_object : { +do_construct_object: { const auto &object_entry{tape[tape_index]}; assert(object_entry.type == TapeType::ObjectStart); const auto property_count{object_entry.count}; @@ -501,8 +496,8 @@ do_construct_object_value: { switch (value_entry.type) { case TapeType::ArrayStart: if (callback) { - callback(JSON::ParsePhase::Pre, JSON::Type::Array, key_line, - key_column, JSON::ParseContext::Property, 0, key); + callback(JSON::ParsePhase::Pre, JSON::Type::Array, key_line, key_column, + JSON::ParseContext::Property, 0, key); } goto do_construct_array; case TapeType::ObjectStart: @@ -535,8 +530,8 @@ do_construct_object_value: { goto do_construct_object_property_end; case TapeType::Null: if (callback) { - callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, - key_column, JSON::ParseContext::Property, 0, key); + callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, key_column, + JSON::ParseContext::Property, 0, key); } frames.back().get().assign_assume_new(std::move(key), JSON{nullptr}, key_hash); diff --git a/src/core/json/include/sourcemeta/core/json_hash.h b/src/core/json/include/sourcemeta/core/json_hash.h index b008947233..4dff502038 100644 --- a/src/core/json/include/sourcemeta/core/json_hash.h +++ b/src/core/json/include/sourcemeta/core/json_hash.h @@ -1,10 +1,11 @@ #ifndef SOURCEMETA_CORE_JSON_HASH_H_ #define SOURCEMETA_CORE_JSON_HASH_H_ -#include // assert -#include // std::uint64_t -#include // std::memcpy -#include // std::reference_wrapper +#include // assert +#include // std::uint64_t +#include // std::memcpy +#include // std::reference_wrapper +#include // std::string_view namespace sourcemeta::core { @@ -53,100 +54,22 @@ template struct PropertyHashJSON { }; [[nodiscard]] - inline auto perfect(const T &value, const std::size_t size) const noexcept - -> hash_type { + inline auto perfect(const std::string_view value, + const std::size_t size) const noexcept -> hash_type { hash_type result; assert(!value.empty()); std::memcpy(reinterpret_cast(&result) + 1, value.data(), size); return result; } - [[nodiscard]] - inline auto perfect(const char *data, const std::size_t size) const noexcept - -> hash_type { - hash_type result; - assert(size > 0); - std::memcpy(reinterpret_cast(&result) + 1, data, size); - return result; - } - - inline auto operator()(const char *data, - const std::size_t size) const noexcept -> hash_type { - switch (size) { - case 0: - return {}; - case 1: - return this->perfect(data, 1); - case 2: - return this->perfect(data, 2); - case 3: - return this->perfect(data, 3); - case 4: - return this->perfect(data, 4); - case 5: - return this->perfect(data, 5); - case 6: - return this->perfect(data, 6); - case 7: - return this->perfect(data, 7); - case 8: - return this->perfect(data, 8); - case 9: - return this->perfect(data, 9); - case 10: - return this->perfect(data, 10); - case 11: - return this->perfect(data, 11); - case 12: - return this->perfect(data, 12); - case 13: - return this->perfect(data, 13); - case 14: - return this->perfect(data, 14); - case 15: - return this->perfect(data, 15); - case 16: - return this->perfect(data, 16); - case 17: - return this->perfect(data, 17); - case 18: - return this->perfect(data, 18); - case 19: - return this->perfect(data, 19); - case 20: - return this->perfect(data, 20); - case 21: - return this->perfect(data, 21); - case 22: - return this->perfect(data, 22); - case 23: - return this->perfect(data, 23); - case 24: - return this->perfect(data, 24); - case 25: - return this->perfect(data, 25); - case 26: - return this->perfect(data, 26); - case 27: - return this->perfect(data, 27); - case 28: - return this->perfect(data, 28); - case 29: - return this->perfect(data, 29); - case 30: - return this->perfect(data, 30); - case 31: - return this->perfect(data, 31); - default: - auto hash = this->perfect(data, 31); - hash.a |= 1 + (size + static_cast(data[0]) + - static_cast(data[size - 1])) % - 255; - return hash; - } + template + requires requires(const U &wrapper) { wrapper.get(); } + inline auto operator()(const U &value) const noexcept -> hash_type { + return (*this)(std::string_view{value.get()}); } - inline auto operator()(const T &value) const noexcept -> hash_type { + inline auto operator()(const std::string_view value) const noexcept + -> hash_type { const auto size{value.size()}; switch (size) { case 0: diff --git a/src/core/json/include/sourcemeta/core/json_object.h b/src/core/json/include/sourcemeta/core/json_object.h index ccb12d5bbd..28bfccccde 100644 --- a/src/core/json/include/sourcemeta/core/json_object.h +++ b/src/core/json/include/sourcemeta/core/json_object.h @@ -6,6 +6,7 @@ #include // std::size_t #include // std::initializer_list #include // std::advance +#include // std::string_view #include // std::pair, std::move #include // std::vector @@ -128,7 +129,7 @@ template class JSONObject { [[nodiscard]] inline auto hash(const char *raw_data, const std::size_t raw_size) const noexcept -> hash_type { - return this->hasher(raw_data, raw_size); + return hasher(std::string_view{raw_data, raw_size}); } /// Attempt to find an entry by key From 6d6acc887c76d0ce6ab837427aaf9a91fdd728f7 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 11 Feb 2026 14:06:00 -0400 Subject: [PATCH 3/4] Fix sizes Signed-off-by: Juan Cruz Viotti --- test/json/json_value_test.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/json/json_value_test.cc b/test/json/json_value_test.cc index cc549bde91..0130cf0b29 100644 --- a/test/json/json_value_test.cc +++ b/test/json/json_value_test.cc @@ -16,7 +16,12 @@ TEST(JSON_value, general_traits) { // BIG WARNING! Increase this number will make projects like Blaze slower, // as it will affect cache lines when dealing with JSON documents -TEST(JSON_value, size) { EXPECT_EQ(sizeof(sourcemeta::core::JSON), 32); } +TEST(JSON_value, size) { + // The union's largest member is std::string, whose size varies across + // standard library implementations (24 on libc++, 32 on libstdc++). + // The Type enum (1 byte) is padded to 8 bytes for alignment. + EXPECT_EQ(sizeof(sourcemeta::core::JSON), sizeof(std::string) + 8); +} TEST(JSON_value, copy_traits) { EXPECT_TRUE(std::is_copy_assignable::value); From fb368cd43024b441ecbde5ee8c9728fc2ad7d755 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Wed, 11 Feb 2026 16:03:15 -0400 Subject: [PATCH 4/4] Explicit hashing Signed-off-by: Juan Cruz Viotti --- .../json/include/sourcemeta/core/json_hash.h | 172 +++++++++++++----- .../include/sourcemeta/core/json_object.h | 3 +- 2 files changed, 125 insertions(+), 50 deletions(-) diff --git a/src/core/json/include/sourcemeta/core/json_hash.h b/src/core/json/include/sourcemeta/core/json_hash.h index 4dff502038..503520161f 100644 --- a/src/core/json/include/sourcemeta/core/json_hash.h +++ b/src/core/json/include/sourcemeta/core/json_hash.h @@ -1,11 +1,10 @@ #ifndef SOURCEMETA_CORE_JSON_HASH_H_ #define SOURCEMETA_CORE_JSON_HASH_H_ -#include // assert -#include // std::uint64_t -#include // std::memcpy -#include // std::reference_wrapper -#include // std::string_view +#include // assert +#include // std::uint64_t +#include // std::memcpy +#include // std::reference_wrapper namespace sourcemeta::core { @@ -54,93 +53,90 @@ template struct PropertyHashJSON { }; [[nodiscard]] - inline auto perfect(const std::string_view value, - const std::size_t size) const noexcept -> hash_type { + inline auto perfect(const char *data, const std::size_t size) const noexcept + -> hash_type { hash_type result; - assert(!value.empty()); - std::memcpy(reinterpret_cast(&result) + 1, value.data(), size); + assert(size > 0); + std::memcpy(reinterpret_cast(&result) + 1, data, size); return result; } - template - requires requires(const U &wrapper) { wrapper.get(); } - inline auto operator()(const U &value) const noexcept -> hash_type { - return (*this)(std::string_view{value.get()}); - } + // GCC does not optimise well across implicit type conversions such as + // std::string to std::string_view, so we provide separate overloads with + // duplicated logic instead of unifying on a single parameter type - inline auto operator()(const std::string_view value) const noexcept - -> hash_type { + inline auto operator()(const T &value) const noexcept -> hash_type { const auto size{value.size()}; switch (size) { case 0: return {}; case 1: - return this->perfect(value, 1); + return this->perfect(value.data(), 1); case 2: - return this->perfect(value, 2); + return this->perfect(value.data(), 2); case 3: - return this->perfect(value, 3); + return this->perfect(value.data(), 3); case 4: - return this->perfect(value, 4); + return this->perfect(value.data(), 4); case 5: - return this->perfect(value, 5); + return this->perfect(value.data(), 5); case 6: - return this->perfect(value, 6); + return this->perfect(value.data(), 6); case 7: - return this->perfect(value, 7); + return this->perfect(value.data(), 7); case 8: - return this->perfect(value, 8); + return this->perfect(value.data(), 8); case 9: - return this->perfect(value, 9); + return this->perfect(value.data(), 9); case 10: - return this->perfect(value, 10); + return this->perfect(value.data(), 10); case 11: - return this->perfect(value, 11); + return this->perfect(value.data(), 11); case 12: - return this->perfect(value, 12); + return this->perfect(value.data(), 12); case 13: - return this->perfect(value, 13); + return this->perfect(value.data(), 13); case 14: - return this->perfect(value, 14); + return this->perfect(value.data(), 14); case 15: - return this->perfect(value, 15); + return this->perfect(value.data(), 15); case 16: - return this->perfect(value, 16); + return this->perfect(value.data(), 16); case 17: - return this->perfect(value, 17); + return this->perfect(value.data(), 17); case 18: - return this->perfect(value, 18); + return this->perfect(value.data(), 18); case 19: - return this->perfect(value, 19); + return this->perfect(value.data(), 19); case 20: - return this->perfect(value, 20); + return this->perfect(value.data(), 20); case 21: - return this->perfect(value, 21); + return this->perfect(value.data(), 21); case 22: - return this->perfect(value, 22); + return this->perfect(value.data(), 22); case 23: - return this->perfect(value, 23); + return this->perfect(value.data(), 23); case 24: - return this->perfect(value, 24); + return this->perfect(value.data(), 24); case 25: - return this->perfect(value, 25); + return this->perfect(value.data(), 25); case 26: - return this->perfect(value, 26); + return this->perfect(value.data(), 26); case 27: - return this->perfect(value, 27); + return this->perfect(value.data(), 27); case 28: - return this->perfect(value, 28); + return this->perfect(value.data(), 28); case 29: - return this->perfect(value, 29); + return this->perfect(value.data(), 29); case 30: - return this->perfect(value, 30); + return this->perfect(value.data(), 30); case 31: - return this->perfect(value, 31); + return this->perfect(value.data(), 31); default: // This case is specifically designed to be constant with regards to // string length, and to exploit the fact that most JSON objects don't // have a lot of entries, so hash collision is not as common - auto hash = this->perfect(value, 31); + auto hash = this->perfect(value.data(), 31); hash.a |= 1 + (size + static_cast(value.front()) + static_cast(value.back())) % @@ -150,6 +146,86 @@ template struct PropertyHashJSON { } } + inline auto operator()(const char *data, + const std::size_t size) const noexcept -> hash_type { + switch (size) { + case 0: + return {}; + case 1: + return this->perfect(data, 1); + case 2: + return this->perfect(data, 2); + case 3: + return this->perfect(data, 3); + case 4: + return this->perfect(data, 4); + case 5: + return this->perfect(data, 5); + case 6: + return this->perfect(data, 6); + case 7: + return this->perfect(data, 7); + case 8: + return this->perfect(data, 8); + case 9: + return this->perfect(data, 9); + case 10: + return this->perfect(data, 10); + case 11: + return this->perfect(data, 11); + case 12: + return this->perfect(data, 12); + case 13: + return this->perfect(data, 13); + case 14: + return this->perfect(data, 14); + case 15: + return this->perfect(data, 15); + case 16: + return this->perfect(data, 16); + case 17: + return this->perfect(data, 17); + case 18: + return this->perfect(data, 18); + case 19: + return this->perfect(data, 19); + case 20: + return this->perfect(data, 20); + case 21: + return this->perfect(data, 21); + case 22: + return this->perfect(data, 22); + case 23: + return this->perfect(data, 23); + case 24: + return this->perfect(data, 24); + case 25: + return this->perfect(data, 25); + case 26: + return this->perfect(data, 26); + case 27: + return this->perfect(data, 27); + case 28: + return this->perfect(data, 28); + case 29: + return this->perfect(data, 29); + case 30: + return this->perfect(data, 30); + case 31: + return this->perfect(data, 31); + default: + // This case is specifically designed to be constant with regards to + // string length, and to exploit the fact that most JSON objects don't + // have a lot of entries, so hash collision is not as common + auto hash = this->perfect(data, 31); + hash.a |= 1 + (size + static_cast(data[0]) + + static_cast(data[size - 1])) % + // Make sure the property hash can never exceed 8 bits + 255; + return hash; + } + } + [[nodiscard]] inline auto is_perfect(const hash_type &hash) const noexcept -> bool { // If there is anything written past the first byte, diff --git a/src/core/json/include/sourcemeta/core/json_object.h b/src/core/json/include/sourcemeta/core/json_object.h index 28bfccccde..cf0ff6810d 100644 --- a/src/core/json/include/sourcemeta/core/json_object.h +++ b/src/core/json/include/sourcemeta/core/json_object.h @@ -6,7 +6,6 @@ #include // std::size_t #include // std::initializer_list #include // std::advance -#include // std::string_view #include // std::pair, std::move #include // std::vector @@ -129,7 +128,7 @@ template class JSONObject { [[nodiscard]] inline auto hash(const char *raw_data, const std::size_t raw_size) const noexcept -> hash_type { - return hasher(std::string_view{raw_data, raw_size}); + return hasher(raw_data, raw_size); } /// Attempt to find an entry by key