Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
632 changes: 632 additions & 0 deletions src/core/json/construct.h

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions src/core/json/include/sourcemeta/core/json_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ template <typename Value> class JSONArray {
return this->data.size();
}

/// Reserve capacity for a given number of elements. This forwards the
/// request to the `reserve` method of the underlying container and does not
/// change the number of elements in the array
auto reserve(const size_type capacity) -> void {
this->data.reserve(capacity);
}

private:
friend Value;
// Exporting symbols that depend on the standard C++ library is considered
Expand Down
155 changes: 119 additions & 36 deletions src/core/json/include/sourcemeta/core/json_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,87 +53,90 @@ template <typename T> struct PropertyHashJSON {
};

[[nodiscard]]
inline auto perfect(const T &value, const std::size_t size) const noexcept
inline auto perfect(const char *data, const std::size_t size) const noexcept
-> hash_type {
hash_type result;
assert(!value.empty());
// Copy starting a byte 2
std::memcpy(reinterpret_cast<char *>(&result) + 1, value.data(), size);
assert(size > 0);
std::memcpy(reinterpret_cast<char *>(&result) + 1, data, size);
return result;
}

// GCC does not optimise well across implicit type conversions such as
// std::string to std::string_view, so we provide separate overloads with
// duplicated logic instead of unifying on a single parameter type

inline auto operator()(const T &value) const noexcept -> hash_type {
const auto size{value.size()};
switch (size) {
case 0:
return {};
case 1:
return this->perfect(value, 1);
return this->perfect(value.data(), 1);
case 2:
return this->perfect(value, 2);
return this->perfect(value.data(), 2);
case 3:
return this->perfect(value, 3);
return this->perfect(value.data(), 3);
case 4:
return this->perfect(value, 4);
return this->perfect(value.data(), 4);
case 5:
return this->perfect(value, 5);
return this->perfect(value.data(), 5);
case 6:
return this->perfect(value, 6);
return this->perfect(value.data(), 6);
case 7:
return this->perfect(value, 7);
return this->perfect(value.data(), 7);
case 8:
return this->perfect(value, 8);
return this->perfect(value.data(), 8);
case 9:
return this->perfect(value, 9);
return this->perfect(value.data(), 9);
case 10:
return this->perfect(value, 10);
return this->perfect(value.data(), 10);
case 11:
return this->perfect(value, 11);
return this->perfect(value.data(), 11);
case 12:
return this->perfect(value, 12);
return this->perfect(value.data(), 12);
case 13:
return this->perfect(value, 13);
return this->perfect(value.data(), 13);
case 14:
return this->perfect(value, 14);
return this->perfect(value.data(), 14);
case 15:
return this->perfect(value, 15);
return this->perfect(value.data(), 15);
case 16:
return this->perfect(value, 16);
return this->perfect(value.data(), 16);
case 17:
return this->perfect(value, 17);
return this->perfect(value.data(), 17);
case 18:
return this->perfect(value, 18);
return this->perfect(value.data(), 18);
case 19:
return this->perfect(value, 19);
return this->perfect(value.data(), 19);
case 20:
return this->perfect(value, 20);
return this->perfect(value.data(), 20);
case 21:
return this->perfect(value, 21);
return this->perfect(value.data(), 21);
case 22:
return this->perfect(value, 22);
return this->perfect(value.data(), 22);
case 23:
return this->perfect(value, 23);
return this->perfect(value.data(), 23);
case 24:
return this->perfect(value, 24);
return this->perfect(value.data(), 24);
case 25:
return this->perfect(value, 25);
return this->perfect(value.data(), 25);
case 26:
return this->perfect(value, 26);
return this->perfect(value.data(), 26);
case 27:
return this->perfect(value, 27);
return this->perfect(value.data(), 27);
case 28:
return this->perfect(value, 28);
return this->perfect(value.data(), 28);
case 29:
return this->perfect(value, 29);
return this->perfect(value.data(), 29);
case 30:
return this->perfect(value, 30);
return this->perfect(value.data(), 30);
case 31:
return this->perfect(value, 31);
return this->perfect(value.data(), 31);
default:
// This case is specifically designed to be constant with regards to
// string length, and to exploit the fact that most JSON objects don't
// have a lot of entries, so hash collision is not as common
auto hash = this->perfect(value, 31);
auto hash = this->perfect(value.data(), 31);
hash.a |=
1 + (size + static_cast<typename hash_type::type>(value.front()) +
static_cast<typename hash_type::type>(value.back())) %
Expand All @@ -143,6 +146,86 @@ template <typename T> struct PropertyHashJSON {
}
}

// Hash a property name given as a raw character buffer and its length.
//
// - An empty input yields a value-initialised hash
// - Inputs of 1 to 31 bytes are copied verbatim into the hash by `perfect`
// - Longer inputs keep only their first 31 bytes, and additionally fold the
//   length and the first and last characters into `hash.a`
//
// Every case passes a literal size to `perfect` rather than forwarding the
// runtime `size`, so each call site sees a constant copy length. This is a
// deliberately duplicated counterpart of the `const T &` overload
inline auto operator()(const char *data,
const std::size_t size) const noexcept -> hash_type {
switch (size) {
case 0:
return {};
case 1:
return this->perfect(data, 1);
case 2:
return this->perfect(data, 2);
case 3:
return this->perfect(data, 3);
case 4:
return this->perfect(data, 4);
case 5:
return this->perfect(data, 5);
case 6:
return this->perfect(data, 6);
case 7:
return this->perfect(data, 7);
case 8:
return this->perfect(data, 8);
case 9:
return this->perfect(data, 9);
case 10:
return this->perfect(data, 10);
case 11:
return this->perfect(data, 11);
case 12:
return this->perfect(data, 12);
case 13:
return this->perfect(data, 13);
case 14:
return this->perfect(data, 14);
case 15:
return this->perfect(data, 15);
case 16:
return this->perfect(data, 16);
case 17:
return this->perfect(data, 17);
case 18:
return this->perfect(data, 18);
case 19:
return this->perfect(data, 19);
case 20:
return this->perfect(data, 20);
case 21:
return this->perfect(data, 21);
case 22:
return this->perfect(data, 22);
case 23:
return this->perfect(data, 23);
case 24:
return this->perfect(data, 24);
case 25:
return this->perfect(data, 25);
case 26:
return this->perfect(data, 26);
case 27:
return this->perfect(data, 27);
case 28:
return this->perfect(data, 28);
case 29:
return this->perfect(data, 29);
case 30:
return this->perfect(data, 30);
case 31:
return this->perfect(data, 31);
default:
// This case is specifically designed to be constant with regards to
// string length, and to exploit the fact that most JSON objects don't
// have a lot of entries, so hash collision is not as common
auto hash = this->perfect(data, 31);
// The modulo keeps the mixed-in term within 0 to 254, and the leading `1 +`
// shifts it to 1 to 255, so the value OR-ed into `hash.a` is never zero
hash.a |= 1 + (size + static_cast<typename hash_type::type>(data[0]) +
static_cast<typename hash_type::type>(data[size - 1])) %
// Make sure the property hash can never exceed 8 bits
255;
return hash;
}
}

[[nodiscard]]
inline auto is_perfect(const hash_type &hash) const noexcept -> bool {
// If there is anything written past the first byte,
Expand Down
21 changes: 20 additions & 1 deletion src/core/json/include/sourcemeta/core/json_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ template <typename Key, typename Value, typename Hash> class JSONObject {
return this->hasher(key);
}

/// Compute the hash of a key that is given as a raw character buffer plus
/// its length, by delegating to the hasher of this object
[[nodiscard]] inline auto hash(const char *raw_data,
                               const std::size_t raw_size) const noexcept
    -> hash_type {
  return this->hasher(raw_data, raw_size);
}

/// Attempt to find an entry by key
[[nodiscard]] inline auto find(const Key &key) const -> const_iterator {
const auto key_hash{this->hash(key)};
Expand Down Expand Up @@ -370,6 +377,18 @@ template <typename Key, typename Value, typename Hash> class JSONObject {
return key_hash;
}

/// Emplace an object property with a pre-computed hash. The key and the
/// value are moved into a new entry that is appended to the underlying
/// storage, and the given hash is stored as-is instead of being recomputed.
/// No lookup for an existing entry is performed here.
/// NOTE(review): presumably the caller must guarantee that the key is not
/// already present and that `key_hash` is the hash of `key` - confirm
inline auto emplace_assume_new(Key &&key, mapped_type &&value,
const hash_type key_hash) -> void {
this->data.push_back({std::move(key), std::move(value), key_hash});
}

/// Emplace an object property with a pre-computed hash. The key is copied
/// and the value is moved into a new entry that is appended to the
/// underlying storage, and the given hash is stored as-is instead of being
/// recomputed. No lookup for an existing entry is performed here.
/// NOTE(review): presumably the caller must guarantee that the key is not
/// already present and that `key_hash` is the hash of `key` - confirm
inline auto emplace_assume_new(const Key &key, mapped_type &&value,
const hash_type key_hash) -> void {
this->data.push_back({key, std::move(value), key_hash});
}

/// Remove every property in the object
inline auto clear() noexcept -> void { this->data.clear(); }

Expand Down Expand Up @@ -442,7 +461,7 @@ template <typename Key, typename Value, typename Hash> class JSONObject {
#if defined(_MSC_VER)
#pragma warning(disable : 4251)
#endif
Hash hasher;
static constexpr Hash hasher{};
underlying_type data;
#if defined(_MSC_VER)
#pragma warning(default : 4251)
Expand Down
4 changes: 4 additions & 0 deletions src/core/json/include/sourcemeta/core/json_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -1438,6 +1438,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON {
/// ```
auto assign_assume_new(String &&key, JSON &&value) -> void;

/// This method sets an object key with a pre-computed hash, so the key does
/// not need to be hashed again on insertion.
/// NOTE(review): presumably the hash must be the one the object would compute
/// for the key, and the key must not already exist in the object - confirm
auto assign_assume_new(String &&key, JSON &&value, Object::hash_type hash)
-> void;

/// This method deletes an object key. For example:
///
/// ```cpp
Expand Down
49 changes: 45 additions & 4 deletions src/core/json/json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <sourcemeta/core/json_error.h>
#include <sourcemeta/core/json_value.h>

#include "construct.h"
#include "parser.h"
#include "stringify.h"

Expand All @@ -15,9 +16,33 @@
#include <ostream> // std::basic_ostream
#include <sstream> // std::basic_ostringstream
#include <system_error> // std::make_error_code, std::errc
#include <vector> // std::vector

namespace sourcemeta::core {

// Parse the JSON document stored in the `[cursor, end)` buffer in two phases:
// the input is first scanned into a tape of entries, and the resulting value
// is then constructed from that tape.
//
// `cursor` is advanced by the scanner, and `line` and `column` are both read
// and updated. When a callback is given or `track_positions` is set, the
// `scan_json<true>` variant is used directly. Otherwise `scan_json<false>` is
// tried first, and if it throws a `JSONParseError`, the input is scanned
// again from the beginning with `scan_json<true>`.
// NOTE(review): presumably the template argument toggles position tracking,
// so that the second scan reports the error with accurate positions - confirm
static auto internal_parse_json(const char *&cursor, const char *end,
                                std::uint64_t &line, std::uint64_t &column,
                                const JSON::ParseCallback &callback,
                                const bool track_positions) -> JSON {
  const char *buffer_start{cursor};
  // Remember the positions we were given, so that a re-scan starts from the
  // exact same state rather than from hard-coded defaults
  const auto initial_line{line};
  const auto initial_column{column};
  std::vector<TapeEntry> tape;
  // Heuristic: pre-allocate one tape entry for every 8 bytes of input
  tape.reserve(static_cast<std::size_t>(end - cursor) / 8);
  if (callback || track_positions) {
    scan_json<true>(cursor, end, buffer_start, line, column, tape);
  } else {
    try {
      scan_json<false>(cursor, end, buffer_start, line, column, tape);
    } catch (const JSONParseError &) {
      // Discard any partial progress before scanning the input again
      cursor = buffer_start;
      tape.clear();
      line = initial_line;
      column = initial_column;
      scan_json<true>(cursor, end, buffer_start, line, column, tape);
    }
  }
  return construct_json(buffer_start, tape, callback);
}

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
auto parse_json(std::basic_istream<JSON::Char, JSON::CharTraits> &stream,
std::uint64_t &line, std::uint64_t &column,
Expand All @@ -28,7 +53,7 @@ auto parse_json(std::basic_istream<JSON::Char, JSON::CharTraits> &stream,
const auto input{buffer.str()};
const char *cursor{input.data()};
const char *end{input.data() + input.size()};
auto result{internal_parse_json(cursor, end, line, column, callback)};
auto result{internal_parse_json(cursor, end, line, column, callback, true)};
if (start_position != static_cast<std::streampos>(-1)) {
const auto consumed{static_cast<std::streamoff>(cursor - input.data())};
stream.clear();
Expand All @@ -41,22 +66,38 @@ auto parse_json(std::basic_istream<JSON::Char, JSON::CharTraits> &stream,
// Parse a JSON document held in a string, reading and updating the line and
// column counters supplied by the caller. Position tracking is always
// requested from the internal parser for this overload
auto parse_json(const std::basic_string<JSON::Char, JSON::CharTraits> &input,
                std::uint64_t &line, std::uint64_t &column,
                const JSON::ParseCallback &callback) -> JSON {
  const char *cursor{input.data()};
  return internal_parse_json(cursor, input.data() + input.size(), line, column,
                             callback, true);
}

// Parse a JSON document from a stream without exposing line and column
// information to the caller. The rest of the stream is read into a buffer
// and parsed from there without requesting position tracking. If the stream
// reported a valid starting position, it is then repositioned right after
// the bytes that the parser consumed
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
auto parse_json(std::basic_istream<JSON::Char, JSON::CharTraits> &stream,
                const JSON::ParseCallback &callback) -> JSON {
  const auto start_position{stream.tellg()};
  std::basic_ostringstream<JSON::Char, JSON::CharTraits> buffer;
  buffer << stream.rdbuf();
  const auto input{buffer.str()};
  const char *cursor{input.data()};
  const char *end{input.data() + input.size()};
  std::uint64_t line{1};
  std::uint64_t column{0};
  auto result{internal_parse_json(cursor, end, line, column, callback, false)};
  // A starting position of -1 means the stream could not report where it was
  if (start_position != static_cast<std::streampos>(-1)) {
    const auto consumed{static_cast<std::streamoff>(cursor - input.data())};
    // Draining the stream may have set state flags that would make the seek
    // fail, so reset them first
    stream.clear();
    stream.seekg(start_position + consumed);
  }
  return result;
}

// Parse a JSON document held in a string without exposing line and column
// information to the caller. The counters start at line 1 and column 0, and
// position tracking is not requested from the internal parser
auto parse_json(const std::basic_string<JSON::Char, JSON::CharTraits> &input,
                const JSON::ParseCallback &callback) -> JSON {
  std::uint64_t line{1};
  std::uint64_t column{0};
  const char *cursor{input.data()};
  return internal_parse_json(cursor, input.data() + input.size(), line, column,
                             callback, false);
}

auto read_json(const std::filesystem::path &path,
Expand Down
6 changes: 6 additions & 0 deletions src/core/json/json_value.cc
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,12 @@ auto JSON::assign_assume_new(JSON::String &&key, JSON &&value) -> void {
this->data_object.emplace_assume_new(std::move(key), std::move(value));
}

// Set an object property using a hash supplied by the caller. The key and
// the value are moved into the underlying object together with the hash.
// The instance must be an object, which is only checked through an assertion
auto JSON::assign_assume_new(JSON::String &&key, JSON &&value,
Object::hash_type hash) -> void {
assert(this->is_object());
this->data_object.emplace_assume_new(std::move(key), std::move(value), hash);
}

auto JSON::erase(const JSON::String &key) -> typename Object::size_type {
assert(this->is_object());
return this->data_object.erase(key);
Expand Down
Loading