diff --git a/src/common/bytes_utils.h b/src/common/bytes_utils.h index 8aa3013..83828c8 100644 --- a/src/common/bytes_utils.h +++ b/src/common/bytes_utils.h @@ -30,7 +30,7 @@ inline constexpr size_t kSizePrefixBytes = sizeof(uint32_t); -// Utility functions for little-endian number reading and writing. +// Utility functions for little-endian number reading and writing (vectors and spans) inline void append_u32_le(std::vector& out, uint32_t v) { const size_t offset = out.size(); @@ -81,13 +81,6 @@ inline void write_u32_le_at(std::vector& buf, size_t offset, uint32_t v buf[offset + 3] = static_cast((v >> 24) & 0xFF); } -inline void write_u32_le(uint8_t* p, uint32_t v) { - p[0] = static_cast(v); - p[1] = static_cast(v >> 8); - p[2] = static_cast(v >> 16); - p[3] = static_cast(v >> 24); -} - inline uint32_t read_u32_le(const std::vector& in, size_t offset) { return static_cast(in[offset]) | (static_cast(in[offset + 1]) << 8) | @@ -102,10 +95,114 @@ inline uint32_t read_u32_le(tcb::span in, size_t offset) { (static_cast(in[offset + 3]) << 24); } +// Utility functions for reading and writing with templated types. 
+ +template +inline T read_le(const uint8_t* p) { + if constexpr (std::is_same_v) { + const uint32_t v = + (static_cast(p[0]) ) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24); + return static_cast(v); + } else if constexpr (std::is_same_v) { + const uint64_t v = + (static_cast(p[0]) ) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24) | + (static_cast(p[4]) << 32) | + (static_cast(p[5]) << 40) | + (static_cast(p[6]) << 48) | + (static_cast(p[7]) << 56); + return static_cast(v); + } else if constexpr (std::is_same_v) { + const uint32_t bits = + (static_cast(p[0]) ) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24); + float value; + std::memcpy(&value, &bits, sizeof(value)); + return value; + } else if constexpr (std::is_same_v) { + const uint64_t bits = + (static_cast(p[0]) ) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24) | + (static_cast(p[4]) << 32) | + (static_cast(p[5]) << 40) | + (static_cast(p[6]) << 48) | + (static_cast(p[7]) << 56); + double value; + std::memcpy(&value, &bits, sizeof(value)); + return value; + } else { + throw InvalidInputException("read_le: unsupported type"); + } +} + +template +inline void write_le(const T& value, uint8_t* p) { + if constexpr (std::is_same_v) { + const uint32_t v = static_cast(value); + p[0] = static_cast( v & 0xFF); + p[1] = static_cast((v >> 8) & 0xFF); + p[2] = static_cast((v >> 16) & 0xFF); + p[3] = static_cast((v >> 24) & 0xFF); + } else if constexpr (std::is_same_v) { + const uint64_t v = static_cast(value); + p[0] = static_cast( v & 0xFF); + p[1] = static_cast((v >> 8) & 0xFF); + p[2] = static_cast((v >> 16) & 0xFF); + p[3] = static_cast((v >> 24) & 0xFF); + p[4] = static_cast((v >> 32) & 0xFF); + p[5] = static_cast((v >> 40) & 0xFF); + p[6] = static_cast((v >> 48) & 0xFF); + p[7] = static_cast((v >> 56) & 0xFF); + } else if constexpr (std::is_same_v) { + 
uint32_t bits; + std::memcpy(&bits, &value, sizeof(bits)); + p[0] = static_cast( bits & 0xFF); + p[1] = static_cast((bits >> 8) & 0xFF); + p[2] = static_cast((bits >> 16) & 0xFF); + p[3] = static_cast((bits >> 24) & 0xFF); + } else if constexpr (std::is_same_v) { + uint64_t bits; + std::memcpy(&bits, &value, sizeof(bits)); + p[0] = static_cast( bits & 0xFF); + p[1] = static_cast((bits >> 8) & 0xFF); + p[2] = static_cast((bits >> 16) & 0xFF); + p[3] = static_cast((bits >> 24) & 0xFF); + p[4] = static_cast((bits >> 32) & 0xFF); + p[5] = static_cast((bits >> 40) & 0xFF); + p[6] = static_cast((bits >> 48) & 0xFF); + p[7] = static_cast((bits >> 56) & 0xFF); + } else { + throw InvalidInputException("write_le: unsupported type"); + } +} + +// Utility functions for little-endian number reading and writing UINT32 values. +// +// Since UINT32 functions are called heavily in tight loops and hot execution paths to read/write sizes, +// we keep them separate from other functions so they are easy to identify in the library code and can be +// optimized separately. This results in some code duplication. + +inline void write_u32_le(uint8_t* p, uint32_t v) { + p[0] = static_cast(v); + p[1] = static_cast(v >> 8); + p[2] = static_cast(v >> 16); + p[3] = static_cast(v >> 24); +} + inline uint32_t read_u32_le(const uint8_t* p) { - uint32_t v; - std::memcpy(&v, p, sizeof(v)); - return v; + return static_cast(p[0]) | + (static_cast(p[1]) << 8) | + (static_cast(p[2]) << 16) | + (static_cast(p[3]) << 24); } // Utility functions for splitting and joining byte vectors. 
@@ -303,7 +400,12 @@ inline std::string AddStringAttribute( return value; } -// Helper function to convert string to binary data +// Helper function to convert string to binary data and vice versa + inline std::vector StringToBytes(const std::string& str) { return std::vector(str.begin(), str.end()); } + +inline std::string BytesToString(tcb::span span) { + return std::string(reinterpret_cast(span.data()), span.size()); +} diff --git a/src/common/bytes_utils_test.cpp b/src/common/bytes_utils_test.cpp index 87a6a38..2c9c432 100644 --- a/src/common/bytes_utils_test.cpp +++ b/src/common/bytes_utils_test.cpp @@ -18,6 +18,8 @@ #include "bytes_utils.h" #include "exceptions.h" +#include +#include #include #include #include @@ -284,4 +286,202 @@ TEST(BytesUtils, StringToBytes_PreservesRawBytesAndNulls) { static_cast('Z')}; EXPECT_EQ(expected, result); -} \ No newline at end of file +} + +TEST(BytesUtils, BytesToString_ConvertsAsciiEmptyAndRawBytes) { + { + const std::vector bytes = {'d', 'b', 'p', 's'}; + const std::string result = BytesToString(tcb::span(bytes)); + EXPECT_EQ(result, "dbps"); + } + + { + const std::vector bytes; + const std::string result = BytesToString(tcb::span(bytes)); + EXPECT_TRUE(result.empty()); + } + + { + const std::vector bytes = { + static_cast('D'), + static_cast('B'), + static_cast('P'), + static_cast('S'), + static_cast(0x00), + static_cast('X'), + static_cast('Y'), + static_cast(0xFF), + static_cast(0x80), + static_cast(0x00), + static_cast('Z')}; + const std::string result = BytesToString(tcb::span(bytes)); + const std::string expected = std::string{ + 'D', 'B', 'P', 'S', '\0', 'X', 'Y', + static_cast(0xFF), static_cast(0x80), '\0', 'Z'}; + EXPECT_EQ(result.size(), expected.size()); + EXPECT_EQ(result, expected); + } +} + +TEST(BytesUtils, ReadU32Le_FromPointer_DecodesLittleEndianBytes) { + const std::array bytes = {0x78, 0x56, 0x34, 0x12}; + const uint32_t value = read_u32_le(bytes.data()); + + EXPECT_EQ(value, 0x12345678u); +} + 
+TEST(BytesUtils, ReadLeWriteLe_Int32_RoundTrip) { + const int32_t original = -2147483000; + std::array bytes{}; + + write_le(original, bytes.data()); + const int32_t decoded = read_le(bytes.data()); + + EXPECT_EQ(decoded, original); +} + +TEST(BytesUtils, ReadLeWriteLe_Int64_RoundTrip) { + const int64_t original = -9223372036854000000LL; + std::array bytes{}; + + write_le(original, bytes.data()); + const int64_t decoded = read_le(bytes.data()); + + EXPECT_EQ(decoded, original); +} + +TEST(BytesUtils, ReadLeWriteLe_Float_RoundTrip) { + const float original = -12345.625f; + std::array bytes{}; + + write_le(original, bytes.data()); + const float decoded = read_le(bytes.data()); + + EXPECT_FLOAT_EQ(decoded, original); +} + +TEST(BytesUtils, ReadLeWriteLe_Double_RoundTrip) { + const double original = 9876543210.125; + std::array bytes{}; + + write_le(original, bytes.data()); + const double decoded = read_le(bytes.data()); + + EXPECT_DOUBLE_EQ(decoded, original); +} + +TEST(BytesUtils, WriteReadU32Le_OffsetRoundTrip_VerifiesBytesAndGuards) { + constexpr size_t kPrefix = 3u; + constexpr size_t kValueSize = sizeof(uint32_t); + constexpr size_t kSuffix = 5u; + std::array bytes; + bytes.fill(0xDD); + + const uint32_t original = 0xD3A5C79Eu; + write_u32_le(bytes.data() + kPrefix, original); + const uint32_t decoded = read_u32_le(bytes.data() + kPrefix); + + for (size_t i = 0; i < kPrefix; ++i) { + EXPECT_EQ(bytes[i], 0xDD); + } + for (size_t i = 0; i < kSuffix; ++i) { + EXPECT_EQ(bytes[kPrefix + kValueSize + i], 0xDD); + } + EXPECT_EQ(bytes[kPrefix + 0], 0x9E); + EXPECT_EQ(bytes[kPrefix + 1], 0xC7); + EXPECT_EQ(bytes[kPrefix + 2], 0xA5); + EXPECT_EQ(bytes[kPrefix + 3], 0xD3); + EXPECT_EQ(decoded, original); +} + +TEST(BytesUtils, WriteReadLeInt32_OffsetRoundTrip_VerifiesBytesAndGuards) { + constexpr size_t kPrefix = 3u; + constexpr size_t kValueSize = sizeof(int32_t); + constexpr size_t kSuffix = 5u; + std::array bytes; + bytes.fill(0xDD); + + const int32_t original = 
0x6E91A2F3; + write_le(original, bytes.data() + kPrefix); + const int32_t decoded = read_le(bytes.data() + kPrefix); + + for (size_t i = 0; i < kPrefix; ++i) { + EXPECT_EQ(bytes[i], 0xDD); + } + for (size_t i = 0; i < kSuffix; ++i) { + EXPECT_EQ(bytes[kPrefix + kValueSize + i], 0xDD); + } + EXPECT_EQ(bytes[kPrefix + 0], 0xF3); + EXPECT_EQ(bytes[kPrefix + 1], 0xA2); + EXPECT_EQ(bytes[kPrefix + 2], 0x91); + EXPECT_EQ(bytes[kPrefix + 3], 0x6E); + EXPECT_EQ(decoded, original); +} + +TEST(BytesUtils, WriteReadLeInt64_OffsetRoundTrip_VerifiesBytesAndGuards) { + constexpr size_t kPrefix = 3u; + constexpr size_t kValueSize = sizeof(int64_t); + constexpr size_t kSuffix = 5u; + std::array bytes; + bytes.fill(0xDD); + + const int64_t original = 0x0102030405060708LL; + write_le(original, bytes.data() + kPrefix); + const int64_t decoded = read_le(bytes.data() + kPrefix); + + for (size_t i = 0; i < kPrefix; ++i) { + EXPECT_EQ(bytes[i], 0xDD); + } + for (size_t i = 0; i < kSuffix; ++i) { + EXPECT_EQ(bytes[kPrefix + kValueSize + i], 0xDD); + } + EXPECT_EQ(bytes[kPrefix + 0], 0x08); + EXPECT_EQ(bytes[kPrefix + 1], 0x07); + EXPECT_EQ(bytes[kPrefix + 2], 0x06); + EXPECT_EQ(bytes[kPrefix + 3], 0x05); + EXPECT_EQ(bytes[kPrefix + 4], 0x04); + EXPECT_EQ(bytes[kPrefix + 5], 0x03); + EXPECT_EQ(bytes[kPrefix + 6], 0x02); + EXPECT_EQ(bytes[kPrefix + 7], 0x01); + EXPECT_EQ(decoded, original); +} + +TEST(BytesUtils, WriteReadLeFloat_OffsetRoundTrip_VerifiesBytesAndGuards) { + constexpr size_t kPrefix = 3u; + constexpr size_t kValueSize = sizeof(float); + constexpr size_t kSuffix = 5u; + std::array bytes; + bytes.fill(0xDD); + + constexpr float kOriginal = -3.1415927f; + write_le(kOriginal, bytes.data() + kPrefix); + const float decoded = read_le(bytes.data() + kPrefix); + + for (size_t i = 0; i < kPrefix; ++i) { + EXPECT_EQ(bytes[i], 0xDD); + } + for (size_t i = 0; i < kSuffix; ++i) { + EXPECT_EQ(bytes[kPrefix + kValueSize + i], 0xDD); + } + EXPECT_FLOAT_EQ(decoded, kOriginal); +} + 
+TEST(BytesUtils, WriteReadLeDouble_OffsetRoundTrip_VerifiesBytesAndGuards) { + constexpr size_t kPrefix = 3u; + constexpr size_t kValueSize = sizeof(double); + constexpr size_t kSuffix = 5u; + std::array bytes; + bytes.fill(0xDD); + + constexpr double kOriginal = -3.141592653589793; + write_le(kOriginal, bytes.data() + kPrefix); + const double decoded = read_le(bytes.data() + kPrefix); + + for (size_t i = 0; i < kPrefix; ++i) { + EXPECT_EQ(bytes[i], 0xDD); + } + for (size_t i = 0; i < kSuffix; ++i) { + EXPECT_EQ(bytes[kPrefix + kValueSize + i], 0xDD); + } + EXPECT_DOUBLE_EQ(decoded, kOriginal); +} diff --git a/src/processing/typed_buffer_codecs.h b/src/processing/typed_buffer_codecs.h index 9e165fd..b3951d6 100644 --- a/src/processing/typed_buffer_codecs.h +++ b/src/processing/typed_buffer_codecs.h @@ -22,10 +22,19 @@ #include #include #include +#include "bytes_utils.h" #include "exceptions.h" namespace dbps::processing { +// The values in Int96 of low/mid/hi are stored in little endian order. +// The order of low/mid/hi in the C++ struct should be kept, otherwise the codec will yield incorrect values. +struct Int96 { + int32_t lo; + int32_t mid; + int32_t hi; +}; + template struct PlainValueCodec { using value_type = T; @@ -43,21 +52,53 @@ struct PlainValueCodec { return sizeof(T); } - // TODO: Make explicit endianness conversions to prevent architecture/in-memory representation incompatibility issues. 
- value_type Decode(tcb::span read_span) const { + inline value_type Decode(tcb::span read_span) const { if (read_span.size() != sizeof(T)) { throw InvalidInputException("Decode: read_span size does not match sizeof(T)"); } - T value; - std::memcpy(&value, read_span.data(), sizeof(T)); - return value; + return read_le(read_span.data()); } - void Encode(const value_type& value, tcb::span write_span) const { + inline void Encode(const value_type& value, tcb::span write_span) const { if (write_span.size() != sizeof(T)) { throw InvalidInputException("Encode: write_span size does not match sizeof(T)"); } - std::memcpy(write_span.data(), &value, sizeof(T)); + write_le(value, write_span.data()); + } +}; + +struct Int96Codec { + using value_type = Int96; + static constexpr bool is_fixed_sized = true; + static constexpr size_t kI32Size = sizeof(int32_t); + + static constexpr std::string_view type_name() noexcept { + return "INT96"; + } + + static constexpr size_t element_size() noexcept { + return sizeof(Int96); + } + + inline value_type Decode(tcb::span read_span) const { + if (read_span.size() != sizeof(Int96)) { + throw InvalidInputException("Decode: read_span size does not match Int96 element size"); + } + const uint8_t* p = read_span.data(); + return Int96{ + read_le(p + 0 * kI32Size), + read_le(p + 1 * kI32Size), + read_le(p + 2 * kI32Size)}; + } + + inline void Encode(const value_type& value, tcb::span write_span) const { + if (write_span.size() != sizeof(Int96)) { + throw InvalidInputException("Encode: write_span size does not match Int96 element size"); + } + uint8_t* p = write_span.data(); + write_le(value.lo, p + 0 * kI32Size); + write_le(value.mid, p + 1 * kI32Size); + write_le(value.hi, p + 2 * kI32Size); } }; @@ -79,11 +120,11 @@ struct RawBytesFixedSizedCodec { return element_size_bytes_; } - value_type Decode(tcb::span read_span) const noexcept { + inline value_type Decode(tcb::span read_span) const noexcept { return read_span; } - void Encode(const 
value_type& value, tcb::span write_span) const { + inline void Encode(const value_type& value, tcb::span write_span) const { if (value.size() != write_span.size()) { throw InvalidInputException("Encode: value size does not match write_span size"); } @@ -106,11 +147,11 @@ struct RawBytesVariableSizedCodec { throw InvalidInputException("RawBytesVariableSizedCodec does not have a fixed element size"); } - value_type Decode(tcb::span read_span) const noexcept { + inline value_type Decode(tcb::span read_span) const noexcept { return read_span; } - void Encode(const value_type& value, tcb::span write_span) const { + inline void Encode(const value_type& value, tcb::span write_span) const { if (value.size() != write_span.size()) { throw InvalidInputException("Encode: value size does not match write_span size"); } diff --git a/src/processing/typed_buffer_testing_codecs.h b/src/processing/typed_buffer_testing_codecs.h index 5f2f985..96f2af2 100644 --- a/src/processing/typed_buffer_testing_codecs.h +++ b/src/processing/typed_buffer_testing_codecs.h @@ -50,7 +50,7 @@ struct StringFixedSizedCodec { return element_size_bytes_; } - value_type Decode(tcb::span read_span) const { + inline value_type Decode(tcb::span read_span) const { if (read_span.size() != element_size_bytes_) { throw InvalidInputException("Decode: read_span size does not match element_size_bytes"); } @@ -59,7 +59,7 @@ struct StringFixedSizedCodec { read_span.size()); } - void Encode(const value_type& value, tcb::span write_span) const { + inline void Encode(const value_type& value, tcb::span write_span) const { if (write_span.size() != element_size_bytes_) { throw InvalidInputException("Encode: write_span size does not match element_size_bytes"); } @@ -85,13 +85,13 @@ struct StringVariableSizedCodec { throw InvalidInputException("StringVariableSizedCodec does not have a fixed element size"); } - value_type Decode(tcb::span read_span) const noexcept { + inline value_type Decode(tcb::span read_span) const noexcept 
{ return std::string_view( reinterpret_cast(read_span.data()), read_span.size()); } - void Encode(const value_type& value, tcb::span write_span) const { + inline void Encode(const value_type& value, tcb::span write_span) const { // Exact match required to prevent short values leaving stale trailing bytes, // and to prevent longer values from overflowing. if (value.size() != write_span.size()) { diff --git a/src/processing/typed_buffer_values.h b/src/processing/typed_buffer_values.h index 7a71285..481fe55 100644 --- a/src/processing/typed_buffer_values.h +++ b/src/processing/typed_buffer_values.h @@ -20,28 +20,22 @@ #include #include #include +#include "../common/bytes_utils.h" #include "typed_buffer_codecs.h" #include "typed_buffer.h" namespace dbps::processing { -struct Int96 { - int32_t lo; - int32_t mid; - int32_t hi; -}; - inline constexpr char kI32TypeName[] = "INT32"; inline constexpr char kI64TypeName[] = "INT64"; inline constexpr char kF32TypeName[] = "FLOAT"; inline constexpr char kF64TypeName[] = "DOUBLE"; -inline constexpr char kInt96TypeName[] = "INT96"; using TypedBufferI32 = ByteBuffer>; using TypedBufferI64 = ByteBuffer>; using TypedBufferFloat = ByteBuffer>; using TypedBufferDouble = ByteBuffer>; -using TypedBufferInt96 = ByteBuffer>; +using TypedBufferInt96 = ByteBuffer; using TypedBufferRawBytesFixedSized = ByteBuffer; using TypedBufferRawBytesVariableSized = ByteBuffer; @@ -63,10 +57,12 @@ inline std::string PrintableTypedValuesBuffer(const TypedValuesBuffer& buffer) { std::ostringstream out; const size_t num_elements = typed_buffer.GetNumElements(); + const size_t max_printable_elements = 20; + const size_t elements_to_print = std::min(num_elements, max_printable_elements); out << BufferType::type_name() << " (" << num_elements << " elements):\n"; - for (size_t i = 0; i < num_elements; ++i) { + for (size_t i = 0; i < elements_to_print; ++i) { const auto element = typed_buffer.GetElement(i); if constexpr (std::is_same_v) { out << " [" << i << "] 
[" << element.lo << ", " @@ -75,12 +71,19 @@ inline std::string PrintableTypedValuesBuffer(const TypedValuesBuffer& buffer) { out << " [" << i << "] \"" << element << "\" (length: " << element.size() << ")\n"; } else if constexpr (std::is_same_v>) { - out << " [" << i << "] <" << element.size() << " bytes>\n"; + auto printable_span = BytesToString(element); + out << " [" << i << "] \"" << printable_span + << "\" (length: " << element.size() << " bytes)\n"; } else { out << " [" << i << "] " << element << "\n"; } } + if (num_elements > max_printable_elements) { + out << " ... output truncated, showing first " << max_printable_elements + << " of " << num_elements << " elements\n"; + } + return out.str(); }, buffer); } diff --git a/src/processing/typed_buffer_values_test.cpp b/src/processing/typed_buffer_values_test.cpp index bcb4394..00f4f17 100644 --- a/src/processing/typed_buffer_values_test.cpp +++ b/src/processing/typed_buffer_values_test.cpp @@ -18,6 +18,7 @@ #include "typed_buffer_values.h" #include "typed_buffer_testing_codecs.h" +#include #include #include #include @@ -154,6 +155,27 @@ void ExpectInt96Eq(const Int96& actual, const Int96& expected) { } } // namespace +TEST(TypedBufferValuesTest, Int96Codec_ElementSizeAndTypeName) { + const Int96Codec codec; + EXPECT_EQ(codec.element_size(), 12u); + EXPECT_EQ(codec.type_name(), "INT96"); +} + +TEST(TypedBufferValuesTest, Int96Codec_DecodeWrongSize_Throws) { + const Int96Codec codec; + const std::array bytes = {0}; + + EXPECT_THROW((void)codec.Decode(tcb::span(bytes)), InvalidInputException); +} + +TEST(TypedBufferValuesTest, Int96Codec_EncodeWrongSize_Throws) { + const Int96Codec codec; + const Int96 value{1, 2, 3}; + std::array bytes = {0}; + + EXPECT_THROW(codec.Encode(value, tcb::span(bytes)), InvalidInputException); +} + TEST(TypedBufferValuesTest, Int96_ReadBack) { const Int96 a{1, 2, 3}; const Int96 b{-1, 0, 2147483647};