Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/client/dbps_api_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ using namespace dbps::external;
using namespace dbps::enum_utils;

// Generate a simple unique reference ID using timestamp
// TODO: Potentially not-unique if concurrent calls are made on the same millisecond.
// Can use atomic counters but may not be an issue to justify the complexity.

// Potential improvement:
// - Potentially not-unique if concurrent calls are made on the same millisecond.
// - Can use atomic counters but may not be an issue to justify the complexity.
std::string GenerateReferenceId() {
auto now = std::chrono::system_clock::now();
auto timestamp = std::chrono::duration_cast<std::chrono::milliseconds>(
Expand Down
6 changes: 3 additions & 3 deletions src/common/dbpa_remote.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ std::unique_ptr<DecryptionResult> RemoteDataBatchProtectionAgent::Decrypt(span<c
);

// Validate that response fields match request fields
// TODO: Add validation for encoding when these are expanded beyond PLAIN.
// Potential improvement: Add validation for encoding when these are expanded beyond PLAIN.
if (response.Success()) {
const auto& response_attrs = response.GetResponseAttributes();

Expand Down Expand Up @@ -452,7 +452,7 @@ std::shared_ptr<HttpClientBase> RemoteDataBatchProtectionAgent::InstantiateHttpC
// get the client for the given server_url_ with configured number of worker threads
std::size_t num_worker_threads = ExtractNumWorkerThreads(*config_json_opt);

// TODO: Split credentials config file key and credentials file from connection config.
// Potential improvement: Split credentials config file key and credentials file from connection config.
HttpClientBase::ClientCredentials credentials = ExtractClientCredentials(*config_json_opt);

std::shared_ptr<HttpClientBase> http_client =
Expand Down Expand Up @@ -549,7 +549,7 @@ HttplibPoolRegistry::PoolConfig RemoteDataBatchProtectionAgent::ExtractPoolConfi
pool_config.write_timeout = std::chrono::seconds(
get_int_or_default(config_json, "connection_pool.write_timeout_seconds", HttplibPoolRegistry::kDefaultWriteTimeout_s.count()));

//TODO: find a better way to log this.
//Potential improvement: find a better way to log this.
// Log the configured pool values for observability
std::cerr << "INFO: RemoteDataBatchProtectionAgent::init() - HTTP pool config {"
<< " max_pool_size=" << pool_config.max_pool_size
Expand Down
6 changes: 4 additions & 2 deletions src/common/dbpa_remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,10 @@ class DBPS_EXPORT RemoteDataBatchProtectionAgent : public DataBatchProtectionAge
// Constructor with HTTP client passed. Creates API_client immediately on the contructor.
explicit RemoteDataBatchProtectionAgent(std::shared_ptr<HttpClientBase> http_client);

// TODO: Split credentials config file key and credentials file from connection config.
// Currently these live on the same file, but should be separate as they may have different access permissions.
// Potential improvement:
// - Split credentials config file key and credentials file from connection config.
// - Currently these live on the same file, but should be separate as they may have different access permissions.

// Configuration map keys
inline static const std::string k_connection_config_key_ = "connection_config_file_path";

Expand Down
3 changes: 2 additions & 1 deletion src/common/json_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,9 @@ std::string EncodeBase64Safe(const std::vector<uint8_t>& data) {
}
}

// Potential improvement: Unify usage of nlohmann or crow json for parsing and dumping.

// Allow strings or nested structures in the JSON payload for authentication credentials.
// TODO(Issue #205): Unify usage of nlohmann or crow json for parsing and dumping.
std::optional<std::string> TokenRequest::ParseWithError(const std::string& request_body) {
credential_values_.clear();

Expand Down
5 changes: 3 additions & 2 deletions src/processing/parquet_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ int CalculateLevelBytesLength(tcb::span<const uint8_t> raw,
int32_t def_level_length = std::get<int32_t>(encoding_attribs.at("page_v2_definition_levels_byte_length"));
int32_t rep_level_length = std::get<int32_t>(encoding_attribs.at("page_v2_repetition_levels_byte_length"));
total_level_bytes = def_level_length + rep_level_length;

} else if (page_type == "DATA_PAGE_V1") {
// Check that encoding types are RLE (instead of BIT_PACKED which is deprecated)
const std::string& rep_encoding = std::get<std::string>(encoding_attribs.at("page_v1_repetition_level_encoding"));
Expand Down Expand Up @@ -301,7 +302,7 @@ LevelAndValueBytes DecompressAndSplit(
auto page_type = std::get<std::string>(encoding_attributes.at("page_type"));

// On DATA_PAGE_V1, the whole payload is compressed.
// So the split of level and value byte requires to
// So the split of level and value byte requires to:
// (1) decompress the whole payload, (2) calculate length of level bytes, (3) split into level and value bytes.
if (page_type == "DATA_PAGE_V1") {
auto decompressed_bytes = Decompress(plaintext, compression);
Expand Down Expand Up @@ -330,7 +331,7 @@ LevelAndValueBytes DecompressAndSplit(
}

// On DATA_PAGE_V2, only the value bytes are compressed.
// So the split of level and value byte requires to
// So the split of level and value byte requires to:
// (1) calculate length of level bytes, (2) split into level, (3) decompress only the value bytes.
if (page_type == "DATA_PAGE_V2") {
int leading_bytes_to_strip = CalculateLevelBytesLength(
Expand Down
15 changes: 4 additions & 11 deletions src/processing/typed_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ class ByteBuffer {

// Attribute for the number of elements in the buffer (a const)
// - `num_elements_` is a const and passed during construction of both read-only and write buffers.
// - It indicates the expected number of elements in the buffer payload declared upfront.
// - This is treated as an invariant, so if the payload count mismatches, exceptions are thrown.
// - It indicates the expected number of elements in the buffer payload.
// - This is treated as an invariant. If the actual payload count mismatches, exceptions are thrown.
const size_t num_elements_;

// Variables for element span iterator.
Expand Down Expand Up @@ -155,7 +155,7 @@ ByteBuffer<Codec>::ByteBuffer(
Codec codec)
: elements_span_(elements_span),
elements_span_size_(elements_span.size()),
// `num_elements_` is the expected number of elements in the buffer declared upfront.
// `num_elements_` is the expected number of elements in the buffer.
// - if the actual payload count mismatches, exceptions are thrown.
num_elements_(num_elements),
codec_(std::move(codec)),
Expand Down Expand Up @@ -206,15 +206,12 @@ inline void ByteBuffer<Codec>::InitializeFromSpan() const {
// Fixed-size layout has implicit offsets from index * element_size.
// We validate shape and derive element count. No need to store offsets.
if constexpr (is_fixed_sized) {
if (element_size_ <= 0) {
throw InvalidInputException("Invalid fixed-size buffer: element_size must be greater than zero");
}
if ((readable_size % element_size_) != 0) {
throw InvalidInputException("Malformed fixed-size buffer: buffer does not align with element_size");
}

// Check if the num_elements passed at contruction time coincides with the calculated from the payload size.
// This is a division of integer values, however it results in a correct integer result because of the modulo guard above.
// Although this is a division of integers, the result is an integer (no remainder) because of the modulo guard above.
const size_t num_elements_on_payload = readable_size / element_size_;
if (num_elements_on_payload != num_elements_) {
throw InvalidInputException("Malformed fixed-size buffer: num_elements on payload != num_elements_ expected.");
Expand Down Expand Up @@ -454,10 +451,6 @@ template <class Codec>
inline void ByteBuffer<Codec>::InitializeForWriteBuffer(size_t variable_size_reserved_bytes_hint) {
// Fixed-size elements
if constexpr (is_fixed_sized) {
if (element_size_ <= 0) {
throw InvalidInputException("Invalid fixed-size buffer: element_size must be greater than zero");
}

// write_buffer can be allocated to precise size since the element size and number of elements are known.
// We initialize it to 0s to have random-ish access during writes.
const size_t fixed_size_total_bytes = prefix_size_ + (num_elements_ * element_size_);
Expand Down
5 changes: 4 additions & 1 deletion src/server/dbps_api_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,11 @@ int main(int argc, char* argv[]) {
// Initialize credentials file path and JWT secret key with parsed command line options
std::optional<std::string> credentials_file_path = std::nullopt;
std::string jwt_secret_key = "default-secret-key-overwritten-by-command-line";
// TODO: Flip allow_missing_credentials=False when instructions are updated with the cmdline flags to run the server.

// `allow_missing_credentials` is set to true to allow a missing credentials file to be used.
// This is useful for development and testing purposes, but should be set to false in production.
bool allow_missing_credentials = true;

try {
cxxopts::Options options("dbps_api_server", "Data Batch Protection Service API Server");
options.add_options()
Expand Down
Loading