From 2283a9e90b13b6fc2b2fe4f8a11afa9917e85dde Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 25 May 2026 18:43:07 +0800 Subject: [PATCH 1/9] feat(blob): Support blob-descriptor-field for inline blob descriptor storage --- include/paimon/data/blob.h | 3 +- include/paimon/defs.h | 2 +- src/paimon/CMakeLists.txt | 3 + src/paimon/common/data/blob.cpp | 5 +- src/paimon/common/data/blob_descriptor.h | 2 +- src/paimon/common/data/blob_test.cpp | 14 +- src/paimon/common/data/blob_utils.cpp | 80 +- src/paimon/common/data/blob_utils.h | 23 +- src/paimon/common/data/blob_utils_test.cpp | 188 ++- src/paimon/core/append/append_only_writer.cpp | 83 +- src/paimon/core/append/append_only_writer.h | 9 +- .../casting/binary_to_blob_cast_executor.cpp | 81 ++ .../casting/binary_to_blob_cast_executor.h | 42 + .../core/casting/cast_executor_factory.cpp | 3 + .../casting/cast_executor_factory_test.cpp | 8 + .../core/casting/cast_executor_test.cpp | 38 + src/paimon/core/io/data_file_path_factory.h | 6 + .../core/io/data_file_path_factory_test.cpp | 15 + .../core/io/external_storage_blob_writer.cpp | 229 ++++ .../core/io/external_storage_blob_writer.h | 110 ++ .../io/external_storage_blob_writer_test.cpp | 150 ++ .../core/io/field_mapping_reader_test.cpp | 40 +- .../core/io/rolling_blob_file_writer.cpp | 5 +- src/paimon/core/io/rolling_blob_file_writer.h | 5 +- .../core/operation/abstract_split_read.cpp | 12 +- .../append_only_file_store_write.cpp | 8 +- .../operation/append_only_file_store_write.h | 1 - src/paimon/core/operation/file_store_scan.cpp | 5 +- src/paimon/core/schema/schema_validation.cpp | 8 +- src/paimon/core/utils/field_mapping.cpp | 43 +- src/paimon/core/utils/field_mapping.h | 11 +- src/paimon/core/utils/field_mapping_test.cpp | 70 +- .../format/avro/avro_direct_encoder.cpp | 9 +- .../avro/avro_file_batch_reader_test.cpp | 50 + .../format/avro/avro_schema_converter.cpp | 1 + .../format/avro/avro_stats_extractor.cpp | 1 + .../blob/blob_file_batch_reader_test.cpp | 7 +- 
src/paimon/format/blob/blob_format_writer.cpp | 54 +- src/paimon/format/blob/blob_format_writer.h | 21 +- .../format/blob/blob_format_writer_test.cpp | 125 +- src/paimon/format/blob/blob_writer_builder.h | 14 +- .../format/blob/blob_writer_builder_test.cpp | 37 + src/paimon/format/orc/orc_adapter.cpp | 4 + src/paimon/format/orc/orc_adapter_test.cpp | 44 +- .../format/orc/orc_file_batch_reader_test.cpp | 44 + .../parquet_file_batch_reader_test.cpp | 46 + src/paimon/testing/utils/test_helper.h | 11 +- test/inte/blob_table_inte_test.cpp | 1215 ++++++++++++++--- 48 files changed, 2590 insertions(+), 395 deletions(-) create mode 100644 src/paimon/core/casting/binary_to_blob_cast_executor.cpp create mode 100644 src/paimon/core/casting/binary_to_blob_cast_executor.h create mode 100644 src/paimon/core/io/external_storage_blob_writer.cpp create mode 100644 src/paimon/core/io/external_storage_blob_writer.h create mode 100644 src/paimon/core/io/external_storage_blob_writer_test.cpp diff --git a/include/paimon/data/blob.h b/include/paimon/data/blob.h index 9396bbb95..aa0cb54cf 100644 --- a/include/paimon/data/blob.h +++ b/include/paimon/data/blob.h @@ -97,7 +97,8 @@ class PAIMON_EXPORT Blob { /// @param metadata A map of key-value metadata to be attached to the field. /// @return A result containing a unique pointer to the generated `ArrowSchema` or an error. static Result> ArrowField( - const std::string& field_name, std::unordered_map metadata = {}); + const std::string& field_name, bool nullable = false, + std::unordered_map metadata = {}); private: class Impl; diff --git a/include/paimon/defs.h b/include/paimon/defs.h index 400e59e84..70f2aa0b0 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -365,7 +365,7 @@ struct PAIMON_EXPORT Options { /// "partition.legacy-name" - The legacy partition name is using `ToString` for all types. If /// false, using casting to string for all types. Default value is "true". 
static const char PARTITION_GENERATE_LEGACY_NAME[]; - /// "blob-as-descriptor" - Read and write blob field using blob descriptor rather than blob + /// "blob-as-descriptor" - Read blob field using blob descriptor rather than blob /// bytes. Default value is "false". static const char BLOB_AS_DESCRIPTOR[]; /// "blob-field" - Specifies column names that should be stored as blob type. This is used diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 5d39247d7..6d2c2697d 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -159,6 +159,7 @@ set(PAIMON_CORE_SRCS core/bucket/hive_bucket_function.cpp core/bucket/mod_bucket_function.cpp core/bucket/bucket_id_calculator.cpp + core/casting/binary_to_blob_cast_executor.cpp core/casting/binary_to_string_cast_executor.cpp core/casting/boolean_to_decimal_cast_executor.cpp core/casting/boolean_to_numeric_cast_executor.cpp @@ -222,6 +223,7 @@ set(PAIMON_CORE_SRCS core/io/key_value_meta_projection_consumer.cpp core/io/key_value_projection_consumer.cpp core/io/key_value_projection_reader.cpp + core/io/external_storage_blob_writer.cpp core/io/multiple_blob_file_writer.cpp core/io/rolling_blob_file_writer.cpp core/manifest/file_kind.cpp @@ -603,6 +605,7 @@ if(PAIMON_BUILD_TESTS) core/io/file_index_evaluator_test.cpp core/io/single_file_writer_test.cpp core/io/rolling_blob_file_writer_test.cpp + core/io/external_storage_blob_writer_test.cpp core/global_index/indexed_split_test.cpp core/manifest/file_source_test.cpp core/manifest/file_kind_test.cpp diff --git a/src/paimon/common/data/blob.cpp b/src/paimon/common/data/blob.cpp index d5b500dd5..336f89641 100644 --- a/src/paimon/common/data/blob.cpp +++ b/src/paimon/common/data/blob.cpp @@ -106,8 +106,9 @@ Result> Blob::ToData(const std::shared_ptr& } Result> Blob::ArrowField( - const std::string& field_name, std::unordered_map metadata) { - auto blob_field = BlobUtils::ToArrowField(field_name, /*nullable=*/false, metadata); + const std::string& 
field_name, bool nullable, + std::unordered_map metadata) { + auto blob_field = BlobUtils::ToArrowField(field_name, nullable, metadata); auto field = std::make_unique<::ArrowSchema>(); PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportField(*blob_field, field.get())); return field; diff --git a/src/paimon/common/data/blob_descriptor.h b/src/paimon/common/data/blob_descriptor.h index 526a1050a..090577787 100644 --- a/src/paimon/common/data/blob_descriptor.h +++ b/src/paimon/common/data/blob_descriptor.h @@ -38,7 +38,7 @@ namespace paimon { /// | 13 + N | offset | long | 8 | /// | 21 + N | length | long | 8 | -class BlobDescriptor { +class PAIMON_EXPORT BlobDescriptor { public: static Result> Create(const std::string& uri, int64_t offset, int64_t length); diff --git a/src/paimon/common/data/blob_test.cpp b/src/paimon/common/data/blob_test.cpp index a105ea1f6..9a6f709e0 100644 --- a/src/paimon/common/data/blob_test.cpp +++ b/src/paimon/common/data/blob_test.cpp @@ -144,38 +144,34 @@ TEST_F(BlobTest, TestNewInputStreamWithDynamicLength) { } TEST_F(BlobTest, TestArrowField) { - { - // basic: field name, non-nullable by default - ASSERT_OK_AND_ASSIGN(auto schema, Blob::ArrowField("my_blob")); + for (bool nullable : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto schema, Blob::ArrowField("my_blob", nullable)); ASSERT_NE(schema, nullptr); - // import back to arrow::Field to verify auto field_result = arrow::ImportField(schema.get()); ASSERT_TRUE(field_result.ok()); auto field = field_result.ValueUnsafe(); ASSERT_EQ(field->name(), "my_blob"); ASSERT_EQ(field->type()->id(), arrow::Type::LARGE_BINARY); - ASSERT_FALSE(field->nullable()); + ASSERT_EQ(field->nullable(), nullable); ASSERT_TRUE(field->HasMetadata()); auto extension_type = field->metadata()->Get("paimon.extension.type"); ASSERT_TRUE(extension_type.ok()); ASSERT_EQ(extension_type.ValueUnsafe(), "paimon.type.blob"); } { - // with custom metadata std::unordered_map custom_metadata = { {"custom_key", "custom_value"}}; - 
ASSERT_OK_AND_ASSIGN(auto schema, Blob::ArrowField("meta_blob", custom_metadata)); + ASSERT_OK_AND_ASSIGN(auto schema, + Blob::ArrowField("meta_blob", /*nullable=*/false, custom_metadata)); auto field = arrow::ImportField(schema.get()).ValueUnsafe(); ASSERT_EQ(field->name(), "meta_blob"); ASSERT_FALSE(field->nullable()); ASSERT_TRUE(field->HasMetadata()); - // blob extension metadata should be present auto extension_type = field->metadata()->Get("paimon.extension.type"); ASSERT_TRUE(extension_type.ok()); ASSERT_EQ(extension_type.ValueUnsafe(), "paimon.type.blob"); - // custom metadata should also be present auto custom_val = field->metadata()->Get("custom_key"); ASSERT_TRUE(custom_val.ok()); ASSERT_EQ(custom_val.ValueUnsafe(), "custom_value"); diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index 84835071d..ca0f7923e 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -17,65 +17,71 @@ #include "paimon/common/data/blob_utils.h" #include -#include #include #include "arrow/api.h" #include "arrow/array/array_nested.h" #include "arrow/type.h" #include "paimon/common/data/blob_defs.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/string_utils.h" - namespace arrow { class Array; } namespace paimon { - BlobUtils::SeparatedSchemas BlobUtils::SeparateBlobSchema( - const std::shared_ptr& schema) { - std::vector> remaining_fields; + const std::shared_ptr& schema, const std::set& inline_fields) { + std::vector> main_fields; std::vector> blob_fields; - for (auto i = 0; i < schema->num_fields(); i++) { + for (int i = 0; i < schema->num_fields(); i++) { auto field = schema->field(i); - if (IsBlobField(field)) { + if (IsBlobField(field) && inline_fields.count(field->name()) == 0) { + // Non-inline BLOB -> goes to blob file blob_fields.emplace_back(field); } else { - remaining_fields.emplace_back(field); + // 
Non-blob fields OR inline BLOB fields -> stay in main + main_fields.emplace_back(field); } } SeparatedSchemas result; - result.main_schema = arrow::schema(remaining_fields); + result.main_schema = arrow::schema(main_fields); result.blob_schema = arrow::schema(blob_fields); return result; } Result BlobUtils::SeparateBlobArray( - const std::shared_ptr& struct_array) { + const std::shared_ptr& struct_array, + const std::set& inline_fields) { std::shared_ptr old_type = std::static_pointer_cast(struct_array->type()); const auto& old_fields = old_type->fields(); const auto& old_arrays = struct_array->fields(); - std::vector> remaining_fields; - std::vector> remaining_arrays; - std::vector> blob_fields; - std::vector> blob_arrays; + arrow::ArrayVector main_arrays; + arrow::ArrayVector blob_arrays; + arrow::FieldVector main_fields; + arrow::FieldVector blob_fields; for (size_t i = 0; i < old_fields.size(); i++) { - if (IsBlobField(old_fields[i])) { + if (IsBlobField(old_fields[i]) && inline_fields.count(old_fields[i]->name()) == 0) { blob_fields.push_back(old_fields[i]); blob_arrays.push_back(old_arrays[i]); } else { - remaining_fields.push_back(old_fields[i]); - remaining_arrays.push_back(old_arrays[i]); + main_fields.push_back(old_fields[i]); + main_arrays.push_back(old_arrays[i]); } } + if (blob_fields.empty()) { + return Status::Invalid( + "SeparateBlobArray expects at least one non-inline blob field, but got none."); + } + SeparatedStructArrays result; PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(result.main_array, - arrow::StructArray::Make(remaining_arrays, remaining_fields)); + arrow::StructArray::Make(main_arrays, main_fields)); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(result.blob_array, arrow::StructArray::Make(blob_arrays, blob_fields)); return result; @@ -114,4 +120,42 @@ std::shared_ptr BlobUtils::ToArrowField( return arrow::field(field_name, arrow::large_binary(), nullable, std::make_shared(metadata)); } + +Status BlobUtils::ValidateInlineBlobDescriptors( + const 
std::shared_ptr& struct_array, + const std::set& inline_descriptor_fields) { + if (inline_descriptor_fields.empty()) { + return Status::OK(); + } + if (!struct_array) { + return Status::Invalid("array in ValidateInlineBlobDescriptors must be a struct_array"); + } + for (const auto& field_name : inline_descriptor_fields) { + auto field_array = struct_array->GetFieldByName(field_name); + if (!field_array) { + continue; + } + const auto* binary_array = + arrow::internal::checked_cast(field_array.get()); + if (!binary_array) { + return Status::Invalid( + fmt::format("cannot cast array for field {} to LargeBinaryArray", field_name)); + } + for (int64_t row = 0; row < binary_array->length(); ++row) { + if (binary_array->IsNull(row)) { + continue; + } + auto value = binary_array->GetView(row); + PAIMON_ASSIGN_OR_RAISE(bool is_descriptor, + BlobDescriptor::IsBlobDescriptor(value.data(), value.size())); + if (!is_descriptor) { + return Status::Invalid( + "BLOB inline fields configured by blob-descriptor-field or blob-view-field " + "require values to be a BlobDescriptor or BlobViewStruct."); + } + } + } + return Status::OK(); +} + } // namespace paimon diff --git a/src/paimon/common/data/blob_utils.h b/src/paimon/common/data/blob_utils.h index 2b5539f7a..58c6e95af 100644 --- a/src/paimon/common/data/blob_utils.h +++ b/src/paimon/common/data/blob_utils.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -38,23 +39,29 @@ class PAIMON_EXPORT BlobUtils { ~BlobUtils() = delete; struct SeparatedSchemas { - /// Non-blob fields + /// Non-blob fields (includes inline blob fields when inline_fields is provided) std::shared_ptr main_schema; - /// Blob fields only + /// Blob fields that go to separate .blob files std::shared_ptr blob_schema; }; struct SeparatedStructArrays { - /// Non-blob fields + /// Non-blob fields (includes inline blob fields when inline_fields is provided) std::shared_ptr main_array; - /// Blob fields only + /// Blob fields that go to separate 
.blob files std::shared_ptr blob_array; }; - static SeparatedSchemas SeparateBlobSchema(const std::shared_ptr& schema); + /// Separates schema with inline field awareness. + /// BLOB fields in inline_fields stay in main_schema; others go to blob_schema. + static SeparatedSchemas SeparateBlobSchema(const std::shared_ptr& schema, + const std::set& inline_fields); + /// Separates array with inline field awareness. + /// BLOB fields in inline_fields stay in main_array; others go to blob_array. static Result SeparateBlobArray( - const std::shared_ptr& struct_array); + const std::shared_ptr& struct_array, + const std::set& inline_fields); static bool IsBlobField(const std::shared_ptr& field); static bool IsBlobMetadata(const std::shared_ptr& metadata); @@ -63,6 +70,10 @@ class PAIMON_EXPORT BlobUtils { static std::shared_ptr ToArrowField( const std::string& field_name, bool nullable = false, std::unordered_map metadata = {}); + + static Status ValidateInlineBlobDescriptors( + const std::shared_ptr& struct_array, + const std::set& inline_descriptor_fields); }; } // namespace paimon diff --git a/src/paimon/common/data/blob_utils_test.cpp b/src/paimon/common/data/blob_utils_test.cpp index f8835379d..8d0e507b8 100644 --- a/src/paimon/common/data/blob_utils_test.cpp +++ b/src/paimon/common/data/blob_utils_test.cpp @@ -20,7 +20,9 @@ #include "arrow/c/bridge.h" #include "gtest/gtest.h" #include "paimon/common/data/blob_defs.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/data/blob.h" +#include "paimon/memory/memory_pool.h" #include "paimon/testing/utils/testharness.h" namespace paimon::test { @@ -74,7 +76,8 @@ TEST_F(BlobUtilsTest, SeparateBlobSchema) { std::shared_ptr original_schema = arrow::schema({int_field, string_field, blob_field_1}); - BlobUtils::SeparatedSchemas schemas = BlobUtils::SeparateBlobSchema(original_schema); + BlobUtils::SeparatedSchemas schemas = + BlobUtils::SeparateBlobSchema(original_schema, /*inline_fields=*/{}); std::shared_ptr 
expected_main_schema = arrow::schema({int_field, string_field}); @@ -85,17 +88,46 @@ TEST_F(BlobUtilsTest, SeparateBlobSchema) { } { std::shared_ptr no_blob_schema = arrow::schema({int_field, string_field}); - BlobUtils::SeparatedSchemas no_blob_schemas = BlobUtils::SeparateBlobSchema(no_blob_schema); + BlobUtils::SeparatedSchemas no_blob_schemas = + BlobUtils::SeparateBlobSchema(no_blob_schema, /*inline_fields=*/{}); ASSERT_TRUE(no_blob_schemas.main_schema->Equals(*no_blob_schema)); ASSERT_EQ(no_blob_schemas.blob_schema->num_fields(), 0); } { std::shared_ptr only_blob_schema = arrow::schema({blob_field_1}); BlobUtils::SeparatedSchemas only_blob_schemas = - BlobUtils::SeparateBlobSchema(only_blob_schema); + BlobUtils::SeparateBlobSchema(only_blob_schema, /*inline_fields=*/{}); ASSERT_TRUE(only_blob_schemas.blob_schema->Equals(*only_blob_schema)); ASSERT_EQ(only_blob_schemas.main_schema->num_fields(), 0); } + { + // Inline blob field stays in main_schema instead of going to blob_schema + auto blob_field_2 = BlobUtils::ToArrowField("f4_blob_2", false); + std::shared_ptr schema = + arrow::schema({int_field, blob_field_1, blob_field_2, string_field}); + + BlobUtils::SeparatedSchemas schemas = + BlobUtils::SeparateBlobSchema(schema, /*inline_fields=*/{"f3_blob_1"}); + + // f3_blob_1 is inline -> stays in main; f4_blob_2 goes to blob + std::shared_ptr expected_main = + arrow::schema({int_field, blob_field_1, string_field}); + ASSERT_TRUE(schemas.main_schema->Equals(*expected_main)); + + std::shared_ptr expected_blob = arrow::schema({blob_field_2}); + ASSERT_TRUE(schemas.blob_schema->Equals(*expected_blob)); + } + { + // All blob fields are inline -> blob_schema is empty + std::shared_ptr schema = + arrow::schema({int_field, blob_field_1, string_field}); + + BlobUtils::SeparatedSchemas schemas = + BlobUtils::SeparateBlobSchema(schema, /*inline_fields=*/{"f3_blob_1"}); + + ASSERT_TRUE(schemas.main_schema->Equals(*schema)); + ASSERT_EQ(schemas.blob_schema->num_fields(), 0); 
+ } } TEST_F(BlobUtilsTest, SeparateBlobArray) { @@ -125,7 +157,8 @@ TEST_F(BlobUtilsTest, SeparateBlobArray) { std::shared_ptr struct_array = std::static_pointer_cast(raw_struct_array); - ASSERT_OK_AND_ASSIGN(auto separated, BlobUtils::SeparateBlobArray(struct_array)); + ASSERT_OK_AND_ASSIGN(auto separated, + BlobUtils::SeparateBlobArray(struct_array, /*inline_fields=*/{})); std::shared_ptr expected_main_type = arrow::struct_({int_field, string_field}); ASSERT_TRUE(separated.main_array->type()->Equals(*expected_main_type)); @@ -137,6 +170,153 @@ TEST_F(BlobUtilsTest, SeparateBlobArray) { ASSERT_TRUE(separated.blob_array->type()->Equals(*expected_blob_type)); ASSERT_EQ(separated.blob_array->num_fields(), 1); ASSERT_TRUE(separated.blob_array->field(0)->Equals(*blob_array_data)); + + // All blob fields are inline -> should return error (no blob field to separate) + ASSERT_NOK_WITH_MSG( + BlobUtils::SeparateBlobArray(struct_array, /*inline_fields=*/{"f2_blob"}), + "SeparateBlobArray expects at least one non-inline blob field, but got none."); +} + +TEST_F(BlobUtilsTest, SeparateBlobArrayWithPartialInline) { + auto int_field = arrow::field("f1_int", arrow::int32()); + std::shared_ptr blob_field_1 = BlobUtils::ToArrowField("f2_blob_1", false); + std::shared_ptr blob_field_2 = BlobUtils::ToArrowField("f3_blob_2", true); + auto schema = arrow::schema({int_field, blob_field_1, blob_field_2}); + + arrow::Int32Builder int_builder; + ASSERT_TRUE(int_builder.AppendValues({1, 2}).ok()); + auto int_array = int_builder.Finish().ValueOrDie(); + + arrow::LargeBinaryBuilder blob_builder_1; + ASSERT_TRUE(blob_builder_1.Append("a", 1).ok()); + ASSERT_TRUE(blob_builder_1.Append("b", 1).ok()); + auto blob_array_1 = blob_builder_1.Finish().ValueOrDie(); + + arrow::LargeBinaryBuilder blob_builder_2; + ASSERT_TRUE(blob_builder_2.Append("x", 1).ok()); + ASSERT_TRUE(blob_builder_2.AppendNull().ok()); + auto blob_array_2 = blob_builder_2.Finish().ValueOrDie(); + + auto raw_struct_array = + 
arrow::StructArray::Make({int_array, blob_array_1, blob_array_2}, schema->fields()) + .ValueOrDie(); + auto struct_array = std::static_pointer_cast(raw_struct_array); + + // f2_blob_1 is inline, f3_blob_2 goes to blob + ASSERT_OK_AND_ASSIGN(auto separated, BlobUtils::SeparateBlobArray( + struct_array, /*inline_fields=*/{"f2_blob_1"})); + + std::shared_ptr expected_main_type = arrow::struct_({int_field, blob_field_1}); + ASSERT_TRUE(separated.main_array->type()->Equals(*expected_main_type)); + ASSERT_EQ(separated.main_array->num_fields(), 2); + ASSERT_TRUE(separated.main_array->field(0)->Equals(*int_array)); + ASSERT_TRUE(separated.main_array->field(1)->Equals(*blob_array_1)); + + std::shared_ptr expected_blob_type = arrow::struct_({blob_field_2}); + ASSERT_TRUE(separated.blob_array->type()->Equals(*expected_blob_type)); + ASSERT_EQ(separated.blob_array->num_fields(), 1); + ASSERT_TRUE(separated.blob_array->field(0)->Equals(*blob_array_2)); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsEmptyFields) { + // Empty inline_descriptor_fields -> always OK + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append("random_data").ok()); + auto array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateInlineBlobDescriptors(sa, {})); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsFieldNotPresent) { + // Field not in struct_array -> skip, OK + arrow::Int32Builder int_builder; + ASSERT_TRUE(int_builder.Append(42).ok()); + auto int_array = int_builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({int_array}, {arrow::field("f0", arrow::int32())}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + // "b0" does not exist in the struct -> should pass + ASSERT_OK(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"})); +} + +TEST_F(BlobUtilsTest, 
ValidateInlineBlobDescriptorsWithValidDescriptor) { + // Valid BlobDescriptor bytes -> OK + auto pool = GetDefaultPool(); + ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); + auto serialized = descriptor->Serialize(pool); + + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"})); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsWithNullValue) { + // Null values in blob column -> skip, OK + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.AppendNull().ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"})); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsWithRawBytes) { + // Raw bytes (not a descriptor) -> error + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append("not_a_descriptor_just_raw_data").ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline fields configured by blob-descriptor-field"); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMixedValidAndInvalid) { + // First row is valid descriptor, second row is raw bytes -> error on row 1 + auto pool = GetDefaultPool(); + ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 
100)); + auto serialized = descriptor->Serialize(pool); + + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); + ASSERT_TRUE(builder.Append("raw_bytes_not_descriptor").ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline fields configured by blob-descriptor-field"); +} + +TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { + // Two inline fields: b0 is valid, b1 has raw bytes -> error on b1 + auto pool = GetDefaultPool(); + ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); + auto serialized = descriptor->Serialize(pool); + + arrow::LargeBinaryBuilder b0_builder; + ASSERT_TRUE(b0_builder.Append(serialized->data(), serialized->size()).ok()); + auto b0_array = b0_builder.Finish().ValueOrDie(); + + arrow::LargeBinaryBuilder b1_builder; + ASSERT_TRUE(b1_builder.Append("invalid_raw_data").ok()); + auto b1_array = b1_builder.Finish().ValueOrDie(); + + auto struct_array = + arrow::StructArray::Make({b0_array, b1_array}, + {BlobUtils::ToArrowField("b0"), BlobUtils::ToArrowField("b1")}) + .ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0", "b1"}), + "BLOB inline fields configured by blob-descriptor-field"); } } // namespace paimon::test diff --git a/src/paimon/core/append/append_only_writer.cpp b/src/paimon/core/append/append_only_writer.cpp index a8ab13ea2..b27c98fdd 100644 --- a/src/paimon/core/append/append_only_writer.cpp +++ b/src/paimon/core/append/append_only_writer.cpp @@ -33,11 +33,13 @@ #include "paimon/core/io/data_file_path_factory.h" #include "paimon/core/io/data_file_writer.h" #include 
"paimon/core/io/data_increment.h" +#include "paimon/core/io/external_storage_blob_writer.h" #include "paimon/core/io/multiple_blob_file_writer.h" #include "paimon/core/io/rolling_blob_file_writer.h" #include "paimon/core/io/rolling_file_writer.h" #include "paimon/core/io/single_file_writer.h" #include "paimon/core/manifest/file_source.h" +#include "paimon/core/operation/blob_file_context.h" #include "paimon/core/utils/commit_increment.h" #include "paimon/format/file_format.h" #include "paimon/format/file_format_factory.h" @@ -82,6 +84,36 @@ Status AppendOnlyWriter::Write(std::unique_ptr&& batch) { if (writer_ == nullptr) { PAIMON_ASSIGN_OR_RAISE(writer_, CreateRollingRowWriter()); } + + // Transform batch for external storage descriptor fields before writing. + if (external_storage_writer_) { + auto data_type = arrow::struct_(write_schema_->fields()); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_array, + arrow::ImportArray(batch->GetData(), data_type)); + auto struct_array = std::dynamic_pointer_cast(arrow_array); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr transformed, + external_storage_writer_->TransformBatch(struct_array)); + auto transformed_struct = std::dynamic_pointer_cast(transformed); + // TODO(lc.lsz): validate blob view + PAIMON_RETURN_NOT_OK(BlobUtils::ValidateInlineBlobDescriptors(transformed_struct, + inline_descriptor_fields_)); + ::ArrowArray c_transformed; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*transformed, &c_transformed)); + return writer_->Write(&c_transformed); + } + + if (!inline_descriptor_fields_.empty()) { + auto data_type = arrow::struct_(write_schema_->fields()); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_array, + arrow::ImportArray(batch->GetData(), data_type)); + auto struct_array = std::dynamic_pointer_cast(arrow_array); + // TODO(lc.lsz): validate blob view + PAIMON_RETURN_NOT_OK( + BlobUtils::ValidateInlineBlobDescriptors(struct_array, inline_descriptor_fields_)); + ::ArrowArray c_array; + 
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*struct_array, &c_array)); + return writer_->Write(&c_array); + } return writer_->Write(batch->GetData()); } @@ -153,14 +185,45 @@ Status AppendOnlyWriter::Flush(bool wait_for_latest_compaction, bool forced_full return Status::OK(); } -AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingRowWriter() const { - auto schemas = BlobUtils::SeparateBlobSchema(write_schema_); - if (schemas.blob_schema && schemas.blob_schema->num_fields() > 0) { - return CreateRollingBlobWriter(schemas); +AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingRowWriter() { + auto blob_context = BlobFileContext::Create(write_schema_, options_); + std::optional> main_write_cols = write_cols_; + + // Save inline descriptor fields for validation in Write() + if (blob_context) { + inline_descriptor_fields_ = blob_context->GetDescriptorFields(); + } + + // Initialize ExternalStorageBlobWriter if needed + if (blob_context && blob_context->RequireExternalStorageWriter()) { + assert(blob_context->GetExternalStoragePath()); + external_storage_writer_ = std::make_unique( + write_schema_, blob_context->GetExternalStorageFields(), + blob_context->GetExternalStoragePath().value(), schema_id_, seq_num_counter_, + options_.GetFileSystem(), path_factory_, memory_pool_, options_); + if (!main_write_cols) { + // To align with java, when require external storage writer, main writer will set write + // cols in DataFileMeta + main_write_cols = write_schema_->field_names(); + } + } + + if (blob_context && blob_context->RequireBlobFileWriter()) { + // Use context-aware schema separation: inline BLOB fields stay in main + auto schemas = + BlobUtils::SeparateBlobSchema(write_schema_, blob_context->GetInlineFields()); + return CreateRollingBlobWriter(schemas, blob_context->GetInlineFields()); + } else if (!blob_context) { + // No BLOB fields at all -> plain rolling writer + return std::make_unique>>( + 
options_.GetTargetFileSize(/*has_primary_key=*/false), + GetDataFileWriterCreator(write_schema_, main_write_cols)); } else { + // All BLOB fields are inline, no .blob files needed -> plain rolling writer + // The main data file contains all fields including inline descriptors/views. return std::make_unique>>( options_.GetTargetFileSize(/*has_primary_key=*/false), - GetDataFileWriterCreator(write_schema_, write_cols_)); + GetDataFileWriterCreator(write_schema_, main_write_cols)); } } @@ -212,7 +275,7 @@ AppendOnlyWriter::SingleFileWriterCreator AppendOnlyWriter::GetBlobFileWriterCre } AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingBlobWriter( - const BlobUtils::SeparatedSchemas& schemas) const { + const BlobUtils::SeparatedSchemas& schemas, const std::set& inline_fields) const { // Multiple blob fields are supported. Each blob field gets its own rolling file writer // via MultipleBlobFileWriter. auto blob_schema = schemas.blob_schema; @@ -249,7 +312,7 @@ AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingBlobWri return std::make_unique( options_.GetTargetFileSize(/*has_primary_key=*/false), GetDataFileWriterCreator(schemas.main_schema, schemas.main_schema->field_names()), - blob_schema, blob_writer_creator, arrow::struct_(write_schema_->fields())); + blob_schema, blob_writer_creator, arrow::struct_(write_schema_->fields()), inline_fields); } Status AppendOnlyWriter::Sync() { @@ -275,10 +338,14 @@ Status AppendOnlyWriter::Close() { writer_.reset(); } + if (external_storage_writer_) { + PAIMON_RETURN_NOT_OK(external_storage_writer_->Close()); + external_storage_writer_.reset(); + } + if (compact_deletion_file_ != nullptr) { compact_deletion_file_->Clean(); } return Status::OK(); } - } // namespace paimon diff --git a/src/paimon/core/append/append_only_writer.h b/src/paimon/core/append/append_only_writer.h index e403e2508..d1b4339d8 100644 --- a/src/paimon/core/append/append_only_writer.h +++ 
b/src/paimon/core/append/append_only_writer.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ class Schema; namespace paimon { class CommitIncrement; +class ExternalStorageBlobWriter; class RecordBatch; template class RollingFileWriter; @@ -94,9 +96,10 @@ class AppendOnlyWriter : public BatchWriter { using RollingFileWriterResult = Result>>>; - RollingFileWriterResult CreateRollingRowWriter() const; + RollingFileWriterResult CreateRollingRowWriter(); RollingFileWriterResult CreateRollingBlobWriter( - const BlobUtils::SeparatedSchemas& schemas) const; + const BlobUtils::SeparatedSchemas& schemas, + const std::set& inline_fields) const; Result DrainIncrement(); Status Flush(bool wait_for_latest_compaction, bool forced_full_compaction); @@ -130,6 +133,8 @@ class AppendOnlyWriter : public BatchWriter { std::shared_ptr compact_deletion_file_; std::unique_ptr>> writer_; + std::unique_ptr external_storage_writer_; + std::set inline_descriptor_fields_; }; } // namespace paimon diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.cpp b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp new file mode 100644 index 000000000..878e12d3c --- /dev/null +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp @@ -0,0 +1,81 @@ +/* + * Copyright 2024-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/casting/binary_to_blob_cast_executor.h" + +#include +#include + +#include "arrow/array/array_binary.h" +#include "arrow/buffer.h" +#include "arrow/type.h" +#include "fmt/format.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/status.h" + +namespace arrow { +class Array; +} // namespace arrow + +namespace paimon { +Result BinaryToBlobCastExecutor::Cast( + const Literal& literal, const std::shared_ptr& target_type) const { + return Status::Invalid( + fmt::format("BinaryToBlobCastExecutor does not support literal cast from {} to {}", + static_cast(literal.GetType()), target_type->ToString())); +} + +Result> BinaryToBlobCastExecutor::Cast( + const std::shared_ptr& array, const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const { + if (array->type_id() != arrow::Type::BINARY) { + return Status::Invalid( + fmt::format("BinaryToBlobCastExecutor only supports binary input, got {}", + array->type()->ToString())); + } + if (target_type->id() != arrow::Type::LARGE_BINARY) { + return Status::Invalid( + fmt::format("BinaryToBlobCastExecutor only supports large_binary target, got {}", + target_type->ToString())); + } + + auto binary_array = std::static_pointer_cast(array); + if (binary_array->offset() != 0) { + return Status::Invalid("BinaryToBlobCastExecutor only supports arrays with zero offset"); + } + + const int64_t length = binary_array->length(); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( + std::shared_ptr large_offsets_buffer, + arrow::AllocateBuffer((length + 1) * static_cast(sizeof(int64_t)), pool)); + auto* large_offsets = reinterpret_cast(large_offsets_buffer->mutable_data()); + for (int64_t row_index = 0; row_index <= length; row_index++) { + large_offsets[row_index] = binary_array->value_offset(row_index); + } + + std::shared_ptr null_bitmap = binary_array->null_bitmap(); + if (binary_array->null_count() == 0) { + null_bitmap.reset(); + } + + auto value_data = binary_array->value_data(); + auto array_data 
= + arrow::ArrayData::Make(target_type, length, {null_bitmap, large_offsets_buffer, value_data}, + binary_array->null_count()); + return arrow::MakeArray(array_data); +} + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.h b/src/paimon/core/casting/binary_to_blob_cast_executor.h new file mode 100644 index 000000000..e12f365d7 --- /dev/null +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.h @@ -0,0 +1,42 @@ +/* + * Copyright 2024-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include "arrow/array/array_base.h" +#include "paimon/core/casting/cast_executor.h" +#include "paimon/predicate/literal.h" +#include "paimon/result.h" + +namespace arrow { +class DataType; +class MemoryPool; +} // namespace arrow + +namespace paimon { +class BinaryToBlobCastExecutor : public CastExecutor { + public: + Result Cast(const Literal& literal, + const std::shared_ptr& target_type) const override; + + Result> Cast(const std::shared_ptr& array, + const std::shared_ptr& target_type, + arrow::MemoryPool* pool) const override; +}; + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/core/casting/cast_executor_factory.cpp b/src/paimon/core/casting/cast_executor_factory.cpp index a09d53fa7..8fa721974 100644 --- a/src/paimon/core/casting/cast_executor_factory.cpp +++ b/src/paimon/core/casting/cast_executor_factory.cpp @@ -18,6 +18,7 @@ #include +#include "paimon/core/casting/binary_to_blob_cast_executor.h" #include "paimon/core/casting/binary_to_string_cast_executor.h" #include "paimon/core/casting/boolean_to_decimal_cast_executor.h" #include "paimon/core/casting/boolean_to_numeric_cast_executor.h" @@ -146,6 +147,8 @@ CastExecutorFactory::CastExecutorFactory() { REGISTER_CAST_EXECUTOR(STRING, BINARY, BinaryToStringCastExecutor); + REGISTER_CAST_EXECUTOR(BLOB, BINARY, BinaryToBlobCastExecutor); + REGISTER_CAST_EXECUTOR(STRING, DATE, DateToStringCastExecutor); REGISTER_CAST_EXECUTOR(TIMESTAMP, DATE, DateToTimestampCastExecutor); diff --git a/src/paimon/core/casting/cast_executor_factory_test.cpp b/src/paimon/core/casting/cast_executor_factory_test.cpp index a12bba603..6d67603c0 100644 --- a/src/paimon/core/casting/cast_executor_factory_test.cpp +++ b/src/paimon/core/casting/cast_executor_factory_test.cpp @@ -17,6 +17,7 @@ #include "paimon/core/casting/cast_executor_factory.h" #include "gtest/gtest.h" +#include "paimon/core/casting/binary_to_blob_cast_executor.h" #include 
"paimon/core/casting/binary_to_string_cast_executor.h" #include "paimon/core/casting/boolean_to_decimal_cast_executor.h" #include "paimon/core/casting/boolean_to_numeric_cast_executor.h" @@ -120,6 +121,13 @@ TEST(CastExecutorFactoryTest, TestRegister) { ASSERT_TRUE(cast_executor); ASSERT_TRUE(std::dynamic_pointer_cast(cast_executor)); } + { + auto* factory = CastExecutorFactory::GetCastExecutorFactory(); + ASSERT_FALSE(factory->executor_map_.empty()); + auto cast_executor = factory->GetCastExecutor(FieldType::BINARY, FieldType::BLOB); + ASSERT_TRUE(cast_executor); + ASSERT_TRUE(std::dynamic_pointer_cast(cast_executor)); + } { auto* factory = CastExecutorFactory::GetCastExecutorFactory(); ASSERT_FALSE(factory->executor_map_.empty()); diff --git a/src/paimon/core/casting/cast_executor_test.cpp b/src/paimon/core/casting/cast_executor_test.cpp index 1ff9397e4..9e5a426ef 100644 --- a/src/paimon/core/casting/cast_executor_test.cpp +++ b/src/paimon/core/casting/cast_executor_test.cpp @@ -34,6 +34,7 @@ #include "paimon/common/utils/date_time_utils.h" #include "paimon/common/utils/decimal_utils.h" #include "paimon/common/utils/field_type_utils.h" +#include "paimon/core/casting/binary_to_blob_cast_executor.h" #include "paimon/core/casting/binary_to_string_cast_executor.h" #include "paimon/core/casting/boolean_to_decimal_cast_executor.h" #include "paimon/core/casting/boolean_to_numeric_cast_executor.h" @@ -1284,6 +1285,43 @@ TEST_F(CastExecutorTest, TestBinaryToStringCastExecutorCastArray) { } } +TEST_F(CastExecutorTest, TestBinaryToBlobCastExecutorCastLiteral) { + auto cast_executor = std::make_shared(); + std::string src_data = "blob-descriptor-bytes"; + ASSERT_NOK_WITH_MSG( + cast_executor->Cast(Literal(FieldType::BINARY, src_data.data(), src_data.size()), + arrow::large_binary()), + "BinaryToBlobCastExecutor does not support literal cast"); +} + +TEST_F(CastExecutorTest, TestBinaryToBlobCastExecutorCastArray) { + auto cast_executor = std::make_shared(); + auto src_array = 
arrow::ipc::internal::json::ArrayFromJSON( + arrow::binary(), R"(["foo", "bar", "", null, "blob"])") + .ValueOrDie(); + auto expected_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::large_binary(), R"(["foo", "bar", "", null, "blob"])") + .ValueOrDie(); + + ASSERT_OK_AND_ASSIGN( + std::shared_ptr target_array, + cast_executor->Cast(src_array, arrow::large_binary(), arrow::default_memory_pool())); + ASSERT_TRUE(target_array->Equals(expected_array)); + ASSERT_EQ(target_array->data()->buffers[2], src_array->data()->buffers[2]); +} + +TEST_F(CastExecutorTest, TestBinaryToBlobCastExecutorCastArrayWithOffset) { + auto cast_executor = std::make_shared(); + auto src_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::binary(), R"(["skip", "foo", "bar"])") + .ValueOrDie() + ->Slice(1, 2); + + ASSERT_NOK_WITH_MSG( + cast_executor->Cast(src_array, arrow::large_binary(), arrow::default_memory_pool()), + "BinaryToBlobCastExecutor only supports arrays with zero offset"); +} + TEST_F(CastExecutorTest, TestDateToStringCastExecutorCastLiteral) { auto cast_executor = std::make_shared(); // date values ranging from 0000-01-01 to 9999-12-31 diff --git a/src/paimon/core/io/data_file_path_factory.h b/src/paimon/core/io/data_file_path_factory.h index 110a7035e..34a315a98 100644 --- a/src/paimon/core/io/data_file_path_factory.h +++ b/src/paimon/core/io/data_file_path_factory.h @@ -62,6 +62,12 @@ class DataFilePathFactory : public PathFactory { return NewPathFromName(NewFileName(data_file_prefix_, ".blob")); } + /// Creates a new blob file path under the given external storage path for descriptor fields. 
+ std::string NewExternalStorageBlobPath(const std::string& external_storage_path) const { + std::string file_name = NewFileName(data_file_prefix_, ".blob"); + return PathUtil::JoinPath(external_storage_path, file_name); + } + std::string NewPathFromName(const std::string& file_name) const { if (external_path_provider_ != nullptr) { return external_path_provider_->GetNextExternalDataPath(file_name); diff --git a/src/paimon/core/io/data_file_path_factory_test.cpp b/src/paimon/core/io/data_file_path_factory_test.cpp index 9bac530e4..6009db102 100644 --- a/src/paimon/core/io/data_file_path_factory_test.cpp +++ b/src/paimon/core/io/data_file_path_factory_test.cpp @@ -22,6 +22,7 @@ #include "gtest/gtest.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/fs/external_path_provider.h" +#include "paimon/common/utils/string_utils.h" #include "paimon/core/io/data_file_meta.h" #include "paimon/core/manifest/file_source.h" #include "paimon/core/stats/simple_stats.h" @@ -56,6 +57,20 @@ TEST_F(DataFilePathFactoryTest, TestNewPath) { ASSERT_EQ(factory_.NewPathFromName("index-file"), "/tmp/index-file"); } +TEST_F(DataFilePathFactoryTest, TestNewExternalStorageBlobPath) { + std::string blob_path1 = factory_.NewExternalStorageBlobPath("/tmp/external_blob"); + std::string blob_path2 = factory_.NewExternalStorageBlobPath("/tmp/external_blob"); + + // Paths are unique (counter increments) + ASSERT_NE(blob_path1, blob_path2); + // Both start with the external storage path joined with the data file prefix + ASSERT_TRUE(StringUtils::StartsWith(blob_path1, "/tmp/external_blob/data-")); + ASSERT_TRUE(StringUtils::StartsWith(blob_path2, "/tmp/external_blob/data-")); + // Both end with .blob extension + ASSERT_TRUE(StringUtils::EndsWith(blob_path1, ".blob")); + ASSERT_TRUE(StringUtils::EndsWith(blob_path2, ".blob")); +} + TEST_F(DataFilePathFactoryTest, TestNewPathWithDataFilePrefixAndExternalPath) { DataFilePathFactory factory; ASSERT_OK_AND_ASSIGN( diff --git 
a/src/paimon/core/io/external_storage_blob_writer.cpp b/src/paimon/core/io/external_storage_blob_writer.cpp new file mode 100644 index 000000000..339cf04f2 --- /dev/null +++ b/src/paimon/core/io/external_storage_blob_writer.cpp @@ -0,0 +1,229 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/core/io/external_storage_blob_writer.h" + +#include +#include + +#include "arrow/array/array_nested.h" +#include "arrow/array/builder_binary.h" +#include "arrow/c/bridge.h" +#include "arrow/type.h" +#include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/data/blob_utils.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/scope_guard.h" +#include "paimon/core/core_options.h" +#include "paimon/core/io/data_file_path_factory.h" +#include "paimon/core/io/data_file_writer.h" +#include "paimon/format/blob/blob_writer_builder.h" +#include "paimon/format/file_format.h" +#include "paimon/format/file_format_factory.h" +#include "paimon/fs/file_system.h" +#include "paimon/memory/memory_pool.h" + +namespace paimon { + +ExternalStorageBlobWriter::ExternalStorageBlobWriter( + const std::shared_ptr& write_schema, + const std::set& external_storage_fields, const std::string& external_storage_path, + int64_t schema_id, const std::shared_ptr& seq_num_counter, + const std::shared_ptr& file_system, + const std::shared_ptr& path_factory, + const std::shared_ptr& 
memory_pool, const CoreOptions& options) + : write_schema_(write_schema), + external_storage_fields_(external_storage_fields), + external_storage_path_(external_storage_path), + schema_id_(schema_id), + seq_num_counter_(seq_num_counter), + file_system_(file_system), + path_factory_(path_factory), + memory_pool_(memory_pool), + options_(options), + logger_(Logger::GetLogger("ExternalStorageBlobWriter")) {} + +ExternalStorageBlobWriter::~ExternalStorageBlobWriter() {} + +Result> +ExternalStorageBlobWriter::CreateFieldRollingWriter(FieldWriter* field_writer) { + auto field = write_schema_->GetFieldByName(field_writer->field_name); + if (!field) { + return Status::Invalid("External storage field '{}' not found in write schema", + field_writer->field_name); + } + + auto single_field_schema = arrow::schema({field}); + ::ArrowSchema arrow_schema; + ScopeGuard guard([&arrow_schema]() { ArrowSchemaRelease(&arrow_schema); }); + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*single_field_schema, &arrow_schema)); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr format, + FileFormatFactory::Get("blob", options_.ToMap())); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr writer_builder, + format->CreateWriterBuilder(&arrow_schema, options_.GetWriteBatchSize())); + writer_builder->WithMemoryPool(memory_pool_); + + // Inject WriteConsumer to capture BlobDescriptors during writes + auto blob_writer_builder = std::dynamic_pointer_cast(writer_builder); + if (!blob_writer_builder) { + return Status::Invalid( + "writer_builder cannot be casted to BlobWriterBuilder in ExternalStorageBlobWriter"); + } + blob_writer_builder->WithWriteConsumer( + [field_writer](std::unique_ptr descriptor) -> bool { + field_writer->captured_descriptors.push_back(std::move(descriptor)); + return true; // Always flush for single row. 
+ }); + + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*single_field_schema, &arrow_schema)); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr stats_extractor, + format->CreateStatsExtractor(&arrow_schema)); + + std::vector write_cols = {field_writer->field_name}; + auto single_blob_file_writer_creator = [this, writer_builder, stats_extractor, write_cols]() + -> Result>>> { + auto writer = std::make_unique( + /*compression=*/"none", std::function(), schema_id_, + seq_num_counter_, FileSource::Append(), stats_extractor, + path_factory_->IsExternalPath(), write_cols, memory_pool_); + PAIMON_RETURN_NOT_OK(writer->Init( + file_system_, path_factory_->NewExternalStorageBlobPath(external_storage_path_), + writer_builder)); + return writer; + }; + + return std::make_unique(options_.GetBlobTargetFileSize(), + single_blob_file_writer_creator); +} + +Result> ExternalStorageBlobWriter::TransformBatch( + const std::shared_ptr& batch) { + if (external_storage_fields_.empty()) { + return batch; + } + + // Lazily initialize per-field writers + if (!initialized_) { + for (int32_t i = 0; i < write_schema_->num_fields(); ++i) { + const auto& field = write_schema_->field(i); + if (external_storage_fields_.count(field->name()) > 0) { + FieldWriter fw; + fw.field_name = field->name(); + fw.field_index = i; + field_writers_.push_back(std::move(fw)); + } + } + // Create rolling writers for each field (must be done after push_back so + // the FieldWriter addresses are stable for the consumer lambda capture). 
+ for (auto& fw : field_writers_) { + PAIMON_ASSIGN_OR_RAISE(fw.rolling_writer, CreateFieldRollingWriter(&fw)); + } + initialized_ = true; + } + + if (field_writers_.empty()) { + return batch; + } + + int64_t num_rows = batch->length(); + + // Collect all arrays and field names from the original batch + std::vector> result_arrays; + std::vector result_names; + result_arrays.reserve(batch->num_fields()); + result_names.reserve(batch->num_fields()); + + for (int32_t col = 0; col < batch->num_fields(); ++col) { + result_names.push_back(batch->type()->field(col)->name()); + result_arrays.push_back(batch->field(col)); + } + + // For each external storage field, write blobs row by row via RollingFileWriter + // and build a replacement descriptor column from captured descriptors. + for (FieldWriter& fw : field_writers_) { + std::shared_ptr original_column = batch->field(fw.field_index); + + // Clear captured descriptors before processing this batch + fw.captured_descriptors.clear(); + + arrow::LargeBinaryBuilder descriptor_builder; + PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.Reserve(num_rows)); + + for (int64_t row = 0; row < num_rows; ++row) { + // Create a single-row single-field StructArray for BlobFormatWriter + std::shared_ptr slice = original_column->Slice(row, 1); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr single_row_struct, + arrow::StructArray::Make({slice}, {fw.field_name})); + + ::ArrowArray c_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*single_row_struct, &c_array)); + + // Write via RollingFileWriter; the consumer captures the descriptor + PAIMON_RETURN_NOT_OK(fw.rolling_writer->Write(&c_array)); + } + + // Build descriptor column from captured descriptors + if (static_cast(fw.captured_descriptors.size()) != num_rows) { + return Status::Invalid( + "Captured descriptor count {} does not match row count {} for field '{}'", + fw.captured_descriptors.size(), num_rows, fw.field_name); + } + + for (int64_t row = 0; row < num_rows; 
++row) { + const auto& descriptor = fw.captured_descriptors[row]; + if (!descriptor) { + // Null blob -> null descriptor + PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.AppendNull()); + } else { + auto serialized = descriptor->Serialize(memory_pool_); + PAIMON_RETURN_NOT_OK_FROM_ARROW( + descriptor_builder.Append(serialized->data(), serialized->size())); + } + } + + // Build the descriptor column and replace + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr descriptor_array, + descriptor_builder.Finish()); + result_arrays[fw.field_index] = descriptor_array; + } + + // Construct the result StructArray + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr result, + arrow::StructArray::Make(result_arrays, result_names)); + return result; +} + +Status ExternalStorageBlobWriter::Close() { + for (FieldWriter& fw : field_writers_) { + if (fw.rolling_writer) { + PAIMON_RETURN_NOT_OK(fw.rolling_writer->Close()); + } + } + return Status::OK(); +} + +void ExternalStorageBlobWriter::Abort() { + for (FieldWriter& fw : field_writers_) { + if (fw.rolling_writer) { + fw.rolling_writer->Abort(); + fw.rolling_writer.reset(); + } + } + field_writers_.clear(); +} + +} // namespace paimon diff --git a/src/paimon/core/io/external_storage_blob_writer.h b/src/paimon/core/io/external_storage_blob_writer.h new file mode 100644 index 000000000..c5cc10282 --- /dev/null +++ b/src/paimon/core/io/external_storage_blob_writer.h @@ -0,0 +1,110 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "paimon/common/data/blob_descriptor.h" +#include "paimon/core/io/data_file_meta.h" +#include "paimon/core/io/rolling_file_writer.h" +#include "paimon/core/io/single_file_writer.h" +#include "paimon/logging.h" +#include "paimon/result.h" +#include "paimon/status.h" + +namespace arrow { +class Schema; +class StructArray; +} // namespace arrow + +namespace paimon { + +class CoreOptions; +class FileSystem; +class LongCounter; +class MemoryPool; +class DataFilePathFactory; + +/// Batch-oriented writer for descriptor BLOB fields that writes raw data to external storage. +/// +/// For each configured external_storage field, this writer: +/// 1. Uses RollingFileWriter (same infra as MultipleBlobFileWriter) with BlobFormatWriter +/// 2. Injects a WriteConsumer into BlobFormatWriter to capture each row's BlobDescriptor +/// 3. After writing a batch, constructs a descriptor column from captured descriptors +/// +/// After TransformBatch(), the returned StructArray has descriptor columns replaced with +/// serialized BlobDescriptor bytes (large_binary), ready to be written into the main data file. +class ExternalStorageBlobWriter { + public: + using BlobRollingWriter = RollingFileWriter<::ArrowArray*, std::shared_ptr>; + + ExternalStorageBlobWriter(const std::shared_ptr& write_schema, + const std::set& external_storage_fields, + const std::string& external_storage_path, int64_t schema_id, + const std::shared_ptr& seq_num_counter, + const std::shared_ptr& file_system, + const std::shared_ptr& path_factory, + const std::shared_ptr& memory_pool, + const CoreOptions& options); + + ~ExternalStorageBlobWriter(); + + /// Transforms a batch by writing external storage fields to .blob files and replacing + /// the BLOB values with serialized BlobDescriptor bytes. 
+ Result> TransformBatch( + const std::shared_ptr& batch); + + /// Closes all internal blob writers and flushes pending data. + Status Close(); + + /// Aborts all internal blob writers. + void Abort(); + + private: + /// Per-field writer state for one external storage blob field. + struct FieldWriter { + std::string field_name; + int32_t field_index; + std::unique_ptr rolling_writer; + /// Descriptors captured by the WriteConsumer callback during writes. + std::vector> captured_descriptors; + }; + + /// Creates a RollingFileWriter for one external storage blob field with consumer injected. + Result> CreateFieldRollingWriter(FieldWriter* field_writer); + + std::shared_ptr write_schema_; + std::set external_storage_fields_; + std::string external_storage_path_; + int64_t schema_id_; + std::shared_ptr seq_num_counter_; + std::shared_ptr file_system_; + std::shared_ptr path_factory_; + std::shared_ptr memory_pool_; + const CoreOptions& options_; + + std::vector field_writers_; + bool initialized_ = false; + + std::unique_ptr logger_; +}; + +} // namespace paimon diff --git a/src/paimon/core/io/external_storage_blob_writer_test.cpp b/src/paimon/core/io/external_storage_blob_writer_test.cpp new file mode 100644 index 000000000..a2206f971 --- /dev/null +++ b/src/paimon/core/io/external_storage_blob_writer_test.cpp @@ -0,0 +1,150 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/io/external_storage_blob_writer.h" + +#include +#include + +#include "arrow/api.h" +#include "arrow/ipc/json_simple.h" +#include "gtest/gtest.h" +#include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/data/blob_utils.h" +#include "paimon/common/utils/long_counter.h" +#include "paimon/core/core_options.h" +#include "paimon/core/io/data_file_path_factory.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class ExternalStorageBlobWriterTest : public ::testing::Test { + protected: + void SetUp() override { + dir_ = UniqueTestDirectory::Create(); + ASSERT_TRUE(dir_); + + pool_ = GetDefaultPool(); + seq_num_counter_ = std::make_shared(0); + + // Create CoreOptions with blob format + ASSERT_OK_AND_ASSIGN(options_, CoreOptions::FromMap({})); + file_system_ = options_.GetFileSystem(); + + // Create external storage directory + external_storage_path_ = dir_->Str() + "/external_blob"; + ASSERT_OK(file_system_->Mkdirs(external_storage_path_)); + + // Initialize DataFilePathFactory + path_factory_ = std::make_shared(); + ASSERT_OK(path_factory_->Init(dir_->Str(), "blob", "data-", nullptr)); + + // Schema: int_col (int32) + blob_col (blob) + auto int_field = arrow::field("int_col", arrow::int32()); + auto blob_field = BlobUtils::ToArrowField("blob_col", false); + write_schema_ = arrow::schema({int_field, blob_field}); + } + + std::unique_ptr dir_; + std::shared_ptr pool_; + std::shared_ptr seq_num_counter_; + CoreOptions options_; + std::shared_ptr file_system_; + std::shared_ptr path_factory_; + std::shared_ptr write_schema_; + std::string external_storage_path_; +}; + +TEST_F(ExternalStorageBlobWriterTest, TestEmptyExternalFields) { + // No external storage fields -> TransformBatch returns original batch + ExternalStorageBlobWriter writer(write_schema_, /*external_storage_fields=*/{}, + external_storage_path_, /*schema_id=*/0, seq_num_counter_, + file_system_, 
path_factory_, pool_, options_); + + auto input = std::static_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(write_schema_->fields()), + R"([[42, "hello"]])") + .ValueOrDie()); + + ASSERT_OK_AND_ASSIGN(auto result, writer.TransformBatch(input)); + ASSERT_TRUE(result->Equals(*input)); + + ASSERT_OK(writer.Close()); +} + +TEST_F(ExternalStorageBlobWriterTest, TestTransformBatchReplacesBlob) { + std::set external_fields = {"blob_col"}; + ExternalStorageBlobWriter writer(write_schema_, external_fields, external_storage_path_, + /*schema_id=*/0, seq_num_counter_, file_system_, path_factory_, + pool_, options_); + + auto struct_type = arrow::struct_(write_schema_->fields()); + auto input = std::static_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(struct_type, R"([[10, "data1"], [20, "data2"]])") + .ValueOrDie()); + + auto original_int_col = input->field(0); + + ASSERT_OK_AND_ASSIGN(auto result, writer.TransformBatch(input)); + + // int_col should be unchanged + ASSERT_EQ(result->num_fields(), 2); + ASSERT_TRUE(result->field(0)->Equals(*original_int_col)); + + // blob_col should be replaced with serialized BlobDescriptors + auto descriptor_col = std::static_pointer_cast(result->field(1)); + ASSERT_EQ(descriptor_col->length(), 2); + + for (int64_t i = 0; i < 2; ++i) { + ASSERT_FALSE(descriptor_col->IsNull(i)); + auto view = descriptor_col->GetView(i); + ASSERT_OK_AND_ASSIGN(auto descriptor, + BlobDescriptor::Deserialize(view.data(), view.size())); + ASSERT_EQ(descriptor->Length(), 5); + ASSERT_TRUE(descriptor->Uri().find(external_storage_path_) != std::string::npos); + } + + ASSERT_OK(writer.Close()); +} + +TEST_F(ExternalStorageBlobWriterTest, TestAbort) { + std::set external_fields = {"blob_col"}; + ExternalStorageBlobWriter writer(write_schema_, external_fields, external_storage_path_, + /*schema_id=*/0, seq_num_counter_, file_system_, path_factory_, + pool_, options_); + + auto input = std::static_pointer_cast( + 
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(write_schema_->fields()), + R"([[1, "abort_test"]])") + .ValueOrDie()); + + ASSERT_OK(writer.TransformBatch(input)); + + // Verify blob files exist before abort + std::vector> files_before; + ASSERT_OK(file_system_->ListDir(external_storage_path_, &files_before)); + ASSERT_FALSE(files_before.empty()); + + // Abort should clean up written blob files + writer.Abort(); + + std::vector> files_after; + ASSERT_OK(file_system_->ListDir(external_storage_path_, &files_after)); + ASSERT_TRUE(files_after.empty()); +} + +} // namespace paimon::test diff --git a/src/paimon/core/io/field_mapping_reader_test.cpp b/src/paimon/core/io/field_mapping_reader_test.cpp index 53f0c0d0a..2288163bd 100644 --- a/src/paimon/core/io/field_mapping_reader_test.cpp +++ b/src/paimon/core/io/field_mapping_reader_test.cpp @@ -121,7 +121,7 @@ class FieldMappingReaderTest : public ::testing::Test { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys_, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields_, {})); auto arrow_schema = DataField::ConvertDataFieldsToArrowSchema( mapping->non_partition_info.non_partition_data_schema); @@ -154,7 +154,8 @@ class FieldMappingReaderTest : public ::testing::Test { const std::shared_ptr& read_schema, const std::shared_ptr& predicate, const std::vector& partition_keys, const BinaryRow& partition, - const std::shared_ptr& expect_array) const { + const std::shared_ptr& expect_array, + const std::vector& blob_inline_fields = {}) const { auto dir = paimon::test::UniqueTestDirectory::Create(); ASSERT_TRUE(dir); auto fs = dir->GetFileSystem(); @@ -162,7 +163,8 @@ class FieldMappingReaderTest : public ::testing::Test { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - 
ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_schema)); + ASSERT_OK_AND_ASSIGN(auto mapping, + mapping_builder->CreateFieldMapping(data_schema, blob_inline_fields)); auto arrow_schema = DataField::ConvertDataFieldsToArrowSchema( mapping->non_partition_info.non_partition_data_schema); @@ -706,6 +708,38 @@ TEST_F(FieldMappingReaderTest, TestSchemaEvolutionWithDictType) { partition, expected_array); } +TEST_F(FieldMappingReaderTest, TestReadInlineBlobAsBinaryDataFile) { + std::vector data_fields = { + DataField(0, arrow::field("descriptor", arrow::large_binary())), + }; + auto data_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); + auto data_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(data_schema->fields()), + R"([ + ["descriptor-1"], + [null], + ["descriptor-2"] + ])") + .ValueOrDie()); + + std::vector read_fields = { + DataField(0, arrow::field("descriptor", arrow::large_binary())), + }; + auto read_schema = DataField::ConvertDataFieldsToArrowSchema(read_fields); + auto expected = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(read_schema->fields()), + R"([ + ["descriptor-1"], + [null], + ["descriptor-2"] + ])") + .ValueOrDie()); + + CheckResult(data_schema, data_array, read_schema, /*predicate=*/nullptr, + /*partition_keys=*/{}, BinaryRow::EmptyRow(), expected, + /*blob_inline_fields=*/{"descriptor"}); +} + TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionRenameCombinedCast) { // Test all 4 combinations of rename × cast: // f0: no rename, no cast (utf8 → utf8, name unchanged) diff --git a/src/paimon/core/io/rolling_blob_file_writer.cpp b/src/paimon/core/io/rolling_blob_file_writer.cpp index 9019e7392..5c419baa4 100644 --- a/src/paimon/core/io/rolling_blob_file_writer.cpp +++ b/src/paimon/core/io/rolling_blob_file_writer.cpp @@ -44,12 +44,13 @@ RollingBlobFileWriter::RollingBlobFileWriter( std::function>()> 
create_file_writer, const std::shared_ptr& blob_schema, MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator, - const std::shared_ptr& data_type) + const std::shared_ptr& data_type, std::set inline_fields) : RollingFileWriter<::ArrowArray*, std::shared_ptr>(target_file_size, create_file_writer), blob_schema_(blob_schema), blob_writer_creator_(std::move(blob_writer_creator)), data_type_(data_type), + inline_fields_(std::move(inline_fields)), logger_(Logger::GetLogger("RollingBlobFileWriter")) {} Status RollingBlobFileWriter::Write(::ArrowArray* record) { @@ -67,7 +68,7 @@ Status RollingBlobFileWriter::Write(::ArrowArray* record) { auto struct_array = std::dynamic_pointer_cast(arrow_array); PAIMON_ASSIGN_OR_RAISE(BlobUtils::SeparatedStructArrays separated_arrays, - BlobUtils::SeparateBlobArray(struct_array)); + BlobUtils::SeparateBlobArray(struct_array, inline_fields_)); // Write main (non-blob) data ::ArrowArray c_main_array; PAIMON_RETURN_NOT_OK_FROM_ARROW( diff --git a/src/paimon/core/io/rolling_blob_file_writer.h b/src/paimon/core/io/rolling_blob_file_writer.h index 169fbbd70..907936b09 100644 --- a/src/paimon/core/io/rolling_blob_file_writer.h +++ b/src/paimon/core/io/rolling_blob_file_writer.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "arrow/array/array_nested.h" @@ -62,7 +63,8 @@ class RollingBlobFileWriter std::function>()> create_file_writer, const std::shared_ptr& blob_schema, MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator, - const std::shared_ptr& data_type); + const std::shared_ptr& data_type, + std::set inline_fields = {}); ~RollingBlobFileWriter() override = default; Status Write(::ArrowArray* record) override; @@ -85,6 +87,7 @@ class RollingBlobFileWriter MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator_; std::unique_ptr blob_writer_; std::shared_ptr data_type_; + std::set inline_fields_; std::unique_ptr logger_; }; diff --git a/src/paimon/core/operation/abstract_split_read.cpp 
b/src/paimon/core/operation/abstract_split_read.cpp index f5f37631e..40d5e41d5 100644 --- a/src/paimon/core/operation/abstract_split_read.cpp +++ b/src/paimon/core/operation/abstract_split_read.cpp @@ -182,6 +182,10 @@ Result> AbstractSplitRead::CreateFieldMappingRe // load schema to get data schema PAIMON_ASSIGN_OR_RAISE(data_schema, schema_manager_->ReadSchema(file_meta->schema_id)); } + PAIMON_ASSIGN_OR_RAISE(CoreOptions data_options, + CoreOptions::FromMap(data_schema->Options(), options_.GetFileSystem())); + auto blob_inline_fields = data_options.GetBlobInlineFields(); + std::unique_ptr field_mapping; if (!data_schema->PrimaryKeys().empty()) { // for pk table, add special fields to file schema when field mapping @@ -189,14 +193,14 @@ Result> AbstractSplitRead::CreateFieldMappingRe SpecialFields::ValueKind()}; file_fields.insert(file_fields.end(), data_schema->Fields().begin(), data_schema->Fields().end()); - PAIMON_ASSIGN_OR_RAISE(field_mapping, - field_mapping_builder->CreateFieldMapping(file_fields)); + PAIMON_ASSIGN_OR_RAISE(field_mapping, field_mapping_builder->CreateFieldMapping( + file_fields, blob_inline_fields)); } else { PAIMON_ASSIGN_OR_RAISE( std::vector projected_data_fields, ProjectFieldsForRowTrackingAndDataEvolution(data_schema, file_meta->write_cols)); - PAIMON_ASSIGN_OR_RAISE(field_mapping, - field_mapping_builder->CreateFieldMapping(projected_data_fields)); + PAIMON_ASSIGN_OR_RAISE(field_mapping, field_mapping_builder->CreateFieldMapping( + projected_data_fields, blob_inline_fields)); } auto read_schema = DataField::ConvertDataFieldsToArrowSchema( diff --git a/src/paimon/core/operation/append_only_file_store_write.cpp b/src/paimon/core/operation/append_only_file_store_write.cpp index e3ffe4473..0986c6d85 100644 --- a/src/paimon/core/operation/append_only_file_store_write.cpp +++ b/src/paimon/core/operation/append_only_file_store_write.cpp @@ -71,10 +71,6 @@ AppendOnlyFileStoreWrite::AppendOnlyFileStoreWrite( is_streaming_mode, 
ignore_num_bucket_check, executor, pool), logger_(Logger::GetLogger("AppendOnlyFileStoreWrite")) { write_cols_ = write_schema->field_names(); - auto schemas = BlobUtils::SeparateBlobSchema(schema_); - if (schemas.blob_schema && schemas.blob_schema->num_fields() > 0) { - with_blob_ = true; - } // optimize write_cols to null in following cases: // 1. write_schema contains all columns // 2. TODO(xinyu.lxy) write_schema contains all columns and append _ROW_ID & _SEQUENCE_NUMBER @@ -172,9 +168,7 @@ Result> AppendOnlyFileStoreWrite::CreateWriter( file_store_path_factory_->CreateDataFilePathFactory(partition, bucket)); std::shared_ptr compact_manager; - auto schemas = BlobUtils::SeparateBlobSchema(write_schema_); - if (options_.WriteOnly() || options_.DataEvolutionEnabled() || options_.GetBucket() == -1 || - with_blob_) { + if (options_.WriteOnly() || options_.DataEvolutionEnabled() || options_.GetBucket() == -1) { compact_manager = std::make_shared(); } else { auto dv_factory = diff --git a/src/paimon/core/operation/append_only_file_store_write.h b/src/paimon/core/operation/append_only_file_store_write.h index 1d3d7e726..e41002c00 100644 --- a/src/paimon/core/operation/append_only_file_store_write.h +++ b/src/paimon/core/operation/append_only_file_store_write.h @@ -115,7 +115,6 @@ class AppendOnlyFileStoreWrite : public AbstractFileStoreWrite { const std::vector>& files) const; std::optional> write_cols_; - bool with_blob_ = false; std::unique_ptr logger_; }; diff --git a/src/paimon/core/operation/file_store_scan.cpp b/src/paimon/core/operation/file_store_scan.cpp index 925a3afce..61d9cf61a 100644 --- a/src/paimon/core/operation/file_store_scan.cpp +++ b/src/paimon/core/operation/file_store_scan.cpp @@ -354,8 +354,9 @@ Status FileStoreScan::SplitAndSetFilter(const std::vector& partitio PAIMON_ASSIGN_OR_RAISE(std::unique_ptr mapping_builder, FieldMappingBuilder::Create(arrow_schema, partition_keys, scan_filters->GetPredicate())); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr 
mapping, - mapping_builder->CreateFieldMapping(arrow_schema)); + PAIMON_ASSIGN_OR_RAISE( + std::unique_ptr mapping, + mapping_builder->CreateFieldMapping(arrow_schema, core_options_.GetBlobInlineFields())); if (mapping->partition_info != std::nullopt) { const auto& partition_info = mapping->partition_info.value(); partition_schema_ = diff --git a/src/paimon/core/schema/schema_validation.cpp b/src/paimon/core/schema/schema_validation.cpp index 8a92b7e90..343435c98 100644 --- a/src/paimon/core/schema/schema_validation.cpp +++ b/src/paimon/core/schema/schema_validation.cpp @@ -444,12 +444,8 @@ Status SchemaValidation::ValidateBlobFields(const TableSchema& schema, const Cor const auto& blob_descriptor_names = options.GetBlobDescriptorFields(); const auto& blob_view_names = options.GetBlobViewFields(); const auto& blob_external_storage_names = options.GetBlobExternalStorageFields(); - std::vector configured_blob_like_names = configured_blob_names; - configured_blob_like_names.insert(configured_blob_like_names.end(), - blob_descriptor_names.begin(), blob_descriptor_names.end()); - configured_blob_like_names.insert(configured_blob_like_names.end(), blob_view_names.begin(), - blob_view_names.end()); - if (configured_blob_like_names.empty() && blob_external_storage_names.empty()) { + if (configured_blob_names.empty() && blob_descriptor_names.empty() && blob_view_names.empty() && + blob_external_storage_names.empty()) { return Status::OK(); } diff --git a/src/paimon/core/utils/field_mapping.cpp b/src/paimon/core/utils/field_mapping.cpp index 0809cd944..8898ddfd9 100644 --- a/src/paimon/core/utils/field_mapping.cpp +++ b/src/paimon/core/utils/field_mapping.cpp @@ -16,10 +16,9 @@ #include "paimon/core/utils/field_mapping.h" -#include -#include #include #include +#include #include "arrow/type.h" #include "fmt/format.h" @@ -63,19 +62,24 @@ Result> FieldMappingBuilder::Create( } Result> FieldMappingBuilder::CreateFieldMapping( - const std::shared_ptr& data_schema) const { + const 
std::shared_ptr& data_schema, + const std::vector& blob_inline_fields) const { PAIMON_ASSIGN_OR_RAISE(std::vector data_fields, DataField::ConvertArrowSchemaToDataFields(data_schema)); - return CreateFieldMapping(data_fields); + return CreateFieldMapping(data_fields, blob_inline_fields); } Result> FieldMappingBuilder::CreateFieldMapping( - const std::vector& data_fields) const { + const std::vector& data_fields, + const std::vector& blob_inline_fields) const { + auto converted_data_fields = ConvertBlobInlineDataFields(data_fields, blob_inline_fields); + // generate non-exist field info - std::optional non_exist_field_info = CreateNonExistFieldInfo(data_fields); + std::optional non_exist_field_info = + CreateNonExistFieldInfo(converted_data_fields); // generate exist field info - ExistFieldInfo exist_field_info = CreateExistFieldInfo(data_fields); + ExistFieldInfo exist_field_info = CreateExistFieldInfo(converted_data_fields); // key: partition key, value: partition idx std::map partition_key_to_idx = @@ -83,12 +87,35 @@ Result> FieldMappingBuilder::CreateFieldMapping( PAIMON_ASSIGN_OR_RAISE( NonPartitionInfo non_partition_info, - CreateNonPartitionInfo(data_fields, exist_field_info, partition_key_to_idx)); + CreateNonPartitionInfo(converted_data_fields, exist_field_info, partition_key_to_idx)); PAIMON_ASSIGN_OR_RAISE(std::optional partition_info, CreatePartitionInfo(exist_field_info, partition_key_to_idx)); return std::make_unique(partition_info, non_partition_info, non_exist_field_info); } +std::vector FieldMappingBuilder::ConvertBlobInlineDataFields( + const std::vector& data_fields, const std::vector& blob_inline_fields) { + if (blob_inline_fields.empty()) { + return data_fields; + } + + std::set blob_inline_field_set(blob_inline_fields.begin(), + blob_inline_fields.end()); + std::vector converted_fields; + converted_fields.reserve(data_fields.size()); + for (const auto& data_field : data_fields) { + if (blob_inline_field_set.find(data_field.Name()) == 
blob_inline_field_set.end()) { + converted_fields.push_back(data_field); + continue; + } + + auto binary_field = arrow::field(data_field.Name(), arrow::binary(), data_field.Nullable(), + data_field.ArrowField()->metadata()); + converted_fields.emplace_back(data_field.Id(), binary_field, data_field.Description()); + } + return converted_fields; +} + ExistFieldInfo FieldMappingBuilder::CreateExistFieldInfo( const std::vector& data_fields) const { // key:field id, value: {target_idx, read field} diff --git a/src/paimon/core/utils/field_mapping.h b/src/paimon/core/utils/field_mapping.h index 05b79dfcd..4dde0c478 100644 --- a/src/paimon/core/utils/field_mapping.h +++ b/src/paimon/core/utils/field_mapping.h @@ -15,8 +15,6 @@ */ #pragma once -#include -#include #include #include #include @@ -58,9 +56,11 @@ class FieldMappingBuilder { const std::shared_ptr& predicate); Result> CreateFieldMapping( - const std::vector& data_fields) const; + const std::vector& data_fields, + const std::vector& blob_inline_fields) const; Result> CreateFieldMapping( - const std::shared_ptr& data_schema) const; + const std::shared_ptr& data_schema, + const std::vector& blob_inline_fields) const; int32_t GetReadFieldCount() const { return read_fields_.size(); @@ -83,6 +83,9 @@ class FieldMappingBuilder { std::optional CreateNonExistFieldInfo( const std::vector& data_fields) const; ExistFieldInfo CreateExistFieldInfo(const std::vector& data_fields) const; + static std::vector ConvertBlobInlineDataFields( + const std::vector& data_fields, + const std::vector& blob_inline_fields); Result CreateNonPartitionInfo( const std::vector& data_fields, const ExistFieldInfo& exist_field_info, diff --git a/src/paimon/core/utils/field_mapping_test.cpp b/src/paimon/core/utils/field_mapping_test.cpp index c74083e26..29e0deee5 100644 --- a/src/paimon/core/utils/field_mapping_test.cpp +++ b/src/paimon/core/utils/field_mapping_test.cpp @@ -18,8 +18,10 @@ #include +#include "arrow/type.h" #include "arrow/type_fwd.h" 
#include "gtest/gtest.h" +#include "paimon/common/data/blob_utils.h" #include "paimon/common/predicate/leaf_predicate_impl.h" #include "paimon/common/predicate/predicate_filter.h" #include "paimon/data/decimal.h" @@ -89,7 +91,7 @@ TEST_F(FieldMappingTest, TestEmptyPartitionKeys) { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create( schema_, /*partition_keys=*/std::vector(), predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); ASSERT_EQ(mapping->partition_info, std::nullopt); ASSERT_EQ(mapping->non_exist_field_info, std::nullopt); @@ -121,7 +123,7 @@ TEST_F(FieldMappingTest, TestCompoundPartitionPredicate) { std::vector partition_keys = {"f0", "f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[0], fields_[1], fields_[2]}; @@ -154,7 +156,7 @@ TEST_F(FieldMappingTest, TestPartitionKeysEqualSchema) { std::vector partition_keys = {"f0", "f1", "f2", "f3"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = fields_; @@ -180,7 +182,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, 
mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -206,7 +208,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema2) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -243,7 +245,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema3) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -286,7 +288,7 @@ TEST_F(FieldMappingTest, TestPartialPartitionKeysInSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[2]}; @@ -330,7 +332,7 @@ TEST_F(FieldMappingTest, TestNoPartitionKeysInReadSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - 
ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); ASSERT_EQ(mapping->partition_info, std::nullopt); @@ -374,7 +376,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolution) { ASSERT_OK_AND_ASSIGN( auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, /*predicate=*/nullptr)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -460,7 +462,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolutionWithPredicate) { std::vector partition_keys = {"key0", "key1"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -562,7 +564,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolutionWithPredicate2) { std::vector partition_keys = {"key0", "key1"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -629,7 +631,7 @@ TEST_F(FieldMappingTest, TestCompoundPredicateWithoutPushDown) { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); ASSERT_TRUE(mapping_builder); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); + 
ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); ASSERT_FALSE(mapping->partition_info); NonPartitionInfo expected_non_part_info; @@ -641,4 +643,48 @@ TEST_F(FieldMappingTest, TestCompoundPredicateWithoutPushDown) { CheckNonPartitionInfo(mapping->non_partition_info, expected_non_part_info); } +TEST_F(FieldMappingTest, TestBlobInlineFieldConversion) { + // Schema with a blob field (large_binary with blob metadata) and a normal field. + auto blob_field = BlobUtils::ToArrowField("blob_col", /*nullable=*/true); + std::vector data_fields = {DataField(0, arrow::field("int_col", arrow::int32())), + DataField(1, blob_field), + DataField(2, arrow::field("str_col", arrow::utf8()))}; + + auto read_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); + ASSERT_OK_AND_ASSIGN( + auto mapping_builder, + FieldMappingBuilder::Create(read_schema, /*partition_keys=*/{}, /*predicate=*/nullptr)); + + // Without inline fields — blob_col stays as large_binary + { + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); + auto& data_schema = mapping->non_partition_info.non_partition_data_schema; + ASSERT_EQ(data_schema.size(), 3); + ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); + } + + // With inline fields — blob_col should be converted from large_binary to binary + { + ASSERT_OK_AND_ASSIGN(auto mapping, + mapping_builder->CreateFieldMapping(data_fields, {"blob_col"})); + auto& data_schema = mapping->non_partition_info.non_partition_data_schema; + ASSERT_EQ(data_schema.size(), 3); + // blob_col converted to binary + ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::BINARY); + ASSERT_EQ(data_schema[1].Name(), "blob_col"); + ASSERT_EQ(data_schema[1].Nullable(), true); + // Other fields unchanged + ASSERT_EQ(data_schema[0].ArrowField()->type()->id(), arrow::Type::INT32); + ASSERT_EQ(data_schema[2].ArrowField()->type()->id(), arrow::Type::STRING); + } + + 
// Non-matching inline field name — no conversion should happen + { + ASSERT_OK_AND_ASSIGN( + auto mapping, mapping_builder->CreateFieldMapping(data_fields, {"non_existent_field"})); + auto& data_schema = mapping->non_partition_info.non_partition_data_schema; + ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); + } +} + } // namespace paimon::test diff --git a/src/paimon/format/avro/avro_direct_encoder.cpp b/src/paimon/format/avro/avro_direct_encoder.cpp index d8ace9f7d..f2740009b 100644 --- a/src/paimon/format/avro/avro_direct_encoder.cpp +++ b/src/paimon/format/avro/avro_direct_encoder.cpp @@ -224,7 +224,14 @@ Status AvroDirectEncoder::EncodeArrowToAvro(const ::avro::NodePtr& avro_node, return Status::OK(); } - // Handle regular BYTES + // Handle regular BYTES (binary or large_binary) + if (array.type()->id() == arrow::Type::LARGE_BINARY) { + const auto& large_binary_array = + arrow::internal::checked_cast(array); + std::string_view value = large_binary_array.GetView(row_index); + encoder->encodeBytes(reinterpret_cast(value.data()), value.size()); + return Status::OK(); + } const auto& binary_array = arrow::internal::checked_cast(array); std::string_view value = binary_array.GetView(row_index); diff --git a/src/paimon/format/avro/avro_file_batch_reader_test.cpp b/src/paimon/format/avro/avro_file_batch_reader_test.cpp index 15ae3a908..a8ed3bb6c 100644 --- a/src/paimon/format/avro/avro_file_batch_reader_test.cpp +++ b/src/paimon/format/avro/avro_file_batch_reader_test.cpp @@ -403,6 +403,56 @@ TEST_F(AvroFileBatchReaderTest, TestGetNumberOfRows) { } } +TEST_F(AvroFileBatchReaderTest, TestReadBinaryWrittenFromBinaryAndLargeBinary) { + auto check_binary_read_result = [&](const std::shared_ptr& write_type, + const std::string& file_name) { + std::string data_json = R"([ + ["descriptor-1"], + [""], + [null], + ["descriptor-2"] + ])"; + auto write_field = arrow::field("f0", write_type); + auto write_data_type = arrow::struct_({write_field}); 
+ auto write_array = + arrow::ipc::internal::json::ArrayFromJSON(write_data_type, data_json).ValueOrDie(); + + std::string file_path = PathUtil::JoinPath(dir_->Str(), file_name); + WriteData(write_array, file_path, /*compression=*/"null"); + + // Read back with binary schema + auto read_field = arrow::field("f0", arrow::binary()); + auto read_data_type = arrow::struct_({read_field}); + + ASSERT_OK_AND_ASSIGN(auto reader_builder, + file_format_->CreateReaderBuilder(/*batch_size=*/1024)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file_path)); + ASSERT_OK_AND_ASSIGN(auto batch_reader, reader_builder->Build(in)); + + // Check GetFileSchema: regardless of write type, avro file schema is always binary + ASSERT_OK_AND_ASSIGN(auto c_file_schema, batch_reader->GetFileSchema()); + auto file_schema = arrow::ImportSchema(c_file_schema.get()).ValueOrDie(); + arrow::Schema expected_file_schema({read_field}); + ASSERT_TRUE(file_schema->Equals(expected_file_schema)); + + auto read_schema = arrow::schema({read_field}); + std::unique_ptr c_schema = std::make_unique(); + ASSERT_TRUE(arrow::ExportSchema(*read_schema, c_schema.get()).ok()); + EXPECT_OK(batch_reader->SetReadSchema(c_schema.get(), /*predicate=*/nullptr, + /*selection_bitmap=*/std::nullopt)); + + ASSERT_OK_AND_ASSIGN(auto result_array, ::paimon::test::ReadResultCollector::CollectResult( + batch_reader.get())); + auto expected_array = + arrow::ipc::internal::json::ArrayFromJSON(read_data_type, data_json).ValueOrDie(); + auto expected_chunked_array = std::make_shared(expected_array); + ASSERT_TRUE(result_array->Equals(expected_chunked_array)); + }; + + check_binary_read_result(arrow::binary(), "binary.avro"); + check_binary_read_result(arrow::large_binary(), "large-binary.avro"); +} + INSTANTIATE_TEST_SUITE_P(TestParam, AvroFileBatchReaderTest, ::testing::Values(false, true)); } // namespace paimon::avro::test diff --git a/src/paimon/format/avro/avro_schema_converter.cpp 
b/src/paimon/format/avro/avro_schema_converter.cpp index 7a2726218..54c22d927 100644 --- a/src/paimon/format/avro/avro_schema_converter.cpp +++ b/src/paimon/format/avro/avro_schema_converter.cpp @@ -267,6 +267,7 @@ Result<::avro::Schema> AvroSchemaConverter::ArrowTypeToAvroSchema( case arrow::Type::STRING: return nullable ? NullableSchema(::avro::StringSchema()) : ::avro::StringSchema(); case arrow::Type::BINARY: + case arrow::Type::LARGE_BINARY: return nullable ? NullableSchema(::avro::BytesSchema()) : ::avro::BytesSchema(); case arrow::Type::type::DATE32: { ::avro::IntSchema date_schema; diff --git a/src/paimon/format/avro/avro_stats_extractor.cpp b/src/paimon/format/avro/avro_stats_extractor.cpp index 680c23766..c8bde0d38 100644 --- a/src/paimon/format/avro/avro_stats_extractor.cpp +++ b/src/paimon/format/avro/avro_stats_extractor.cpp @@ -92,6 +92,7 @@ Result> AvroStatsExtractor::FetchColumnStatistics( case arrow::Type::type::DOUBLE: return ColumnStats::CreateDoubleColumnStats(std::nullopt, std::nullopt, std::nullopt); case arrow::Type::type::BINARY: + case arrow::Type::type::LARGE_BINARY: return ColumnStats::CreateStringColumnStats(std::nullopt, std::nullopt, std::nullopt); case arrow::Type::type::STRING: return ColumnStats::CreateStringColumnStats(std::nullopt, std::nullopt, std::nullopt); diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index dbc0f392a..c1819101b 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -235,10 +235,9 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { file_system->Create(dir->Str() + "/file.blob", /*overwrite=*/true)); std::shared_ptr blob_field = BlobUtils::ToArrowField("blob_col"); auto struct_type = arrow::struct_({blob_field}); - bool blob_as_descriptor = GetParam(); - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor, output_stream, 
struct_type, - file_system, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream, struct_type, file_system, pool_, nullptr)); ASSERT_OK(writer->Flush()); ASSERT_OK(writer->Finish()); diff --git a/src/paimon/format/blob/blob_format_writer.cpp b/src/paimon/format/blob/blob_format_writer.cpp index f41e21e90..3b199983d 100644 --- a/src/paimon/format/blob/blob_format_writer.cpp +++ b/src/paimon/format/blob/blob_format_writer.cpp @@ -21,6 +21,7 @@ #include "arrow/api.h" #include "arrow/c/bridge.h" #include "paimon/common/data/blob_defs.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/data/blob_utils.h" #include "paimon/common/memory/memory_segment_utils.h" #include "paimon/common/metrics/metrics_impl.h" @@ -31,24 +32,24 @@ namespace paimon::blob { -BlobFormatWriter::BlobFormatWriter(bool blob_as_descriptor, - const std::shared_ptr& out, +BlobFormatWriter::BlobFormatWriter(const std::shared_ptr& out, const std::shared_ptr& data_type, const std::shared_ptr& fs, - const std::shared_ptr& pool) - : blob_as_descriptor_(blob_as_descriptor), - out_(out), + const std::shared_ptr& pool, + WriteConsumer write_consumer) + : out_(out), data_type_(data_type), fs_(fs), - pool_(pool) { + pool_(pool), + write_consumer_(std::move(write_consumer)) { metrics_ = std::make_shared(); tmp_buffer_ = Bytes::AllocateBytes(kTmpBufferSize, pool_.get()); } Result> BlobFormatWriter::Create( - bool blob_as_descriptor, const std::shared_ptr& out, - const std::shared_ptr& data_type, const std::shared_ptr& fs, - const std::shared_ptr& pool) { + const std::shared_ptr& out, const std::shared_ptr& data_type, + const std::shared_ptr& fs, const std::shared_ptr& pool, + WriteConsumer write_consumer) { if (out == nullptr) { return Status::Invalid("blob format writer create failed. 
out is nullptr"); } @@ -67,7 +68,7 @@ Result> BlobFormatWriter::Create( fmt::format("field {} is not BLOB", data_type->field(0)->ToString())); } return std::unique_ptr( - new BlobFormatWriter(blob_as_descriptor, out, data_type, fs, pool)); + new BlobFormatWriter(out, data_type, fs, pool, std::move(write_consumer))); } Status BlobFormatWriter::AddBatch(ArrowArray* batch) { @@ -91,6 +92,9 @@ Status BlobFormatWriter::AddBatch(ArrowArray* batch) { // Child-level null: record kNullBinLength, skip data writing (aligned with Java) if (child_array->IsNull(0)) { bin_lengths_.push_back(BlobDefs::kNullBinLength); + if (write_consumer_) { + write_consumer_(nullptr); + } return Status::OK(); } @@ -103,7 +107,28 @@ Status BlobFormatWriter::AddBatch(ArrowArray* batch) { assert(blob_array.length() == 1); PAIMON_RETURN_NOT_OK(WriteBlob(blob_array.GetView(0))); - PAIMON_RETURN_NOT_OK(Flush()); + if (write_consumer_) { + // Construct BlobDescriptor from the blob just written. + // blob format: magic(4) + content + bin_length(8) + crc32(4) + // bin_length covers all of the above, so content_length = bin_length - 16. + // The stream is now positioned at the end of crc32, i.e., previous_pos + bin_length. + int64_t bin_length = bin_lengths_.back(); + PAIMON_ASSIGN_OR_RAISE(int64_t end_pos, out_->GetPos()); + int64_t blob_start_pos = end_pos - bin_length; + int64_t content_offset = blob_start_pos + BlobDefs::kContentStartOffset; + int64_t content_length = bin_length - 16; + + PAIMON_ASSIGN_OR_RAISE(std::string uri, out_->GetUri()); + PAIMON_ASSIGN_OR_RAISE(auto descriptor, + BlobDescriptor::Create(uri, content_offset, content_length)); + bool should_flush = write_consumer_(std::move(descriptor)); + if (should_flush) { + PAIMON_RETURN_NOT_OK(Flush()); + } + } else { + // TODO(xinyu.lxy): Java does not flush when writeConsumer is null. 
+ PAIMON_RETURN_NOT_OK(Flush()); + } return Status::OK(); } @@ -138,8 +163,13 @@ Status BlobFormatWriter::WriteBlob(std::string_view blob_data) { PAIMON_RETURN_NOT_OK(WriteWithCrc32(kMagicNumberBytes->data(), kMagicNumberBytes->size())); // write blob content + // Dynamically check whether blob_data is a serialized BlobDescriptor (by magic header) + // rather than relying on blob_as_descriptor_ config. This is consistent with Java behavior: + // at write time, the input bytes are auto-detected as descriptor or raw data. std::unique_ptr in; - if (blob_as_descriptor_) { + PAIMON_ASSIGN_OR_RAISE(bool is_descriptor, + BlobDescriptor::IsBlobDescriptor(blob_data.data(), blob_data.size())); + if (is_descriptor) { PAIMON_ASSIGN_OR_RAISE(std::unique_ptr blob, Blob::FromDescriptor(blob_data.data(), blob_data.size())); PAIMON_ASSIGN_OR_RAISE(in, blob->NewInputStream(fs_)); diff --git a/src/paimon/format/blob/blob_format_writer.h b/src/paimon/format/blob/blob_format_writer.h index c52437e8d..c8fab7956 100644 --- a/src/paimon/format/blob/blob_format_writer.h +++ b/src/paimon/format/blob/blob_format_writer.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -36,6 +37,7 @@ struct ArrowArray; namespace paimon { class Blob; +class BlobDescriptor; class FileSystem; class Metrics; class OutputStream; @@ -47,10 +49,15 @@ namespace paimon::blob { // https://cwiki.apache.org/confluence/display/PAIMON/PIP-35%3A+Introduce+Blob+to+store+multimodal+data class BlobFormatWriter : public FormatWriter { public: + /// Callback invoked after each blob row is written. + /// Receives the BlobDescriptor of the written blob (nullptr for null blobs). + /// Similar to Java's BlobConsumer. Returns true if the output stream should be flushed. 
+ using WriteConsumer = std::function descriptor)>; + static Result> Create( - bool blob_as_descriptor, const std::shared_ptr& out, - const std::shared_ptr& data_type, const std::shared_ptr& fs, - const std::shared_ptr& pool); + const std::shared_ptr& out, const std::shared_ptr& data_type, + const std::shared_ptr& fs, const std::shared_ptr& pool, + WriteConsumer write_consumer); Status AddBatch(ArrowArray* batch) override; @@ -65,10 +72,10 @@ class BlobFormatWriter : public FormatWriter { } private: - BlobFormatWriter(bool blob_as_descriptor, const std::shared_ptr& out, + BlobFormatWriter(const std::shared_ptr& out, const std::shared_ptr& data_type, - const std::shared_ptr& fs, - const std::shared_ptr& pool); + const std::shared_ptr& fs, const std::shared_ptr& pool, + WriteConsumer write_consumer); Status WriteBlob(std::string_view blob_data); @@ -83,7 +90,6 @@ class BlobFormatWriter : public FormatWriter { static constexpr uint32_t kTmpBufferSize = 1024 * 1024; private: - bool blob_as_descriptor_; uint32_t crc32_ = 0; std::vector bin_lengths_; std::shared_ptr out_; @@ -92,6 +98,7 @@ class BlobFormatWriter : public FormatWriter { std::shared_ptr fs_; std::shared_ptr pool_; std::shared_ptr metrics_; + WriteConsumer write_consumer_; }; } // namespace paimon::blob diff --git a/src/paimon/format/blob/blob_format_writer_test.cpp b/src/paimon/format/blob/blob_format_writer_test.cpp index 3f3779108..8758447c4 100644 --- a/src/paimon/format/blob/blob_format_writer_test.cpp +++ b/src/paimon/format/blob/blob_format_writer_test.cpp @@ -18,9 +18,11 @@ #include #include +#include #include "arrow/c/bridge.h" #include "gtest/gtest.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/stream_utils.h" #include "paimon/data/blob.h" @@ -89,9 +91,9 @@ INSTANTIATE_TEST_SUITE_P(BlobAsDescriptor, BlobFormatWriterTest, ::testing::Valu TEST_P(BlobFormatWriterTest, TestSimple) { // write - 
ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); std::vector> expected_blobs; std::string file1 = paimon::test::GetDataDir() + "/avro/data/avro_with_null"; @@ -149,41 +151,82 @@ TEST_P(BlobFormatWriterTest, TestSimple) { } } +TEST_P(BlobFormatWriterTest, TestWriteConsumerReceivesDescriptors) { + std::vector> captured_descriptors; + BlobFormatWriter::WriteConsumer consumer = + [&captured_descriptors](std::unique_ptr descriptor) -> bool { + captured_descriptors.push_back(std::move(descriptor)); + return true; // request flush + }; + + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, consumer)); + + // Write a normal blob row + std::string file = paimon::test::GetDataDir() + "/xxhash.data"; + ASSERT_OK_AND_ASSIGN(std::shared_ptr blob, + Blob::FromPath(file, /*offset=*/0, /*length=*/91)); + ASSERT_OK_AND_ASSIGN(auto array, PrepareBlobArray(blob)); + ASSERT_OK(AddBatchOnce(writer, array)); + + ASSERT_EQ(captured_descriptors.size(), 1); + ASSERT_TRUE(captured_descriptors[0]); + ASSERT_EQ(captured_descriptors[0]->Uri(), dir_->Str() + "/file.blob"); + ASSERT_EQ(captured_descriptors[0]->Offset(), 4); // after magic(4) + ASSERT_EQ(captured_descriptors[0]->Length(), 91); + + // Write a null blob row — consumer should receive nullptr descriptor + arrow::StructBuilder struct_builder(struct_type_, arrow::default_memory_pool(), + {std::make_shared()}); + auto blob_builder = static_cast(struct_builder.field_builder(0)); + ASSERT_TRUE(struct_builder.Append().ok()); + ASSERT_TRUE(blob_builder->AppendNull().ok()); + std::shared_ptr null_array; + ASSERT_TRUE(struct_builder.Finish(&null_array).ok()); + ASSERT_OK(AddBatchOnce(writer, null_array)); + + 
ASSERT_EQ(captured_descriptors.size(), 2); + ASSERT_FALSE(captured_descriptors[1]); + + ASSERT_OK(writer->Finish()); +} + TEST_P(BlobFormatWriterTest, TestCreateWithInvalidParameters) { // Test with nullptr output stream ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(blob_as_descriptor_, nullptr, struct_type_, file_system_, pool_), + BlobFormatWriter::Create(nullptr, struct_type_, file_system_, pool_, nullptr), "blob format writer create failed. out is nullptr"); // Test with nullptr data type ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, nullptr, file_system_, pool_), + BlobFormatWriter::Create(output_stream_, nullptr, file_system_, pool_, nullptr), "blob format writer create failed. data_type is nullptr"); // Test with nullptr memory pool - ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, nullptr), - "blob format writer create failed. pool is nullptr"); + ASSERT_NOK_WITH_MSG( + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, nullptr, nullptr), + "blob format writer create failed. 
pool is nullptr"); // Test with invalid field count (more than 1 field) auto multi_field_type = arrow::struct_( {arrow::field("blob_col1", arrow::binary()), arrow::field("blob_col2", arrow::binary())}); - ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, - multi_field_type, file_system_, pool_), - "blob data type field number 2 is not 1"); + ASSERT_NOK_WITH_MSG( + BlobFormatWriter::Create(output_stream_, multi_field_type, file_system_, pool_, nullptr), + "blob data type field number 2 is not 1"); // Test with non-blob field (missing blob metadata) auto non_blob_field = arrow::field("regular_col", arrow::binary()); auto non_blob_type = arrow::struct_({non_blob_field}); - ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, non_blob_type, - file_system_, pool_), - "field regular_col: binary is not BLOB"); + ASSERT_NOK_WITH_MSG( + BlobFormatWriter::Create(output_stream_, non_blob_type, file_system_, pool_, nullptr), + "field regular_col: binary is not BLOB"); } TEST_P(BlobFormatWriterTest, TestInvalidCase) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Test nullptr batch ASSERT_NOK_WITH_MSG(writer->AddBatch(nullptr), @@ -200,9 +243,9 @@ TEST_P(BlobFormatWriterTest, TestInvalidCase) { } TEST_P(BlobFormatWriterTest, TestAddBatchWithInvalidBatchLength) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Test batch with wrong length (not 1) arrow::StructBuilder struct_builder(struct_type_, arrow::default_memory_pool(), @@ -228,9 
+271,9 @@ TEST_P(BlobFormatWriterTest, TestAddBatchWithInvalidBatchLength) { } TEST_P(BlobFormatWriterTest, TestReachTargetSize) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Initially should not reach target size ASSERT_OK_AND_ASSIGN(bool reached, writer->ReachTargetSize(true, 1000)); @@ -253,9 +296,9 @@ TEST_P(BlobFormatWriterTest, TestReachTargetSize) { } TEST_P(BlobFormatWriterTest, TestGetWriterMetrics) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); auto metrics = writer->GetWriterMetrics(); ASSERT_TRUE(metrics); @@ -263,9 +306,9 @@ TEST_P(BlobFormatWriterTest, TestGetWriterMetrics) { TEST_P(BlobFormatWriterTest, TestEmptyWriter) { // Test creating a writer and finishing without adding any data - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); ASSERT_OK(writer->Flush()); ASSERT_OK(writer->Finish()); @@ -284,9 +327,9 @@ TEST_P(BlobFormatWriterTest, TestEmptyWriter) { } TEST_P(BlobFormatWriterTest, TestLargeBlob) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Create a temporary large file for testing 
std::string large_file_path = dir_->Str() + "/large_test_file.bin"; @@ -339,9 +382,9 @@ TEST_P(BlobFormatWriterTest, TestLargeBlob) { } TEST_P(BlobFormatWriterTest, TestAddBatchWithNullValues) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Write one row with child-level null blob arrow::StructBuilder struct_builder(struct_type_, arrow::default_memory_pool(), @@ -387,18 +430,18 @@ TEST_P(BlobFormatWriterTest, TestAddBatchWithNullValues) { ASSERT_TRUE(struct_builder2.Finish(&null_struct_array).ok()); auto null_c_array = std::make_unique(); ASSERT_TRUE(arrow::ExportArray(*null_struct_array, null_c_array.get()).ok()); - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer2, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer2, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); ASSERT_NOK_WITH_MSG(writer2->AddBatch(null_c_array.get()), "BlobFormatWriter does not support struct-level null."); ArrowArrayRelease(null_c_array.get()); } TEST_P(BlobFormatWriterTest, TestAddBatchWithZeroLengthBlob) { - ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, - BlobFormatWriter::Create(blob_as_descriptor_, output_stream_, struct_type_, - file_system_, pool_)); + ASSERT_OK_AND_ASSIGN( + std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); // Create a zero-length file std::string zero_file_path = dir_->Str() + "/zero_length_file.bin"; diff --git a/src/paimon/format/blob/blob_writer_builder.h b/src/paimon/format/blob/blob_writer_builder.h index 0ddde46d1..15f6d47b4 100644 --- a/src/paimon/format/blob/blob_writer_builder.h +++ 
b/src/paimon/format/blob/blob_writer_builder.h @@ -24,8 +24,6 @@ #include #include "arrow/api.h" -#include "paimon/common/utils/options_utils.h" -#include "paimon/defs.h" #include "paimon/format/blob/blob_format_writer.h" #include "paimon/format/format_writer.h" #include "paimon/format/writer_builder.h" @@ -61,16 +59,19 @@ class BlobWriterBuilder : public SpecificFSWriterBuilder { return this; } + /// Sets a write consumer that will be called after each blob row is written. + BlobWriterBuilder* WithWriteConsumer(BlobFormatWriter::WriteConsumer consumer) { + write_consumer_ = std::move(consumer); + return this; + } + Result> Build(const std::shared_ptr& out, const std::string& compression) override { assert(out); if (fs_ == nullptr) { return Status::Invalid("File system is nullptr. Please call WithFileSystem() first."); } - PAIMON_ASSIGN_OR_RAISE( - bool blob_as_descriptor, - OptionsUtils::GetValueFromMap(options_, Options::BLOB_AS_DESCRIPTOR, false)); - return BlobFormatWriter::Create(blob_as_descriptor, out, data_type_, fs_, pool_); + return BlobFormatWriter::Create(out, data_type_, fs_, pool_, write_consumer_); } private: @@ -78,6 +79,7 @@ class BlobWriterBuilder : public SpecificFSWriterBuilder { std::shared_ptr data_type_; std::map options_; std::shared_ptr fs_; + BlobFormatWriter::WriteConsumer write_consumer_; }; } // namespace paimon::blob diff --git a/src/paimon/format/blob/blob_writer_builder_test.cpp b/src/paimon/format/blob/blob_writer_builder_test.cpp index 61ef15ca3..349b8ea8b 100644 --- a/src/paimon/format/blob/blob_writer_builder_test.cpp +++ b/src/paimon/format/blob/blob_writer_builder_test.cpp @@ -16,9 +16,16 @@ #include "paimon/format/blob/blob_writer_builder.h" +#include + #include "arrow/api.h" +#include "arrow/c/bridge.h" #include "gtest/gtest.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/data/blob_utils.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/defs.h" +#include 
"paimon/format/format_writer.h" #include "paimon/fs/file_system.h" #include "paimon/fs/local/local_file_system.h" #include "paimon/testing/utils/testharness.h" @@ -52,4 +59,34 @@ TEST_F(BlobWriterBuilderTest, TestSimple) { ASSERT_OK(builder.Build(output_stream_, "none")); } +TEST_F(BlobWriterBuilderTest, TestWithWriteConsumer) { + std::vector> captured; + BlobWriterBuilder builder(struct_type_, {{Options::BLOB_AS_DESCRIPTOR, "false"}}); + builder.WithFileSystem(file_system_); + builder.WithWriteConsumer([&captured](std::unique_ptr descriptor) -> bool { + captured.push_back(std::move(descriptor)); + return true; + }); + + ASSERT_OK_AND_ASSIGN(auto writer, builder.Build(output_stream_, "none")); + + // Build a single-row struct array with raw blob data + arrow::StructBuilder struct_builder(struct_type_, arrow::default_memory_pool(), + {std::make_shared()}); + auto blob_builder = static_cast(struct_builder.field_builder(0)); + ASSERT_TRUE(struct_builder.Append().ok()); + ASSERT_TRUE(blob_builder->Append("hello", 5).ok()); + std::shared_ptr array; + ASSERT_TRUE(struct_builder.Finish(&array).ok()); + + auto c_array = std::make_unique(); + ASSERT_TRUE(arrow::ExportArray(*array, c_array.get()).ok()); + ASSERT_OK(writer->AddBatch(c_array.get())); + + ASSERT_EQ(captured.size(), 1); + ASSERT_TRUE(captured[0]); + ASSERT_EQ(captured[0]->Length(), 5); + ASSERT_OK(writer->Finish()); +} + } // namespace paimon::blob::test diff --git a/src/paimon/format/orc/orc_adapter.cpp b/src/paimon/format/orc/orc_adapter.cpp index b1dd16c2d..d4819e128 100644 --- a/src/paimon/format/orc/orc_adapter.cpp +++ b/src/paimon/format/orc/orc_adapter.cpp @@ -1316,6 +1316,9 @@ arrow::Status WriteBatch(const arrow::Array& array, ::orc::ColumnVectorBatch* co case arrow::Type::type::BINARY: return WriteGenericBatch( array, column_vector_batch); + case arrow::Type::type::LARGE_BINARY: + return WriteGenericBatch( + array, column_vector_batch); case arrow::Type::type::STRING: return WriteGenericBatch( array, 
column_vector_batch); @@ -1379,6 +1382,7 @@ arrow::Result> GetOrcType(const arrow::DataType& ty case arrow::Type::type::STRING: return ::orc::createPrimitiveType(::orc::TypeKind::STRING); case arrow::Type::type::BINARY: + case arrow::Type::type::LARGE_BINARY: return ::orc::createPrimitiveType(::orc::TypeKind::BINARY); case arrow::Type::type::DATE32: return ::orc::createPrimitiveType(::orc::TypeKind::DATE); diff --git a/src/paimon/format/orc/orc_adapter_test.cpp b/src/paimon/format/orc/orc_adapter_test.cpp index 51577e1a6..6813d32c2 100644 --- a/src/paimon/format/orc/orc_adapter_test.cpp +++ b/src/paimon/format/orc/orc_adapter_test.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -181,12 +182,13 @@ TEST_F(OrcAdapterTest, TestGetOrcType) { auto col21_field = arrow::field("col21", arrow::timestamp(arrow::TimeUnit::MILLI, timezone)); auto col22_field = arrow::field("col22", arrow::timestamp(arrow::TimeUnit::MICRO, timezone)); auto col23_field = arrow::field("col23", arrow::timestamp(arrow::TimeUnit::NANO, timezone)); + auto col24_field = arrow::field("col24", arrow::large_binary()); auto arrow_schema = std::make_shared(arrow::FieldVector( {col1_field, col2_field, col3_field, col4_field, col5_field, col6_field, col7_field, col8_field, col9_field, col10_field, col11_field, col12_field, col13_field, col14_field, col15_field, col16_field, col17_field, col18_field, - col19_field, col20_field, col21_field, col22_field, col23_field})); + col19_field, col20_field, col21_field, col22_field, col23_field, col24_field})); ASSERT_OK_AND_ASSIGN(std::unique_ptr<::orc::Type> orc_type, OrcAdapter::GetOrcType(*arrow_schema)); ASSERT_TRUE(orc_type); @@ -196,7 +198,7 @@ TEST_F(OrcAdapterTest, TestGetOrcType) { "array,col14:map,col15:timestamp,col16:struct,col17:timestamp,col18:timestamp,col19:timestamp,col20:timestamp " "with local time zone,col21:timestamp with local time zone,col22:timestamp with local time " - "zone,col23:timestamp with local time zone>", + 
"zone,col23:timestamp with local time zone,col24:binary>", orc_type->toString()); } @@ -206,11 +208,6 @@ TEST_F(OrcAdapterTest, TestGetOrcTypeWithInvalidArrowType) { auto arrow_schema = arrow::schema(arrow::FieldVector({col1_field})); ASSERT_NOK(OrcAdapter::GetOrcType(*arrow_schema)); } - { - auto col1_field = arrow::field("col1", arrow::large_binary()); - auto arrow_schema = arrow::schema(arrow::FieldVector({col1_field})); - ASSERT_NOK(OrcAdapter::GetOrcType(*arrow_schema)); - } { auto col1_field = arrow::field("col1", arrow::uint32()); auto arrow_schema = arrow::schema(arrow::FieldVector({col1_field})); @@ -567,6 +564,39 @@ TEST_P(OrcAdapterTest, TestAppendBatchWithBinaryForAllNull) { ASSERT_TRUE(converted_array->Equals(src_array)) << converted_array->ToString(); } +TEST_P(OrcAdapterTest, TestWriteBatchWithLargeBinary) { + arrow::FieldVector fields = {arrow::field("f0", arrow::large_binary())}; + auto src_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), R"([ + ["descriptor-1"], + [""], + [null], + ["descriptor-2"] + ])") + .ValueOrDie()); + + auto [orc_reader_holder, read_batch] = GenerateOrcReadBatch(src_array); + auto* struct_batch = dynamic_cast<::orc::StructVectorBatch*>(read_batch.get()); + ASSERT_TRUE(struct_batch); + ASSERT_EQ(1, struct_batch->fields.size()); + + auto* large_binary_batch = dynamic_cast<::orc::StringVectorBatch*>(struct_batch->fields[0]); + ASSERT_TRUE(large_binary_batch); + ASSERT_EQ(4, large_binary_batch->numElements); + + std::vector expected_values = {"descriptor-1", "", "descriptor-2"}; + ASSERT_TRUE(large_binary_batch->notNull[0]); + ASSERT_EQ(expected_values[0], + std::string(large_binary_batch->data[0], large_binary_batch->length[0])); + ASSERT_TRUE(large_binary_batch->notNull[1]); + ASSERT_EQ(expected_values[1], + std::string(large_binary_batch->data[1], large_binary_batch->length[1])); + ASSERT_FALSE(large_binary_batch->notNull[2]); + 
ASSERT_TRUE(large_binary_batch->notNull[3]); + ASSERT_EQ(expected_values[2], + std::string(large_binary_batch->data[3], large_binary_batch->length[3])); +} + TEST_P(OrcAdapterTest, TestDecimalAndTimestamp) { auto timezone = DateTimeUtils::GetLocalTimezoneName(); arrow::FieldVector fields = { diff --git a/src/paimon/format/orc/orc_file_batch_reader_test.cpp b/src/paimon/format/orc/orc_file_batch_reader_test.cpp index defbf4b55..7e3158c5b 100644 --- a/src/paimon/format/orc/orc_file_batch_reader_test.cpp +++ b/src/paimon/format/orc/orc_file_batch_reader_test.cpp @@ -194,6 +194,50 @@ INSTANTIATE_TEST_SUITE_P(TestParam, OrcFileBatchReaderTest, ::testing::Values(TestParam{128 * 1024, false}, TestParam{16, false}, TestParam{16, true})); +TEST_F(OrcFileBatchReaderTest, TestReadBinaryWrittenFromBinaryAndLargeBinary) { + auto dir = paimon::test::UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + auto file_system = dir->GetFileSystem(); + + auto check_binary_read_result = [&](const std::shared_ptr& write_type, + const std::string& file_name) { + std::string data_json = R"([ + ["descriptor-1"], + [""], + [null], + ["descriptor-2"] + ])"; + auto write_field = arrow::field("f0", write_type); + auto write_schema = arrow::schema({write_field}); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({write_field}), data_json) + .ValueOrDie()); + + std::string file_path = dir->Str() + "/" + file_name; + WriteArray(file_system, file_path, write_array, write_schema, /*options=*/{}); + + auto read_field = arrow::field("f0", arrow::binary()); + arrow::Schema read_schema({read_field}); + auto orc_batch_reader = PrepareOrcFileBatchReader(file_path, &read_schema, batch_size_, + DEFAULT_NATURAL_READ_SIZE); + + ASSERT_OK_AND_ASSIGN(auto c_file_schema, orc_batch_reader->GetFileSchema()); + auto file_schema = arrow::ImportSchema(c_file_schema.get()).ValueOrDie(); + ASSERT_TRUE(file_schema->Equals(read_schema)); + + auto expected_array = 
std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({read_field}), data_json) + .ValueOrDie()); + auto expected_chunked_array = std::make_shared(expected_array); + ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( + orc_batch_reader.get())); + ASSERT_TRUE(result_array->Equals(expected_chunked_array)); + }; + + check_binary_read_result(arrow::binary(), "binary.orc"); + check_binary_read_result(arrow::large_binary(), "large-binary.orc"); +} + TEST_F(OrcFileBatchReaderTest, TestSetReadSchema) { std::string file_name = paimon::test::GetDataDir() + "/orc/append_09.db/append_09/f1=10/bucket-1/" diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 6a41d8c78..3b12b0183 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -161,6 +161,52 @@ class ParquetFileBatchReaderTest : public ::testing::Test, std::shared_ptr struct_array_; }; +TEST_F(ParquetFileBatchReaderTest, TestReadBinaryWrittenFromBinaryAndLargeBinary) { + auto check_binary_read_result = [&](const std::shared_ptr& write_type, + const std::string& file_name) { + auto write_field = arrow::field("f0", write_type); + auto write_schema = arrow::schema({write_field}); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({write_field}), R"([ + ["descriptor-1"], + [""], + [null], + ["descriptor-2"] + ])") + .ValueOrDie()); + + std::string file_path = PathUtil::JoinPath(dir_->Str(), file_name); + WriteArray(file_path, write_array, write_schema, /*write_batch_size=*/write_array->length(), + /*enable_dictionary=*/false, /*max_row_group_length=*/write_array->length()); + + auto read_field = arrow::field("f0", arrow::binary()); + auto read_schema = arrow::schema({read_field}); + auto parquet_batch_reader = + 
PrepareParquetFileBatchReader(file_path, read_schema, /*predicate=*/nullptr, + /*selection_bitmap=*/std::nullopt, batch_size_); + + ASSERT_OK_AND_ASSIGN(auto c_file_schema, parquet_batch_reader->GetFileSchema()); + auto file_schema = arrow::ImportSchema(c_file_schema.get()).ValueOrDie(); + ASSERT_TRUE(file_schema->Equals(*read_schema)); + + auto expected_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({read_field}), R"([ + ["descriptor-1"], + [""], + [null], + ["descriptor-2"] + ])") + .ValueOrDie()); + auto expected_chunked_array = std::make_shared(expected_array); + ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( + parquet_batch_reader.get())); + ASSERT_TRUE(result_array->Equals(expected_chunked_array)); + }; + + check_binary_read_result(arrow::binary(), "binary.parquet"); + check_binary_read_result(arrow::large_binary(), "large-binary.parquet"); +} + TEST_F(ParquetFileBatchReaderTest, TestSimple) { std::string file_name = paimon::test::GetDataDir() + "/parquet/parquet_append_table.db/parquet_append_table/bucket-0/" diff --git a/src/paimon/testing/utils/test_helper.h b/src/paimon/testing/utils/test_helper.h index 0fccb85ae..93d13161b 100644 --- a/src/paimon/testing/utils/test_helper.h +++ b/src/paimon/testing/utils/test_helper.h @@ -268,15 +268,16 @@ class TestHelper { arrow::Concatenate(read_result->chunks())); PAIMON_ASSIGN_OR_RAISE(auto reconstruct_array, ReconstructBlobArray(concat_array, all_columns_schema)); - PAIMON_ASSIGN_OR_RAISE( - auto separated_array, - BlobUtils::SeparateBlobArray( - std::dynamic_pointer_cast(reconstruct_array))); + PAIMON_ASSIGN_OR_RAISE(auto separated_array, + BlobUtils::SeparateBlobArray( + std::dynamic_pointer_cast(reconstruct_array), + /*inline_fields=*/{})); arrow::EqualOptions equal_options = arrow::EqualOptions::Defaults(); // check main columns - auto separated_schema = BlobUtils::SeparateBlobSchema(all_columns_schema); + auto 
separated_schema = + BlobUtils::SeparateBlobSchema(all_columns_schema, /*inline_fields=*/{}); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( auto main_expected_array, arrow::ipc::internal::json::ArrayFromJSON( diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index afe308b8f..990fd80c0 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -76,11 +77,18 @@ class RecordBatch; } // namespace paimon namespace paimon::test { + +struct ScanReadResult { + std::unique_ptr batch_reader; + std::shared_ptr chunked_array; +}; + class BlobTableInteTest : public testing::Test, public ::testing::WithParamInterface { public: void SetUp() override { pool_ = GetDefaultPool(); dir_ = UniqueTestDirectory::Create("local"); + blob_dir_ = UniqueTestDirectory::Create("local"); } void TearDown() override { @@ -89,7 +97,13 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter void CreateTable(const std::vector& partition_keys, const std::map& options) const { - auto schema = arrow::schema(fields_); + CreateTable(fields_, partition_keys, options); + } + + void CreateTable(const arrow::FieldVector& fields, + const std::vector& partition_keys, + const std::map& options) const { + auto schema = arrow::schema(fields); ::ArrowSchema c_schema; ASSERT_TRUE(arrow::ExportSchema(*schema, &c_schema).ok()); @@ -160,11 +174,10 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter return file_store_commit->Commit(commit_msgs); } - Status ScanAndRead(const std::string& table_path, const std::vector& read_schema, - const std::shared_ptr& expected_array, - const std::shared_ptr& predicate = nullptr, - const std::vector& row_ranges = {}) const { - // scan + /// Scan table and return the plan (without reading data). 
+ Result> ScanTable(const std::string& table_path, + const std::shared_ptr& predicate = nullptr, + const std::vector& row_ranges = {}) const { ScanContextBuilder scan_context_builder(table_path); scan_context_builder.SetPredicate(predicate); if (!row_ranges.empty()) { @@ -174,47 +187,72 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter PAIMON_ASSIGN_OR_RAISE(auto scan_context, scan_context_builder.Finish()); PAIMON_ASSIGN_OR_RAISE(auto table_scan, TableScan::Create(std::move(scan_context))); PAIMON_ASSIGN_OR_RAISE(auto result_plan, table_scan->CreatePlan()); - if (!expected_array) { - EXPECT_TRUE(result_plan->Splits().empty()); - } + return result_plan; + } - // read - auto splits = result_plan->Splits(); + /// Read from table using a pre-scanned plan, returning the ChunkedArray and batch_reader. + /// The batch_reader must outlive the returned ChunkedArray (array memory depends on reader). + Result ReadTable(const std::string& table_path, + const std::vector& read_schema, + const std::shared_ptr& plan, + const std::shared_ptr& predicate = nullptr, + const std::map& options = {}) const { + auto splits = plan->Splits(); ReadContextBuilder read_context_builder(table_path); read_context_builder.SetReadSchema(read_schema).SetPredicate(predicate); + if (!options.empty()) { + read_context_builder.SetOptions(options); + } PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_context, read_context_builder.Finish()); PAIMON_ASSIGN_OR_RAISE(auto table_read, TableRead::Create(std::move(read_context))); PAIMON_ASSIGN_OR_RAISE(auto batch_reader, table_read->CreateReader(splits)); PAIMON_ASSIGN_OR_RAISE(auto read_result, ReadResultCollector::CollectResult(batch_reader.get())); + return ScanReadResult{std::move(batch_reader), std::move(read_result)}; + } - if (!expected_array) { - EXPECT_FALSE(read_result); - return Status::OK(); - } - // add row kind array for expected array + /// Convenience: scan + read in one call. 
+ Result ScanAndReadResult(const std::string& table_path, + const std::vector& read_schema, + const std::shared_ptr& predicate = nullptr, + const std::vector& row_ranges = {}) const { + PAIMON_ASSIGN_OR_RAISE(auto result_plan, ScanTable(table_path, predicate, row_ranges)); + return ReadTable(table_path, read_schema, result_plan, predicate); + } + + /// Prepend a _VALUE_KIND (Insert) column to a StructArray. + static Result> PrependRowKindColumn( + const std::shared_ptr& array) { auto row_kind_scalar = std::make_shared(RowKind::Insert()->ToByteValue()); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( - auto row_kind_array, - arrow::MakeArrayFromScalar(*row_kind_scalar, expected_array->length())); - arrow::ArrayVector expected_with_row_kind_fields = expected_array->fields(); - std::vector expected_with_row_kind_field_names = - arrow::schema(expected_array->type()->fields())->field_names(); - expected_with_row_kind_fields.insert(expected_with_row_kind_fields.begin(), row_kind_array); - expected_with_row_kind_field_names.insert(expected_with_row_kind_field_names.begin(), - "_VALUE_KIND"); - - // check read result + auto row_kind_array, arrow::MakeArrayFromScalar(*row_kind_scalar, array->length())); + arrow::ArrayVector fields_with_row_kind = array->fields(); + std::vector names_with_row_kind = + arrow::schema(array->type()->fields())->field_names(); + fields_with_row_kind.insert(fields_with_row_kind.begin(), row_kind_array); + names_with_row_kind.insert(names_with_row_kind.begin(), "_VALUE_KIND"); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( - auto expected_with_row_kind_array, - arrow::StructArray::Make(expected_with_row_kind_fields, - expected_with_row_kind_field_names)); - auto expected_chunk_array = - std::make_shared(expected_with_row_kind_array); - EXPECT_TRUE(expected_chunk_array->Equals(read_result)) - << "result:" << read_result->ToString() << std::endl + auto result, arrow::StructArray::Make(fields_with_row_kind, names_with_row_kind)); + return std::dynamic_pointer_cast(result); + 
} + + Status ScanAndRead(const std::string& table_path, const std::vector& read_schema, + const std::shared_ptr& expected_array, + const std::shared_ptr& predicate = nullptr, + const std::vector& row_ranges = {}) const { + PAIMON_ASSIGN_OR_RAISE(auto scan_read, + ScanAndReadResult(table_path, read_schema, predicate, row_ranges)); + + if (!expected_array) { + EXPECT_FALSE(scan_read.chunked_array); + return Status::OK(); + } + PAIMON_ASSIGN_OR_RAISE(auto expected_with_row_kind, PrependRowKindColumn(expected_array)); + auto expected_chunk_array = std::make_shared(expected_with_row_kind); + EXPECT_TRUE(expected_chunk_array->Equals(scan_read.chunked_array)) + << "result:" << scan_read.chunked_array->ToString() << std::endl << "expected:" << expected_chunk_array->ToString(); return Status::OK(); } @@ -236,9 +274,154 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter .ValueOrDie()); } + /// Convert a StructArray with raw blob bytes into a StructArray with serialized + /// BlobDescriptor bytes. Each raw blob value is written to a temporary file, and + /// the corresponding cell is replaced with the serialized BlobDescriptor pointing + /// to that file. 
+ Result> ConvertRawBlobToDescriptor( + const std::shared_ptr& raw_array, + const std::set& blob_fields) { + auto fs = std::make_shared(); + int64_t num_rows = raw_array->length(); + auto fields = raw_array->type()->fields(); + + arrow::ArrayVector child_arrays; + + for (const auto& field : fields) { + auto col = raw_array->GetFieldByName(field->name()); + if (blob_fields.count(field->name()) == 0) { + child_arrays.push_back(col); + continue; + } + const auto& binary_array = + arrow::internal::checked_cast(*col); + arrow::LargeBinaryBuilder desc_builder; + for (int64_t i = 0; i < num_rows; ++i) { + if (binary_array.IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(desc_builder.AppendNull()); + continue; + } + std::string_view raw_value = binary_array.GetView(i); + std::string file_path = + blob_dir_->Str() + "/blob_" + std::to_string(blob_file_counter_++) + ".bin"; + PAIMON_ASSIGN_OR_RAISE(auto out, fs->Create(file_path, /*overwrite=*/true)); + PAIMON_ASSIGN_OR_RAISE( + auto written, + out->Write(raw_value.data(), static_cast(raw_value.size()))); + PAIMON_RETURN_NOT_OK(out->Flush()); + PAIMON_RETURN_NOT_OK(out->Close()); + if (static_cast(written) != raw_value.size()) { + return Status::Invalid("Short write: expected {}, wrote {}", raw_value.size(), + written); + } + PAIMON_ASSIGN_OR_RAISE(auto blob, Blob::FromPath(file_path)); + auto descriptor = blob->ToDescriptor(pool_); + PAIMON_RETURN_NOT_OK_FROM_ARROW( + desc_builder.Append(descriptor->data(), descriptor->size())); + } + std::shared_ptr desc_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(desc_builder.Finish(&desc_array)); + child_arrays.push_back(desc_array); + } + + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result, + arrow::StructArray::Make(child_arrays, fields)); + return result; + } + + /// Convert a StructArray with serialized BlobDescriptor bytes back to a StructArray + /// with raw blob bytes. Only blob fields are resolved; other columns (including + /// _VALUE_KIND) are kept as-is. 
+ Result> ConvertDescriptorToRawBlob( + const std::shared_ptr& desc_array, + const std::set& blob_fields) const { + auto fs = std::make_shared(); + int64_t num_rows = desc_array->length(); + auto fields = desc_array->type()->fields(); + + arrow::ArrayVector child_arrays; + + for (const auto& field : fields) { + auto col = desc_array->GetFieldByName(field->name()); + if (blob_fields.count(field->name()) == 0) { + child_arrays.push_back(col); + continue; + } + const auto& binary_array = + arrow::internal::checked_cast(*col); + arrow::LargeBinaryBuilder raw_builder; + for (int64_t i = 0; i < num_rows; ++i) { + if (binary_array.IsNull(i)) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.AppendNull()); + continue; + } + std::string_view descriptor_bytes = binary_array.GetView(i); + PAIMON_ASSIGN_OR_RAISE(auto blob, Blob::FromDescriptor(descriptor_bytes.data(), + descriptor_bytes.size())); + PAIMON_ASSIGN_OR_RAISE(auto data, blob->ToData(fs, pool_)); + PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.Append(data->data(), data->size())); + } + std::shared_ptr raw_array_col; + PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.Finish(&raw_array_col)); + child_arrays.push_back(raw_array_col); + } + + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result, + arrow::StructArray::Make(child_arrays, fields)); + return result; + } + + /// Verify DataFileMeta properties from a scan plan. + /// Each vector element corresponds to one expected DataFileMeta (ordered by file index). 
+ static void VerifyDataFileMetas( + const std::shared_ptr& plan, size_t expected_file_count, + const std::vector& expected_row_counts, + const std::vector& expected_min_seqs, + const std::vector& expected_max_seqs, + const std::vector& expected_first_row_ids, + const std::vector>>& expected_write_cols) { + std::vector> all_files; + for (const auto& split : plan->Splits()) { + auto data_split = std::dynamic_pointer_cast(split); + ASSERT_TRUE(data_split); + for (const auto& file : data_split->DataFiles()) { + all_files.push_back(file); + } + } + ASSERT_EQ(all_files.size(), expected_file_count); + ASSERT_EQ(expected_row_counts.size(), expected_file_count); + ASSERT_EQ(expected_min_seqs.size(), expected_file_count); + ASSERT_EQ(expected_max_seqs.size(), expected_file_count); + ASSERT_EQ(expected_first_row_ids.size(), expected_file_count); + ASSERT_EQ(expected_write_cols.size(), expected_file_count); + for (size_t i = 0; i < all_files.size(); ++i) { + const auto& file = all_files[i]; + EXPECT_EQ(file->row_count, expected_row_counts[i]) + << "file[" << i << "] row_count mismatch"; + EXPECT_EQ(file->min_sequence_number, expected_min_seqs[i]) + << "file[" << i << "] min_sequence_number mismatch"; + EXPECT_EQ(file->max_sequence_number, expected_max_seqs[i]) + << "file[" << i << "] max_sequence_number mismatch"; + ASSERT_TRUE(file->first_row_id.has_value()) + << "file[" << i << "] first_row_id should not be null"; + EXPECT_EQ(file->first_row_id.value(), expected_first_row_ids[i]) + << "file[" << i << "] first_row_id mismatch"; + EXPECT_EQ(file->write_cols, expected_write_cols[i]) + << "file[" << i << "] write_cols mismatch, actual: " + << (file->write_cols ? fmt::format("[{}]", fmt::join(*file->write_cols, ", ")) + : "nullopt") + << ", expected: " + << (expected_write_cols[i] + ? 
fmt::format("[{}]", fmt::join(*expected_write_cols[i], ", ")) + : "nullopt"); + } + } + private: std::shared_ptr pool_; std::unique_ptr dir_; + std::unique_ptr blob_dir_; + int blob_file_counter_ = 0; arrow::FieldVector fields_ = {arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("f1"), arrow::field("f2", arrow::utf8())}; }; @@ -261,142 +444,75 @@ INSTANTIATE_TEST_SUITE_P(FileFormat, BlobTableInteTest, ::testing::ValuesIn(GetTestValuesForBlobTableInteTest())); TEST_P(BlobTableInteTest, TestAppendTableWriteWithBlobAsDescriptorTrue) { - auto dir = UniqueTestDirectory::Create(); arrow::FieldVector fields = {arrow::field("f0", arrow::utf8()), arrow::field("f1", arrow::int32()), BlobUtils::ToArrowField("blob", true)}; - auto schema = arrow::schema(fields); - auto file_format = GetParam(); std::map options = { - {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, file_format}, + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, {Options::BLOB_AS_DESCRIPTOR, "true"}, {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); - ASSERT_OK_AND_ASSIGN( - auto helper, TestHelper::Create(dir->Str(), schema, /*partition_keys=*/{}, - /*primary_keys=*/{}, options, /*is_streaming_mode=*/true)); - int64_t commit_identifier = 0; - - auto generate_blob_array = [&](const std::vector>& blob_descriptors) - -> std::shared_ptr { - arrow::StructBuilder struct_builder( - arrow::struct_(fields), arrow::default_memory_pool(), - {std::make_shared(), std::make_shared(), - std::make_shared()}); - auto string_builder = static_cast(struct_builder.field_builder(0)); - auto int_builder = static_cast(struct_builder.field_builder(1)); - auto binary_builder = - static_cast(struct_builder.field_builder(2)); - for (size_t i 
= 0; i < blob_descriptors.size(); ++i) { - EXPECT_TRUE(struct_builder.Append().ok()); - EXPECT_TRUE(string_builder->Append("str_" + std::to_string(i)).ok()); - if (i % 3 == 0) { - // test null - EXPECT_TRUE(int_builder->AppendNull().ok()); - } else { - EXPECT_TRUE(int_builder->Append(i).ok()); - } - EXPECT_TRUE( - binary_builder->Append(blob_descriptors[i]->data(), blob_descriptors[i]->size()) - .ok()); - } - std::shared_ptr array; - EXPECT_TRUE(struct_builder.Finish(&array).ok()); - return array; - }; - - // prepare data - std::vector> expected_blob_descriptors; - std::string file1 = paimon::test::GetDataDir() + "/avro/data/avro_with_null"; - ASSERT_OK_AND_ASSIGN(auto blob1, Blob::FromPath(file1)); - expected_blob_descriptors.emplace_back(blob1->ToDescriptor(pool_)); - - std::string file2 = paimon::test::GetDataDir() + "/xxhash.data"; - ASSERT_OK_AND_ASSIGN(auto blob2, Blob::FromPath(file2, /*offset=*/0, /*length=*/91)); - expected_blob_descriptors.emplace_back(blob2->ToDescriptor(pool_)); - ASSERT_OK_AND_ASSIGN(auto blob3, Blob::FromPath(file2, /*offset=*/92, /*length=*/85)); - expected_blob_descriptors.emplace_back(blob3->ToDescriptor(pool_)); - ASSERT_OK_AND_ASSIGN(auto blob4, Blob::FromPath(file2, /*offset=*/300, /*length=*/3000)); - expected_blob_descriptors.emplace_back(blob4->ToDescriptor(pool_)); - - auto array = generate_blob_array(expected_blob_descriptors); - ::ArrowArray arrow_array; - ASSERT_TRUE(arrow::ExportArray(*array, &arrow_array).ok()); - RecordBatchBuilder batch_builder(&arrow_array); - ASSERT_OK_AND_ASSIGN(std::unique_ptr batch, batch_builder.Finish()); + // prepare data: input uses plain raw blob bytes for readability + std::string raw_json = R"([ + ["str_0", null, "hello_blob_0"], + ["str_1", 1, "blob_data_1"], + ["str_2", 2, "blob_data_2"], + ["str_3", null, "blob_data_3"] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + 
ASSERT_OK_AND_ASSIGN(auto desc_array, ConvertRawBlobToDescriptor(raw_array, {"blob"})); + // write descriptor array + auto schema = arrow::schema(fields); ASSERT_OK_AND_ASSIGN(auto commit_msgs, - helper->WriteAndCommit(std::move(batch), commit_identifier++, - /*expected_commit_messages=*/std::nullopt)); - - arrow::FieldVector fields_with_row_kind = fields; - fields_with_row_kind.insert(fields_with_row_kind.begin(), - arrow::field("_VALUE_KIND", arrow::int8())); - auto schema_with_row_kind = arrow::schema(fields_with_row_kind); - ASSERT_OK_AND_ASSIGN(std::vector> data_splits, - helper->NewScan(StartupMode::LatestFull(), /*snapshot_id=*/std::nullopt)); - std::string expected_data = R"([ - [0, "str_0", null], - [0, "str_1", 1], - [0, "str_2", 2], - [0, "str_3", null] - ])"; - ASSERT_OK_AND_ASSIGN(bool success, helper->ReadAndCheckResultForBlobTable( - schema_with_row_kind, data_splits, expected_data, - expected_blob_descriptors)); - ASSERT_TRUE(success); + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // read result contains descriptors pointing to paimon internal blob files + // resolve descriptors back to raw bytes, then prepend _VALUE_KIND and compare + ASSERT_OK_AND_ASSIGN(auto result, ScanAndReadResult(table_path, schema->field_names())); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"blob"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); } TEST_P(BlobTableInteTest, TestAppendTableWriteWithBlobAsDescriptorFalse) { - auto dir = UniqueTestDirectory::Create(); arrow::FieldVector fields = {arrow::field("f0", arrow::utf8()), arrow::field("f1", arrow::int32()), BlobUtils::ToArrowField("blob", true)}; - 
auto schema = arrow::schema(fields); - auto file_format = GetParam(); std::map options = { - {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, file_format}, + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, {Options::BLOB_AS_DESCRIPTOR, "false"}, {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); - ASSERT_OK_AND_ASSIGN( - auto helper, TestHelper::Create(dir->Str(), schema, /*partition_keys=*/{}, - /*primary_keys=*/{}, options, /*is_streaming_mode=*/true)); - int64_t commit_identifier = 0; - - std::string data = R"([ + std::string data_json = R"([ ["str_0", null, "apple"], ["str_1", 1, "banana"], ["str_2", 2, "cat"], ["str_3", null, "dog"] ])"; - ASSERT_OK_AND_ASSIGN(std::unique_ptr batch, - TestHelper::MakeRecordBatch(arrow::struct_(fields), data, - /*partition_map=*/{}, /*bucket=*/0, {})); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), data_json).ValueOrDie()); + auto schema = arrow::schema(fields); ASSERT_OK_AND_ASSIGN(auto commit_msgs, - helper->WriteAndCommit(std::move(batch), commit_identifier++, - /*expected_commit_messages=*/std::nullopt)); - - arrow::FieldVector fields_with_row_kind = fields; - fields_with_row_kind.insert(fields_with_row_kind.begin(), - arrow::field("_VALUE_KIND", arrow::int8())); - auto data_type = arrow::struct_(fields_with_row_kind); - ASSERT_OK_AND_ASSIGN(std::vector> data_splits, - helper->NewScan(StartupMode::LatestFull(), /*snapshot_id=*/std::nullopt)); - std::string expected_data = R"([ - [0, "str_0", null, "apple"], - [0, "str_1", 1, "banana"], - [0, "str_2", 2, "cat"], - [0, "str_3", null, "dog"] - ])"; - ASSERT_OK_AND_ASSIGN(bool success, - helper->ReadAndCheckResult(data_type, 
data_splits, expected_data)); - ASSERT_TRUE(success); + WriteArray(table_path, {}, schema->field_names(), {write_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // BLOB_AS_DESCRIPTOR=false: blob data is stored inline, read result should match input + ASSERT_OK(ScanAndRead(table_path, schema->field_names(), write_array)); } TEST_P(BlobTableInteTest, TestBasic) { @@ -1298,7 +1414,6 @@ TEST_P(BlobTableInteTest, TestWithRowIdsForMultipleBlobFiles) { {Options::BUCKET, "-1"}, {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, - {Options::BLOB_AS_DESCRIPTOR, "false"}, {Options::FILE_SYSTEM, "local"}}; CreateTable(/*partition_keys=*/{}, options); std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); @@ -1398,107 +1513,60 @@ TEST_P(BlobTableInteTest, TestWithRowIdsForMultipleBlobFiles) { } TEST_P(BlobTableInteTest, TestAppendTableWriteWithMultipleBlobFields) { - auto dir = UniqueTestDirectory::Create(); arrow::FieldVector fields = { arrow::field("f0", arrow::utf8()), arrow::field("f1", arrow::int32()), BlobUtils::ToArrowField("blob1", true), BlobUtils::ToArrowField("blob2", true)}; - auto schema = arrow::schema(fields); - auto file_format = GetParam(); std::map options = { - {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, file_format}, + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, - {Options::BLOB_AS_DESCRIPTOR, "false"}, {Options::FILE_SYSTEM, "local"}}; - - ASSERT_OK_AND_ASSIGN( - auto helper, TestHelper::Create(dir->Str(), schema, /*partition_keys=*/{}, - /*primary_keys=*/{}, options, /*is_streaming_mode=*/true)); - int64_t commit_identifier = 0; + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); - std::string data = 
R"([ + std::string data_json = R"([ ["str_0", null, "apple", "red"], ["str_1", 1, "banana", "yellow"], ["str_2", 2, "cat", "black"] ])"; - ASSERT_OK_AND_ASSIGN(std::unique_ptr batch, - TestHelper::MakeRecordBatch(arrow::struct_(fields), data, - /*partition_map=*/{}, /*bucket=*/0, {})); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), data_json).ValueOrDie()); + auto schema = arrow::schema(fields); ASSERT_OK_AND_ASSIGN(auto commit_msgs, - helper->WriteAndCommit(std::move(batch), commit_identifier++, - /*expected_commit_messages=*/std::nullopt)); - ASSERT_EQ(commit_msgs.size(), 1); - - ASSERT_OK_AND_ASSIGN(std::optional snapshot, helper->LatestSnapshot()); - ASSERT_TRUE(snapshot); - ASSERT_EQ(1, snapshot.value().Id()); - ASSERT_EQ(3, snapshot.value().NextRowId().value()); - - // Scan and read: verify all fields including multiple blob fields - arrow::FieldVector fields_with_row_kind = fields; - fields_with_row_kind.insert(fields_with_row_kind.begin(), - arrow::field("_VALUE_KIND", arrow::int8())); - auto data_type = arrow::struct_(fields_with_row_kind); - ASSERT_OK_AND_ASSIGN(std::vector> data_splits, - helper->NewScan(StartupMode::LatestFull(), /*snapshot_id=*/std::nullopt)); - std::string expected_data = R"([ - [0, "str_0", null, "apple", "red"], - [0, "str_1", 1, "banana", "yellow"], - [0, "str_2", 2, "cat", "black"] - ])"; - ASSERT_OK_AND_ASSIGN(bool success, - helper->ReadAndCheckResult(data_type, data_splits, expected_data)); - ASSERT_TRUE(success); + WriteArray(table_path, {}, schema->field_names(), {write_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + ASSERT_OK(ScanAndRead(table_path, schema->field_names(), write_array)); } TEST_P(BlobTableInteTest, TestAppendWriteWithNullBlob) { - auto dir = UniqueTestDirectory::Create(); arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("blob", true)}; - auto schema = arrow::schema(fields); - auto file_format 
= GetParam(); std::map options = {{Options::MANIFEST_FORMAT, "orc"}, - {Options::FILE_FORMAT, file_format}, + {Options::FILE_FORMAT, GetParam()}, {Options::BUCKET, "-1"}, {Options::FILE_SYSTEM, "local"}, {Options::ROW_TRACKING_ENABLED, "true"}, - {Options::DATA_EVOLUTION_ENABLED, "true"}, - {Options::BLOB_AS_DESCRIPTOR, "false"}}; - - ASSERT_OK_AND_ASSIGN( - auto helper, TestHelper::Create(dir->Str(), schema, /*partition_keys=*/{}, - /*primary_keys=*/{}, options, /*is_streaming_mode=*/true)); + {Options::DATA_EVOLUTION_ENABLED, "true"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); // Write: row 0 non-null blob, row 1 null blob, row 2 non-null blob - std::string data = R"([ + std::string data_json = R"([ [1, "hello"], [2, null], [3, "world"] ])"; - ASSERT_OK_AND_ASSIGN(std::unique_ptr batch, - TestHelper::MakeRecordBatch(arrow::struct_(fields), data, - /*partition_map=*/{}, /*bucket=*/0, {})); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), data_json).ValueOrDie()); + + auto schema = arrow::schema(fields); ASSERT_OK_AND_ASSIGN(auto commit_msgs, - helper->WriteAndCommit(std::move(batch), /*commit_identifier=*/0, - /*expected_commit_messages=*/std::nullopt)); - - // Read and verify - arrow::FieldVector fields_with_row_kind = fields; - fields_with_row_kind.insert(fields_with_row_kind.begin(), - arrow::field("_VALUE_KIND", arrow::int8())); - auto data_type = arrow::struct_(fields_with_row_kind); - ASSERT_OK_AND_ASSIGN(std::vector> data_splits, - helper->NewScan(StartupMode::LatestFull(), /*snapshot_id=*/std::nullopt)); - std::string expected_data = R"([ - [0, 1, "hello"], - [0, 2, null], - [0, 3, "world"] - ])"; - ASSERT_OK_AND_ASSIGN(bool success, - helper->ReadAndCheckResult(data_type, data_splits, expected_data)); - ASSERT_TRUE(success); + WriteArray(table_path, {}, schema->field_names(), {write_array})); + 
ASSERT_OK(Commit(table_path, commit_msgs)); + ASSERT_OK(ScanAndRead(table_path, schema->field_names(), write_array)); } TEST_P(BlobTableInteTest, TestReadTableWithMultiBlobFields) { @@ -1574,4 +1642,731 @@ TEST_P(BlobTableInteTest, TestReadTableWithMultiBlobFields) { } } +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWithoutExternalStorage) { + if (GetParam() == "lance") { + return; + } + // Two blob fields configured via BLOB_DESCRIPTOR_FIELD, no external storage. + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Input uses plain raw bytes for readability + std::string raw_json = R"([ + [1, "image_data_0", "video_data_0"], + [2, "image_data_1", "video_data_1"], + [3, "image_data_2", "video_data_2"] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, ConvertRawBlobToDescriptor(raw_array, {"b0", "b1"})); + + // write descriptor array + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Scan and verify DataFileMeta: no external storage -> write_cols should be nullopt + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/1, /*expected_row_counts=*/{3}, + /*expected_min_seqs=*/{1}, 
/*expected_max_seqs=*/{1}, + /*expected_first_row_ids=*/{0}, + /*expected_write_cols=*/{std::nullopt}); + + // Read and resolve descriptors back to raw bytes + std::map read_options = {}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // Descriptor bytes should be unchanged (inline, not repacked) + ASSERT_TRUE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + ASSERT_TRUE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWithExternalStorage) { + if (GetParam() == "lance") { + return; + } + // Two blob fields configured via BLOB_DESCRIPTOR_FIELD + BLOB_EXTERNAL_STORAGE_FIELD + // with BLOB_EXTERNAL_STORAGE_PATH pointing to blob_dir_. 
+ arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Input uses plain raw bytes for readability + std::string raw_json = R"([ + [1, "image_data_0", "video_data_0"], + [2, "image_data_1", "video_data_1"], + [3, "image_data_2", "video_data_2"] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, ConvertRawBlobToDescriptor(raw_array, {"b0", "b1"})); + + // write descriptor array + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Scan and verify DataFileMeta: with external storage -> write_cols should be explicit + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/1, /*expected_row_counts=*/{3}, + /*expected_min_seqs=*/{1}, /*expected_max_seqs=*/{1}, + /*expected_first_row_ids=*/{0}, + /*expected_write_cols=*/{std::vector{"f0", "b0", "b1"}}); + + // Read and resolve descriptors back to raw bytes + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "true"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, 
read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // Descriptor bytes should differ (repacked by external storage) + ASSERT_FALSE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + ASSERT_FALSE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorage) { + if (GetParam() == "lance") { + return; + } + // 4 blob fields: b0,b1 have external storage, b2,b3 are descriptor-only (no external storage). + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1,b2,b3"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Input uses plain raw bytes for readability; some blob fields are null + std::string raw_json = R"([ + [1, "img_0", null, "doc_0", "log_0"], + [2, null, "vid_1", null, "log_1"], + [3, "img_2", "vid_2", "doc_2", null ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + 
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, + ConvertRawBlobToDescriptor(raw_array, {"b0", "b1", "b2", "b3"})); + + // write descriptor array + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Scan and verify DataFileMeta: external storage on b0,b1 -> write_cols should be explicit + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas( + plan, /*expected_file_count=*/1, /*expected_row_counts=*/{3}, + /*expected_min_seqs=*/{1}, /*expected_max_seqs=*/{1}, + /*expected_first_row_ids=*/{0}, + /*expected_write_cols=*/{std::vector{"f0", "b0", "b1", "b2", "b3"}}); + + // Read and resolve all descriptors back to raw bytes + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "true"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1", "b2", "b3"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // b0,b1 repacked by external storage, should differ + ASSERT_FALSE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + ASSERT_FALSE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); + // b2,b3 inline descriptor, should match + ASSERT_TRUE(read_struct->GetFieldByName("b2")->Equals(desc_array->GetFieldByName("b2"))); + ASSERT_TRUE(read_struct->GetFieldByName("b3")->Equals(desc_array->GetFieldByName("b3"))); +} + 
+TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialInline) { + if (GetParam() == "lance") { + return; + } + // 4 blob fields: b0,b1 are descriptor (inline), b2,b3 are regular blob (written to .blob + // files). No external storage. + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Input uses plain raw bytes: + // b0: all non-null, b1: has nulls, b2: all non-null, b3: has nulls + std::string raw_json = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ], + [3, "img_2", null, "raw_2_2", "raw_3_2" ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, + ConvertRawBlobToDescriptor(raw_array, {"b0", "b1", "b2", "b3"})); + + // write: b0,b1 as descriptor bytes; b2,b3 as raw bytes (paimon writes them to .blob files) + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Scan and verify DataFileMeta: b2,b3 go to .blob files, "f0", "b0", "b1" go to main files. 
+ ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/3, /*expected_row_counts=*/{3, 3, 3}, + /*expected_min_seqs=*/{1, 1, 1}, /*expected_max_seqs=*/{1, 1, 1}, + /*expected_first_row_ids=*/{0, 0, 0}, + /*expected_write_cols=*/ + {std::vector{"f0", "b0", "b1"}, std::vector{"b2"}, + std::vector{"b3"}}); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "true"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // b0,b1 inline descriptor (not repacked), should match input + ASSERT_TRUE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + ASSERT_TRUE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); + + // Resolve b0,b1 descriptors back to raw bytes, then compare full struct + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1", "b2", "b3"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageRepack) { + if (GetParam() == "lance") { + return; + } + // 4 blob fields: b0,b1 are descriptor + external-storage-field WITH external-storage-path. + // b2,b3 are regular blob (written to .blob files). + // All blob descriptors get repacked by external storage or .blob writer. 
+ arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // b0: all non-null, b1: has nulls, b2: all non-null, b3: has nulls + std::string raw_json = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ], + [3, "img_2", null, "raw_2_2", "raw_3_2" ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, + ConvertRawBlobToDescriptor(raw_array, {"b0", "b1", "b2", "b3"})); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // b0,b1 repacked to external storage; b2,b3 go to .blob files. + // Main file contains f0,b0,b1; .blob files for b2 and b3. 
+ ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/3, /*expected_row_counts=*/{3, 3, 3}, + /*expected_min_seqs=*/{1, 1, 1}, /*expected_max_seqs=*/{1, 1, 1}, + /*expected_first_row_ids=*/{0, 0, 0}, + /*expected_write_cols=*/ + {std::vector{"f0", "b0", "b1"}, std::vector{"b2"}, + std::vector{"b3"}}); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "true"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // Resolve descriptors back to raw bytes and compare + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1", "b2", "b3"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // All blob columns should differ from input desc_array (all repacked) + ASSERT_FALSE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + ASSERT_FALSE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); + ASSERT_FALSE(read_struct->GetFieldByName("b2")->Equals(desc_array->GetFieldByName("b2"))); + ASSERT_FALSE(read_struct->GetFieldByName("b3")->Equals(desc_array->GetFieldByName("b3"))); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageSingleField) { + if (GetParam() == "lance") { + return; + } + // 4 blob fields: b0,b1 are descriptor; only b1 has external storage. + // b2,b3 are regular blob (written to .blob files). 
+ arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // b0: all non-null, b1: has nulls, b2: all non-null, b3: has nulls + std::string raw_json = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ], + [3, "img_2", null, "raw_2_2", "raw_3_2" ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, + ConvertRawBlobToDescriptor(raw_array, {"b0", "b1", "b2", "b3"})); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // b1 repacked to external storage; b2,b3 go to .blob files; b0 stays inline in main file. + // Main file contains f0,b0,b1; .blob files for b2 and b3. 
+ ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/3, /*expected_row_counts=*/{3, 3, 3}, + /*expected_min_seqs=*/{1, 1, 1}, /*expected_max_seqs=*/{1, 1, 1}, + /*expected_first_row_ids=*/{0, 0, 0}, + /*expected_write_cols=*/ + {std::vector{"f0", "b0", "b1"}, std::vector{"b2"}, + std::vector{"b3"}}); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "true"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // Resolve all descriptors back to raw bytes and compare + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1", "b2", "b3"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // b0 is inline descriptor (not repacked), should match input + ASSERT_TRUE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); + // b1 is repacked by external storage, should differ + ASSERT_FALSE(read_struct->GetFieldByName("b1")->Equals(desc_array->GetFieldByName("b1"))); + // b2,b3 are repacked by .blob writer, should differ + ASSERT_FALSE(read_struct->GetFieldByName("b2")->Equals(desc_array->GetFieldByName("b2"))); + ASSERT_FALSE(read_struct->GetFieldByName("b3")->Equals(desc_array->GetFieldByName("b3"))); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageNoAsDescriptor) { + if (GetParam() == "lance") { + return; + } + // Same as TestBlobDescriptorFieldPartialExternalStorageSingleField but without + // BLOB_AS_DESCRIPTOR in table options. Only b0 is explicitly converted to descriptor before + // write. 
b1 is written as raw bytes but still configured as descriptor field, so paimon should + // auto-convert it to descriptor internally (write auto-detects descriptor via magic header). + // After read with BLOB_AS_DESCRIPTOR=true, b0 and b1 are both stored as descriptor. + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // b0: all non-null, b1: has nulls, b2: all non-null, b3: has nulls + std::string raw_json = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ], + [3, "img_2", null, "raw_2_2", "raw_3_2" ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + // Only convert b0 to descriptor; b1,b2,b3 remain as raw bytes + ASSERT_OK_AND_ASSIGN(auto desc_array, ConvertRawBlobToDescriptor(raw_array, {"b0"})); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {desc_array})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // b1 repacked to external storage; b2,b3 go to .blob files; b0 stays inline in main file. 
+ ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + VerifyDataFileMetas(plan, /*expected_file_count=*/3, /*expected_row_counts=*/{3, 3, 3}, + /*expected_min_seqs=*/{1, 1, 1}, /*expected_max_seqs=*/{1, 1, 1}, + /*expected_first_row_ids=*/{0, 0, 0}, + /*expected_write_cols=*/ + {std::vector{"f0", "b0", "b1"}, std::vector{"b2"}, + std::vector{"b3"}}); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + + // b0 is inline descriptor (not repacked), should match input desc_array + ASSERT_TRUE(read_struct->GetFieldByName("b0")->Equals(desc_array->GetFieldByName("b0"))); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorMultiCommitAndShuffledReadSchema) { + if (GetParam() == "lance") { + return; + } + // Similar to TestBlobDescriptorFieldPartialExternalStorageNoAsDescriptor but: + // 1. Multiple write+commit rounds + // 2. 
Read schema is shuffled: b3, b2, b1, b0, f0 + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + auto schema = arrow::schema(fields); + + // --- First write+commit --- + std::string raw_json_1 = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ] + ])"; + auto raw_array_1 = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json_1).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array_1, ConvertRawBlobToDescriptor(raw_array_1, {"b0"})); + ASSERT_OK_AND_ASSIGN(auto commit_msgs_1, + WriteArray(table_path, {}, schema->field_names(), {desc_array_1})); + ASSERT_OK(Commit(table_path, commit_msgs_1)); + + // --- Second write+commit --- + std::string raw_json_2 = R"([ + [3, "img_2", "vid_2", "raw_2_2", "raw_3_2"], + [4, null, "vid_3", "raw_2_3", "raw_3_3"] + ])"; + auto raw_array_2 = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json_2).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array_2, ConvertRawBlobToDescriptor(raw_array_2, {"b0"})); + ASSERT_OK_AND_ASSIGN(auto commit_msgs_2, + WriteArray(table_path, {}, schema->field_names(), {desc_array_2})); + ASSERT_OK(Commit(table_path, commit_msgs_2)); + + // 
--- Third write+commit --- + std::string raw_json_3 = R"([ + [5, "img_4", null, "raw_2_4", null ], + [6, "img_5", "vid_5", null, "raw_3_5"] + ])"; + auto raw_array_3 = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json_3).ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array_3, ConvertRawBlobToDescriptor(raw_array_3, {"b0"})); + ASSERT_OK_AND_ASSIGN(auto commit_msgs_3, + WriteArray(table_path, {}, schema->field_names(), {desc_array_3})); + ASSERT_OK(Commit(table_path, commit_msgs_3)); + + // --- Read with shuffled schema: b3, b2, b1, b0, f0 --- + std::vector shuffled_read_schema = {"b3", "b2", "b1", "b0", "f0"}; + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, shuffled_read_schema, plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // Build expected array in shuffled order from all 3 batches + arrow::FieldVector shuffled_fields = { + BlobUtils::ToArrowField("b3", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b0", true), + arrow::field("f0", arrow::int32())}; + std::string expected_json = R"([ + ["raw_3_0", "raw_2_0", null, "img_0", 1], + [null, "raw_2_1", "vid_1", "img_1", 2], + ["raw_3_2", "raw_2_2", "vid_2", "img_2", 3], + ["raw_3_3", "raw_2_3", "vid_3", null, 4], + [null, "raw_2_4", null, "img_4", 5], + ["raw_3_5", null, "vid_5", "img_5", 6] + ])"; + auto expected_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(shuffled_fields), expected_json) + .ValueOrDie()); + + // Resolve descriptors (b0, b1 are descriptor fields) back to raw bytes + ASSERT_OK_AND_ASSIGN(auto resolved, 
ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); +} + +TEST_P(BlobTableInteTest, TestDataEvolutionWithBlobDescriptorField) { + if (GetParam() == "lance") { + return; + } + // Test DataEvolution (split-column write) combined with blob descriptor fields. + // Schema: f0(int32), b0(blob descriptor inline), b1(blob descriptor+external), b2(blob), + // b3(blob) Commit 1: file A writes (f0, b2, b3) Commit 2: file B writes (f0, b0, b1) with + // SetFirstRowId(0) -> merges with commit 1 Commit 3: file A writes (f0, b0, b1, b3) Commit 4: + // file B writes (b0, b1, b3) with SetFirstRowId(3) -> merges with commit 3 Duplicate columns + // (b0, b1, b3) in commit 4 - newer file (B) takes precedence. + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // --- Commit 1: file A (f0, b2, b3), Commit 2: file B (f0, b0, b1) SetFirstRowId(0) --- + std::string file_a1_json = R"([ + [1, "raw_2_0", "raw_3_0"], + [2, "raw_2_1", null ], + [3, null, "raw_3_2"] + ])"; + arrow::FieldVector file_a1_fields = {fields[0], fields[3], fields[4]}; + auto file_a1_array = std::dynamic_pointer_cast( + 
arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(file_a1_fields), file_a1_json) + .ValueOrDie()); + + std::string file_b1_json = R"([ + [1, "img_0", "vid_0"], + [2, "img_1", null ], + [3, "img_2", "vid_2"] + ])"; + arrow::FieldVector file_b1_fields = {fields[0], fields[1], fields[2]}; + auto file_b1_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(file_b1_fields), file_b1_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto file_b1_desc, ConvertRawBlobToDescriptor(file_b1_array, {"b0"})); + + ASSERT_OK_AND_ASSIGN(auto commit_msgs_a1, + WriteArray(table_path, {}, {"f0", "b2", "b3"}, {file_a1_array})); + ASSERT_OK(Commit(table_path, commit_msgs_a1)); + + ASSERT_OK_AND_ASSIGN(auto commit_msgs_b1, + WriteArray(table_path, {}, {"f0", "b0", "b1"}, {file_b1_desc})); + SetFirstRowId(0, commit_msgs_b1); + ASSERT_OK(Commit(table_path, commit_msgs_b1)); + + // --- Commit 3: file A (f0, b0, b1, b3), Commit 4: file B (b0, b1, b3) SetFirstRowId(3) --- + // Duplicate cols b0, b1, b3: file B (commit 4, newer) takes precedence. 
+ std::string file_a2_json = R"([ + [4, "img_3_old", "vid_3_old", "raw_3_3_old"], + [5, null, "vid_4_old", "raw_3_4_old"], + [6, "img_5_old", null, null ] + ])"; + arrow::FieldVector file_a2_fields = {fields[0], fields[1], fields[2], fields[4]}; + auto file_a2_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(file_a2_fields), file_a2_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto file_a2_desc, ConvertRawBlobToDescriptor(file_a2_array, {"b0"})); + + std::string file_b2_json = R"([ + ["img_3", "vid_3", "raw_3_3"], + [null, "vid_4", "raw_3_4"], + ["img_5", null, null ] + ])"; + arrow::FieldVector file_b2_fields = {fields[1], fields[2], fields[4]}; + auto file_b2_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(file_b2_fields), file_b2_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto file_b2_desc, ConvertRawBlobToDescriptor(file_b2_array, {"b0"})); + + ASSERT_OK_AND_ASSIGN(auto commit_msgs_a2, + WriteArray(table_path, {}, {"f0", "b0", "b1", "b3"}, {file_a2_desc})); + ASSERT_OK(Commit(table_path, commit_msgs_a2)); + + ASSERT_OK_AND_ASSIGN(auto commit_msgs_b2, + WriteArray(table_path, {}, {"b0", "b1", "b3"}, {file_b2_desc})); + SetFirstRowId(3, commit_msgs_b2); + ASSERT_OK(Commit(table_path, commit_msgs_b2)); + + // --- Read all data with full schema --- + // Round 2: b0, b1, b3 come from file B (newer), f0 from file A, b2 not written -> null + std::vector read_schema = {"f0", "b0", "b1", "b2", "b3"}; + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, read_schema, plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_EQ(read_struct->length(), 6); + + // 
Expected: round1 all columns present; round2 b2=null, b0/b1/b3 from file B (newer) + std::string expected_json = R"([ + [1, "img_0", "vid_0", "raw_2_0", "raw_3_0"], + [2, "img_1", null, "raw_2_1", null ], + [3, "img_2", "vid_2", null, "raw_3_2" ], + [4, "img_3", "vid_3", null, "raw_3_3" ], + [5, null, "vid_4", null, "raw_3_4" ], + [6, "img_5", null, null, null ] + ])"; + auto expected_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), expected_json) + .ValueOrDie()); + + // Resolve descriptors back to raw bytes + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); + ASSERT_TRUE(resolved->type()->Equals(expected_with_rk->type())) + << resolved->type()->ToString() << std::endl + << expected_with_rk->type()->ToString(); + ASSERT_TRUE(resolved->Equals(expected_with_rk)) << resolved->ToString() << std::endl + << expected_with_rk->ToString(); +} + +TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWriteRawBytesDirectly) { + if (GetParam() == "lance") { + return; + } + // Similar to TestBlobDescriptorFieldWithoutExternalStorage but writes raw bytes directly + // without converting to descriptor first. The writer should auto-detect that the data + // is NOT a descriptor (no magic header) and handle it accordingly. 
+ arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true)}; + + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, {Options::FILE_FORMAT, GetParam()}, + {Options::TARGET_FILE_SIZE, "700"}, {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Write raw bytes directly (no ConvertRawBlobToDescriptor) + std::string raw_json = R"([ + [1, "image_data_0", "video_data_0"], + [2, "image_data_1", "video_data_1"], + [3, "image_data_2", "video_data_2"] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + + auto schema = arrow::schema(fields); + ASSERT_NOK_WITH_MSG( + WriteArray(table_path, {}, schema->field_names(), {raw_array}), + "BLOB inline fields configured by blob-descriptor-field or blob-view-field require values " + "to be a BlobDescriptor or BlobViewStruct."); +} + } // namespace paimon::test From 7691ce06a44f93943b9675d5b2727340c3006587 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 25 May 2026 19:02:41 +0800 Subject: [PATCH 2/9] fix precommit --- src/paimon/core/casting/binary_to_blob_cast_executor.cpp | 2 +- src/paimon/core/casting/binary_to_blob_cast_executor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.cpp b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp index 878e12d3c..d160ba14c 100644 --- a/src/paimon/core/casting/binary_to_blob_cast_executor.cpp +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp @@ -78,4 +78,4 @@ Result> BinaryToBlobCastExecutor::Cast( return arrow::MakeArray(array_data); } -} // 
namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.h b/src/paimon/core/casting/binary_to_blob_cast_executor.h index e12f365d7..111a5bc71 100644 --- a/src/paimon/core/casting/binary_to_blob_cast_executor.h +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.h @@ -39,4 +39,4 @@ class BinaryToBlobCastExecutor : public CastExecutor { arrow::MemoryPool* pool) const override; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon From 9b0d4d241eab714966c17a9dfd7426ae9ce359f4 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 25 May 2026 21:06:39 +0800 Subject: [PATCH 3/9] fix clang tidy --- src/paimon/core/io/external_storage_blob_writer.cpp | 2 -- src/paimon/core/io/external_storage_blob_writer.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/paimon/core/io/external_storage_blob_writer.cpp b/src/paimon/core/io/external_storage_blob_writer.cpp index 339cf04f2..a8f2be20f 100644 --- a/src/paimon/core/io/external_storage_blob_writer.cpp +++ b/src/paimon/core/io/external_storage_blob_writer.cpp @@ -56,8 +56,6 @@ ExternalStorageBlobWriter::ExternalStorageBlobWriter( options_(options), logger_(Logger::GetLogger("ExternalStorageBlobWriter")) {} -ExternalStorageBlobWriter::~ExternalStorageBlobWriter() {} - Result> ExternalStorageBlobWriter::CreateFieldRollingWriter(FieldWriter* field_writer) { auto field = write_schema_->GetFieldByName(field_writer->field_name); diff --git a/src/paimon/core/io/external_storage_blob_writer.h b/src/paimon/core/io/external_storage_blob_writer.h index c5cc10282..7b144b1c7 100644 --- a/src/paimon/core/io/external_storage_blob_writer.h +++ b/src/paimon/core/io/external_storage_blob_writer.h @@ -65,8 +65,6 @@ class ExternalStorageBlobWriter { const std::shared_ptr& memory_pool, const CoreOptions& options); - ~ExternalStorageBlobWriter(); - /// Transforms a batch by writing external storage fields to .blob files and replacing /// the 
BLOB values with serialized BlobDescriptor bytes. Result> TransformBatch( From 260ab0a1901e78ac03da4a66eb060f5820c37105 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Tue, 26 May 2026 20:20:14 +0800 Subject: [PATCH 4/9] fix --- src/paimon/common/data/blob_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index ca0f7923e..cbd0113b0 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -35,7 +35,7 @@ BlobUtils::SeparatedSchemas BlobUtils::SeparateBlobSchema( const std::shared_ptr& schema, const std::set& inline_fields) { std::vector> main_fields; std::vector> blob_fields; - for (int i = 0; i < schema->num_fields(); i++) { + for (int32_t i = 0; i < schema->num_fields(); i++) { auto field = schema->field(i); if (IsBlobField(field) && inline_fields.count(field->name()) == 0) { // Non-inline BLOB -> goes to blob file From ab407727e37b390663e3647d9053eba8a1377fec Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Wed, 27 May 2026 11:37:49 +0800 Subject: [PATCH 5/9] complete error message --- src/paimon/common/data/blob_utils.cpp | 7 ++++--- src/paimon/common/data/blob_utils_test.cpp | 18 ++++++++++++------ test/inte/blob_table_inte_test.cpp | 8 ++++---- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index cbd0113b0..0b29d05a5 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -149,9 +149,10 @@ Status BlobUtils::ValidateInlineBlobDescriptors( PAIMON_ASSIGN_OR_RAISE(bool is_descriptor, BlobDescriptor::IsBlobDescriptor(value.data(), value.size())); if (!is_descriptor) { - return Status::Invalid( - "BLOB inline fields configured by blob-descriptor-field or blob-view-field " - "require values to be a BlobDescriptor or BlobViewStruct."); + return Status::Invalid(fmt::format( + "BLOB inline field 
{} configured by blob-descriptor-field or blob-view-field " + "require values to be a BlobDescriptor or BlobViewStruct.", + field_name)); } } } diff --git a/src/paimon/common/data/blob_utils_test.cpp b/src/paimon/common/data/blob_utils_test.cpp index 8d0e507b8..268ed8c70 100644 --- a/src/paimon/common/data/blob_utils_test.cpp +++ b/src/paimon/common/data/blob_utils_test.cpp @@ -275,8 +275,10 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsWithRawBytes) { auto struct_array = arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), - "BLOB inline fields configured by blob-descriptor-field"); + ASSERT_NOK_WITH_MSG( + BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline field b0 configured by blob-descriptor-field or blob-view-field " + "require values to be a BlobDescriptor or BlobViewStruct."); } TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMixedValidAndInvalid) { @@ -292,8 +294,10 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMixedValidAndInvalid) { auto struct_array = arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), - "BLOB inline fields configured by blob-descriptor-field"); + ASSERT_NOK_WITH_MSG( + BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline field b0 configured by blob-descriptor-field or blob-view-field " + "require values to be a BlobDescriptor or BlobViewStruct."); } TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { @@ -315,8 +319,10 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { {BlobUtils::ToArrowField("b0"), BlobUtils::ToArrowField("b1")}) .ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - 
ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0", "b1"}), - "BLOB inline fields configured by blob-descriptor-field"); + ASSERT_NOK_WITH_MSG( + BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0", "b1"}), + "BLOB inline field b1 configured by blob-descriptor-field or blob-view-field " + "require values to be a BlobDescriptor or BlobViewStruct."); } } // namespace paimon::test diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index 990fd80c0..ad4e28283 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -2363,10 +2363,10 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWriteRawBytesDirectly) { arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); auto schema = arrow::schema(fields); - ASSERT_NOK_WITH_MSG( - WriteArray(table_path, {}, schema->field_names(), {raw_array}), - "BLOB inline fields configured by blob-descriptor-field or blob-view-field require values " - "to be a BlobDescriptor or BlobViewStruct."); + ASSERT_NOK_WITH_MSG(WriteArray(table_path, {}, schema->field_names(), {raw_array}), + "BLOB inline field b0 configured by blob-descriptor-field or " + "blob-view-field require values " + "to be a BlobDescriptor or BlobViewStruct."); } } // namespace paimon::test From 6f563857fe9d213055a1b7ed915af8d1983c78e9 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Fri, 29 May 2026 13:13:56 +0800 Subject: [PATCH 6/9] fix review --- src/paimon/common/data/blob_utils.cpp | 4 + src/paimon/common/data/blob_utils_test.cpp | 6 + src/paimon/core/append/append_only_writer.cpp | 2 +- .../casting/binary_to_blob_cast_executor.cpp | 2 +- .../casting/binary_to_blob_cast_executor.h | 2 +- .../core/io/external_storage_blob_writer.cpp | 151 ++++++++-------- .../core/io/external_storage_blob_writer.h | 21 ++- .../io/external_storage_blob_writer_test.cpp | 10 +- .../core/io/field_mapping_reader_test.cpp | 23 ++- 
src/paimon/core/io/rolling_blob_file_writer.h | 2 +- .../blob/blob_file_batch_reader_test.cpp | 6 +- src/paimon/format/blob/blob_format_writer.cpp | 23 +-- src/paimon/format/blob/blob_format_writer.h | 11 +- .../format/blob/blob_format_writer_test.cpp | 92 +++++----- src/paimon/format/blob/blob_writer_builder.h | 2 +- .../parquet_file_batch_reader_test.cpp | 18 +- src/paimon/testing/utils/test_helper.h | 57 ------ test/inte/blob_table_inte_test.cpp | 162 ++++++++---------- 18 files changed, 263 insertions(+), 331 deletions(-) diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index 0b29d05a5..a3ce07cc8 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -22,6 +22,7 @@ #include "arrow/api.h" #include "arrow/array/array_nested.h" #include "arrow/type.h" +#include "fmt/format.h" #include "paimon/common/data/blob_defs.h" #include "paimon/common/data/blob_descriptor.h" #include "paimon/common/utils/arrow/status_utils.h" @@ -78,6 +79,9 @@ Result BlobUtils::SeparateBlobArray( return Status::Invalid( "SeparateBlobArray expects at least one non-inline blob field, but got none."); } + if (main_fields.empty()) { + return Status::Invalid("SeparateBlobArray expects at least one main field, but got none."); + } SeparatedStructArrays result; PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(result.main_array, diff --git a/src/paimon/common/data/blob_utils_test.cpp b/src/paimon/common/data/blob_utils_test.cpp index 268ed8c70..4171a2ec7 100644 --- a/src/paimon/common/data/blob_utils_test.cpp +++ b/src/paimon/common/data/blob_utils_test.cpp @@ -175,6 +175,12 @@ TEST_F(BlobUtilsTest, SeparateBlobArray) { ASSERT_NOK_WITH_MSG( BlobUtils::SeparateBlobArray(struct_array, /*inline_fields=*/{"f2_blob"}), "SeparateBlobArray expects at least one non-inline blob field, but got none."); + + // All fields are blob with no inline -> no main field -> should return error + auto all_blob_struct = 
arrow::StructArray::Make({blob_array_data}, {blob_field}).ValueOrDie(); + auto all_blob_sa = std::dynamic_pointer_cast(all_blob_struct); + ASSERT_NOK_WITH_MSG(BlobUtils::SeparateBlobArray(all_blob_sa, /*inline_fields=*/{}), + "SeparateBlobArray expects at least one main field, but got none."); } TEST_F(BlobUtilsTest, SeparateBlobArrayWithPartialInline) { diff --git a/src/paimon/core/append/append_only_writer.cpp b/src/paimon/core/append/append_only_writer.cpp index b27c98fdd..b23975d90 100644 --- a/src/paimon/core/append/append_only_writer.cpp +++ b/src/paimon/core/append/append_only_writer.cpp @@ -200,7 +200,7 @@ AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingRowWrit external_storage_writer_ = std::make_unique( write_schema_, blob_context->GetExternalStorageFields(), blob_context->GetExternalStoragePath().value(), schema_id_, seq_num_counter_, - options_.GetFileSystem(), path_factory_, memory_pool_, options_); + path_factory_, options_, memory_pool_); if (!main_write_cols) { // To align with java, when require external storage writer, main writer will set write // cols in DataFileMeta diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.cpp b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp index d160ba14c..3716eebef 100644 --- a/src/paimon/core/casting/binary_to_blob_cast_executor.cpp +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. + * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/paimon/core/casting/binary_to_blob_cast_executor.h b/src/paimon/core/casting/binary_to_blob_cast_executor.h index 111a5bc71..e62983d68 100644 --- a/src/paimon/core/casting/binary_to_blob_cast_executor.h +++ b/src/paimon/core/casting/binary_to_blob_cast_executor.h @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. 
+ * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/paimon/core/io/external_storage_blob_writer.cpp b/src/paimon/core/io/external_storage_blob_writer.cpp index a8f2be20f..4ce51ae2c 100644 --- a/src/paimon/core/io/external_storage_blob_writer.cpp +++ b/src/paimon/core/io/external_storage_blob_writer.cpp @@ -27,7 +27,6 @@ #include "paimon/common/data/blob_utils.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/scope_guard.h" -#include "paimon/core/core_options.h" #include "paimon/core/io/data_file_path_factory.h" #include "paimon/core/io/data_file_writer.h" #include "paimon/format/blob/blob_writer_builder.h" @@ -42,19 +41,16 @@ ExternalStorageBlobWriter::ExternalStorageBlobWriter( const std::shared_ptr& write_schema, const std::set& external_storage_fields, const std::string& external_storage_path, int64_t schema_id, const std::shared_ptr& seq_num_counter, - const std::shared_ptr& file_system, - const std::shared_ptr& path_factory, - const std::shared_ptr& memory_pool, const CoreOptions& options) + const std::shared_ptr& path_factory, const CoreOptions& options, + const std::shared_ptr& memory_pool) : write_schema_(write_schema), external_storage_fields_(external_storage_fields), external_storage_path_(external_storage_path), schema_id_(schema_id), seq_num_counter_(seq_num_counter), - file_system_(file_system), path_factory_(path_factory), memory_pool_(memory_pool), - options_(options), - logger_(Logger::GetLogger("ExternalStorageBlobWriter")) {} + options_(options) {} Result> ExternalStorageBlobWriter::CreateFieldRollingWriter(FieldWriter* field_writer) { @@ -99,8 +95,8 @@ ExternalStorageBlobWriter::CreateFieldRollingWriter(FieldWriter* field_writer) { seq_num_counter_, FileSource::Append(), stats_extractor, path_factory_->IsExternalPath(), write_cols, memory_pool_); 
PAIMON_RETURN_NOT_OK(writer->Init( - file_system_, path_factory_->NewExternalStorageBlobPath(external_storage_path_), - writer_builder)); + options_.GetFileSystem(), + path_factory_->NewExternalStorageBlobPath(external_storage_path_), writer_builder)); return writer; }; @@ -108,37 +104,85 @@ ExternalStorageBlobWriter::CreateFieldRollingWriter(FieldWriter* field_writer) { single_blob_file_writer_creator); } +Status ExternalStorageBlobWriter::InitializeFieldWritersIfNeeded() { + if (initialized_) { + return Status::OK(); + } + for (int32_t i = 0; i < write_schema_->num_fields(); ++i) { + const auto& field = write_schema_->field(i); + if (external_storage_fields_.count(field->name()) > 0) { + FieldWriter fw; + fw.field_name = field->name(); + fw.field_index = i; + field_writers_.push_back(std::move(fw)); + } + } + // Create rolling writers after push_back so FieldWriter addresses are stable + // for the consumer lambda capture. + for (auto& fw : field_writers_) { + PAIMON_ASSIGN_OR_RAISE(fw.rolling_writer, CreateFieldRollingWriter(&fw)); + } + initialized_ = true; + return Status::OK(); +} + +Result> ExternalStorageBlobWriter::TransformField( + const std::shared_ptr& column, FieldWriter* field_writer) { + int64_t num_rows = column->length(); + + // Clear captured descriptors before processing this batch + field_writer->captured_descriptors.clear(); + + // Write each row via RollingFileWriter; the consumer captures the descriptor + for (int64_t row = 0; row < num_rows; ++row) { + std::shared_ptr slice = column->Slice(row, 1); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( + std::shared_ptr single_row_struct, + arrow::StructArray::Make({slice}, {field_writer->field_name})); + + ::ArrowArray c_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*single_row_struct, &c_array)); + PAIMON_RETURN_NOT_OK(field_writer->rolling_writer->Write(&c_array)); + } + + // Validate captured descriptor count + if (static_cast(field_writer->captured_descriptors.size()) != num_rows) { + 
return Status::Invalid( + "Captured descriptor count {} does not match row count {} for field '{}'", + field_writer->captured_descriptors.size(), num_rows, field_writer->field_name); + } + + // Build descriptor column from captured descriptors + arrow::LargeBinaryBuilder descriptor_builder; + PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.Reserve(num_rows)); + for (int64_t row = 0; row < num_rows; ++row) { + const auto& descriptor = field_writer->captured_descriptors[row]; + if (!descriptor) { + PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.AppendNull()); + } else { + auto serialized = descriptor->Serialize(memory_pool_); + PAIMON_RETURN_NOT_OK_FROM_ARROW( + descriptor_builder.Append(serialized->data(), serialized->size())); + } + } + + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr descriptor_array, + descriptor_builder.Finish()); + return descriptor_array; +} + Result> ExternalStorageBlobWriter::TransformBatch( const std::shared_ptr& batch) { if (external_storage_fields_.empty()) { return batch; } - // Lazily initialize per-field writers - if (!initialized_) { - for (int32_t i = 0; i < write_schema_->num_fields(); ++i) { - const auto& field = write_schema_->field(i); - if (external_storage_fields_.count(field->name()) > 0) { - FieldWriter fw; - fw.field_name = field->name(); - fw.field_index = i; - field_writers_.push_back(std::move(fw)); - } - } - // Create rolling writers for each field (must be done after push_back so - // the FieldWriter addresses are stable for the consumer lambda capture). 
- for (auto& fw : field_writers_) { - PAIMON_ASSIGN_OR_RAISE(fw.rolling_writer, CreateFieldRollingWriter(&fw)); - } - initialized_ = true; - } + PAIMON_RETURN_NOT_OK(InitializeFieldWritersIfNeeded()); if (field_writers_.empty()) { return batch; } - int64_t num_rows = batch->length(); - // Collect all arrays and field names from the original batch std::vector> result_arrays; std::vector result_names; @@ -150,56 +194,13 @@ Result> ExternalStorageBlobWriter::Transform result_arrays.push_back(batch->field(col)); } - // For each external storage field, write blobs row by row via RollingFileWriter - // and build a replacement descriptor column from captured descriptors. + // Transform each external storage field and replace in result for (FieldWriter& fw : field_writers_) { - std::shared_ptr original_column = batch->field(fw.field_index); - - // Clear captured descriptors before processing this batch - fw.captured_descriptors.clear(); - - arrow::LargeBinaryBuilder descriptor_builder; - PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.Reserve(num_rows)); - - for (int64_t row = 0; row < num_rows; ++row) { - // Create a single-row single-field StructArray for BlobFormatWriter - std::shared_ptr slice = original_column->Slice(row, 1); - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr single_row_struct, - arrow::StructArray::Make({slice}, {fw.field_name})); - - ::ArrowArray c_array; - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*single_row_struct, &c_array)); - - // Write via RollingFileWriter; the consumer captures the descriptor - PAIMON_RETURN_NOT_OK(fw.rolling_writer->Write(&c_array)); - } - - // Build descriptor column from captured descriptors - if (static_cast(fw.captured_descriptors.size()) != num_rows) { - return Status::Invalid( - "Captured descriptor count {} does not match row count {} for field '{}'", - fw.captured_descriptors.size(), num_rows, fw.field_name); - } - - for (int64_t row = 0; row < num_rows; ++row) { - const auto& descriptor = 
fw.captured_descriptors[row]; - if (!descriptor) { - // Null blob -> null descriptor - PAIMON_RETURN_NOT_OK_FROM_ARROW(descriptor_builder.AppendNull()); - } else { - auto serialized = descriptor->Serialize(memory_pool_); - PAIMON_RETURN_NOT_OK_FROM_ARROW( - descriptor_builder.Append(serialized->data(), serialized->size())); - } - } - - // Build the descriptor column and replace - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr descriptor_array, - descriptor_builder.Finish()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr descriptor_array, + TransformField(batch->field(fw.field_index), &fw)); result_arrays[fw.field_index] = descriptor_array; } - // Construct the result StructArray PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr result, arrow::StructArray::Make(result_arrays, result_names)); return result; diff --git a/src/paimon/core/io/external_storage_blob_writer.h b/src/paimon/core/io/external_storage_blob_writer.h index 7b144b1c7..600dd6f96 100644 --- a/src/paimon/core/io/external_storage_blob_writer.h +++ b/src/paimon/core/io/external_storage_blob_writer.h @@ -23,13 +23,13 @@ #include #include "paimon/common/data/blob_descriptor.h" +#include "paimon/core/core_options.h" #include "paimon/core/io/data_file_meta.h" #include "paimon/core/io/rolling_file_writer.h" #include "paimon/core/io/single_file_writer.h" #include "paimon/logging.h" #include "paimon/result.h" #include "paimon/status.h" - namespace arrow { class Schema; class StructArray; @@ -37,7 +37,6 @@ class StructArray; namespace paimon { -class CoreOptions; class FileSystem; class LongCounter; class MemoryPool; @@ -60,10 +59,9 @@ class ExternalStorageBlobWriter { const std::set& external_storage_fields, const std::string& external_storage_path, int64_t schema_id, const std::shared_ptr& seq_num_counter, - const std::shared_ptr& file_system, const std::shared_ptr& path_factory, - const std::shared_ptr& memory_pool, - const CoreOptions& options); + const CoreOptions& options, + const std::shared_ptr& 
memory_pool); /// Transforms a batch by writing external storage fields to .blob files and replacing /// the BLOB values with serialized BlobDescriptor bytes. @@ -86,6 +84,14 @@ class ExternalStorageBlobWriter { std::vector> captured_descriptors; }; + /// Lazily initializes per-field writers on first call to TransformBatch. + Status InitializeFieldWritersIfNeeded(); + + /// Writes all rows of a single external blob field via RollingFileWriter and returns + /// a descriptor column (LargeBinary) built from captured BlobDescriptors. + Result> TransformField( + const std::shared_ptr& column, FieldWriter* field_writer); + /// Creates a RollingFileWriter for one external storage blob field with consumer injected. Result> CreateFieldRollingWriter(FieldWriter* field_writer); @@ -94,15 +100,12 @@ class ExternalStorageBlobWriter { std::string external_storage_path_; int64_t schema_id_; std::shared_ptr seq_num_counter_; - std::shared_ptr file_system_; std::shared_ptr path_factory_; std::shared_ptr memory_pool_; - const CoreOptions& options_; + CoreOptions options_; std::vector field_writers_; bool initialized_ = false; - - std::unique_ptr logger_; }; } // namespace paimon diff --git a/src/paimon/core/io/external_storage_blob_writer_test.cpp b/src/paimon/core/io/external_storage_blob_writer_test.cpp index a2206f971..e715f03d4 100644 --- a/src/paimon/core/io/external_storage_blob_writer_test.cpp +++ b/src/paimon/core/io/external_storage_blob_writer_test.cpp @@ -73,7 +73,7 @@ TEST_F(ExternalStorageBlobWriterTest, TestEmptyExternalFields) { // No external storage fields -> TransformBatch returns original batch ExternalStorageBlobWriter writer(write_schema_, /*external_storage_fields=*/{}, external_storage_path_, /*schema_id=*/0, seq_num_counter_, - file_system_, path_factory_, pool_, options_); + path_factory_, options_, pool_); auto input = std::static_pointer_cast( arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(write_schema_->fields()), @@ -89,8 +89,8 @@ 
TEST_F(ExternalStorageBlobWriterTest, TestEmptyExternalFields) { TEST_F(ExternalStorageBlobWriterTest, TestTransformBatchReplacesBlob) { std::set external_fields = {"blob_col"}; ExternalStorageBlobWriter writer(write_schema_, external_fields, external_storage_path_, - /*schema_id=*/0, seq_num_counter_, file_system_, path_factory_, - pool_, options_); + /*schema_id=*/0, seq_num_counter_, path_factory_, options_, + pool_); auto struct_type = arrow::struct_(write_schema_->fields()); auto input = std::static_pointer_cast( @@ -124,8 +124,8 @@ TEST_F(ExternalStorageBlobWriterTest, TestTransformBatchReplacesBlob) { TEST_F(ExternalStorageBlobWriterTest, TestAbort) { std::set external_fields = {"blob_col"}; ExternalStorageBlobWriter writer(write_schema_, external_fields, external_storage_path_, - /*schema_id=*/0, seq_num_counter_, file_system_, path_factory_, - pool_, options_); + /*schema_id=*/0, seq_num_counter_, path_factory_, options_, + pool_); auto input = std::static_pointer_cast( arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(write_schema_->fields()), diff --git a/src/paimon/core/io/field_mapping_reader_test.cpp b/src/paimon/core/io/field_mapping_reader_test.cpp index 2288163bd..a3b2dd09c 100644 --- a/src/paimon/core/io/field_mapping_reader_test.cpp +++ b/src/paimon/core/io/field_mapping_reader_test.cpp @@ -32,6 +32,7 @@ #include "arrow/ipc/json_simple.h" #include "arrow/util/checked_cast.h" #include "gtest/gtest.h" +#include "paimon/common/data/blob_utils.h" #include "paimon/common/types/data_field.h" #include "paimon/core/utils/field_mapping.h" #include "paimon/defs.h" @@ -183,6 +184,9 @@ class FieldMappingReaderTest : public ::testing::Test { auto expected_chunk_array = std::make_shared(arrow::ArrayVector({expect_array})); + ASSERT_TRUE(result_array->type()->Equals(expected_chunk_array->type())) + << result_array->type()->ToString() << expected_chunk_array->type()->ToString(); + ASSERT_TRUE(result_array->Equals(expected_chunk_array)) << 
result_array->ToString() << expected_chunk_array->ToString(); } @@ -710,29 +714,24 @@ TEST_F(FieldMappingReaderTest, TestSchemaEvolutionWithDictType) { TEST_F(FieldMappingReaderTest, TestReadInlineBlobAsBinaryDataFile) { std::vector data_fields = { - DataField(0, arrow::field("descriptor", arrow::large_binary())), + DataField(0, BlobUtils::ToArrowField("descriptor", /*nullable=*/true)), }; auto data_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); - auto data_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(data_schema->fields()), - R"([ + std::string json_str = R"([ ["descriptor-1"], [null], ["descriptor-2"] - ])") + ])"; + auto data_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(data_schema->fields()), json_str) .ValueOrDie()); std::vector read_fields = { - DataField(0, arrow::field("descriptor", arrow::large_binary())), + DataField(0, BlobUtils::ToArrowField("descriptor", /*nullable=*/true)), }; auto read_schema = DataField::ConvertDataFieldsToArrowSchema(read_fields); auto expected = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(read_schema->fields()), - R"([ - ["descriptor-1"], - [null], - ["descriptor-2"] - ])") + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(read_schema->fields()), json_str) .ValueOrDie()); CheckResult(data_schema, data_array, read_schema, /*predicate=*/nullptr, diff --git a/src/paimon/core/io/rolling_blob_file_writer.h b/src/paimon/core/io/rolling_blob_file_writer.h index 907936b09..859f9589d 100644 --- a/src/paimon/core/io/rolling_blob_file_writer.h +++ b/src/paimon/core/io/rolling_blob_file_writer.h @@ -64,7 +64,7 @@ class RollingBlobFileWriter const std::shared_ptr& blob_schema, MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator, const std::shared_ptr& data_type, - std::set inline_fields = {}); + std::set inline_fields); ~RollingBlobFileWriter() override = default; 
Status Write(::ArrowArray* record) override; diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index c1819101b..bde27d64d 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -235,9 +235,9 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { file_system->Create(dir->Str() + "/file.blob", /*overwrite=*/true)); std::shared_ptr blob_field = BlobUtils::ToArrowField("blob_col"); auto struct_type = arrow::struct_({blob_field}); - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream, struct_type, file_system, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream, struct_type, + /*write_consumer=*/nullptr, file_system, pool_)); ASSERT_OK(writer->Flush()); ASSERT_OK(writer->Finish()); diff --git a/src/paimon/format/blob/blob_format_writer.cpp b/src/paimon/format/blob/blob_format_writer.cpp index 3b199983d..4a6256a71 100644 --- a/src/paimon/format/blob/blob_format_writer.cpp +++ b/src/paimon/format/blob/blob_format_writer.cpp @@ -32,12 +32,13 @@ namespace paimon::blob { -BlobFormatWriter::BlobFormatWriter(const std::shared_ptr& out, +BlobFormatWriter::BlobFormatWriter(const std::shared_ptr& out, const std::string& uri, const std::shared_ptr& data_type, + WriteConsumer write_consumer, const std::shared_ptr& fs, - const std::shared_ptr& pool, - WriteConsumer write_consumer) + const std::shared_ptr& pool) : out_(out), + uri_(uri), data_type_(data_type), fs_(fs), pool_(pool), @@ -48,8 +49,8 @@ BlobFormatWriter::BlobFormatWriter(const std::shared_ptr& out, Result> BlobFormatWriter::Create( const std::shared_ptr& out, const std::shared_ptr& data_type, - const std::shared_ptr& fs, const std::shared_ptr& pool, - WriteConsumer write_consumer) { + WriteConsumer write_consumer, const std::shared_ptr& fs, + const std::shared_ptr& pool) { if (out == 
nullptr) { return Status::Invalid("blob format writer create failed. out is nullptr"); } @@ -67,8 +68,9 @@ Result> BlobFormatWriter::Create( return Status::Invalid( fmt::format("field {} is not BLOB", data_type->field(0)->ToString())); } + PAIMON_ASSIGN_OR_RAISE(std::string uri, out->GetUri()); return std::unique_ptr( - new BlobFormatWriter(out, data_type, fs, pool, std::move(write_consumer))); + new BlobFormatWriter(out, uri, data_type, std::move(write_consumer), fs, pool)); } Status BlobFormatWriter::AddBatch(ArrowArray* batch) { @@ -93,7 +95,7 @@ Status BlobFormatWriter::AddBatch(ArrowArray* batch) { if (child_array->IsNull(0)) { bin_lengths_.push_back(BlobDefs::kNullBinLength); if (write_consumer_) { - write_consumer_(nullptr); + write_consumer_(/*descriptor=*/nullptr); } return Status::OK(); } @@ -116,11 +118,10 @@ Status BlobFormatWriter::AddBatch(ArrowArray* batch) { PAIMON_ASSIGN_OR_RAISE(int64_t end_pos, out_->GetPos()); int64_t blob_start_pos = end_pos - bin_length; int64_t content_offset = blob_start_pos + BlobDefs::kContentStartOffset; - int64_t content_length = bin_length - 16; + int64_t content_length = bin_length - BlobDefs::kTotalMetaLength; - PAIMON_ASSIGN_OR_RAISE(std::string uri, out_->GetUri()); - PAIMON_ASSIGN_OR_RAISE(auto descriptor, - BlobDescriptor::Create(uri, content_offset, content_length)); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr descriptor, + BlobDescriptor::Create(uri_, content_offset, content_length)); bool should_flush = write_consumer_(std::move(descriptor)); if (should_flush) { PAIMON_RETURN_NOT_OK(Flush()); diff --git a/src/paimon/format/blob/blob_format_writer.h b/src/paimon/format/blob/blob_format_writer.h index c8fab7956..372906ea8 100644 --- a/src/paimon/format/blob/blob_format_writer.h +++ b/src/paimon/format/blob/blob_format_writer.h @@ -56,8 +56,8 @@ class BlobFormatWriter : public FormatWriter { static Result> Create( const std::shared_ptr& out, const std::shared_ptr& data_type, - const std::shared_ptr& fs, const 
std::shared_ptr& pool, - WriteConsumer write_consumer); + WriteConsumer write_consumer, const std::shared_ptr& fs, + const std::shared_ptr& pool); Status AddBatch(ArrowArray* batch) override; @@ -72,10 +72,10 @@ class BlobFormatWriter : public FormatWriter { } private: - BlobFormatWriter(const std::shared_ptr& out, + BlobFormatWriter(const std::shared_ptr& out, const std::string& uri, const std::shared_ptr& data_type, - const std::shared_ptr& fs, const std::shared_ptr& pool, - WriteConsumer write_consumer); + WriteConsumer write_consumer, const std::shared_ptr& fs, + const std::shared_ptr& pool); Status WriteBlob(std::string_view blob_data); @@ -93,6 +93,7 @@ class BlobFormatWriter : public FormatWriter { uint32_t crc32_ = 0; std::vector bin_lengths_; std::shared_ptr out_; + std::string uri_; PAIMON_UNIQUE_PTR tmp_buffer_; std::shared_ptr data_type_; std::shared_ptr fs_; diff --git a/src/paimon/format/blob/blob_format_writer_test.cpp b/src/paimon/format/blob/blob_format_writer_test.cpp index 8758447c4..506f93cfe 100644 --- a/src/paimon/format/blob/blob_format_writer_test.cpp +++ b/src/paimon/format/blob/blob_format_writer_test.cpp @@ -91,9 +91,9 @@ INSTANTIATE_TEST_SUITE_P(BlobAsDescriptor, BlobFormatWriterTest, ::testing::Valu TEST_P(BlobFormatWriterTest, TestSimple) { // write - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); std::vector> expected_blobs; std::string file1 = paimon::test::GetDataDir() + "/avro/data/avro_with_null"; @@ -161,7 +161,7 @@ TEST_P(BlobFormatWriterTest, TestWriteConsumerReceivesDescriptors) { ASSERT_OK_AND_ASSIGN( std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, consumer)); + BlobFormatWriter::Create(output_stream_, struct_type_, consumer, 
file_system_, pool_)); // Write a normal blob row std::string file = paimon::test::GetDataDir() + "/xxhash.data"; @@ -194,39 +194,39 @@ TEST_P(BlobFormatWriterTest, TestWriteConsumerReceivesDescriptors) { TEST_P(BlobFormatWriterTest, TestCreateWithInvalidParameters) { // Test with nullptr output stream - ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(nullptr, struct_type_, file_system_, pool_, nullptr), - "blob format writer create failed. out is nullptr"); + ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(nullptr, struct_type_, /*write_consumer=*/nullptr, + file_system_, pool_), + "blob format writer create failed. out is nullptr"); // Test with nullptr data type - ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(output_stream_, nullptr, file_system_, pool_, nullptr), - "blob format writer create failed. data_type is nullptr"); + ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(output_stream_, nullptr, + /*write_consumer=*/nullptr, file_system_, pool_), + "blob format writer create failed. data_type is nullptr"); // Test with nullptr memory pool - ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, nullptr, nullptr), - "blob format writer create failed. pool is nullptr"); + ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, nullptr), + "blob format writer create failed. 
pool is nullptr"); // Test with invalid field count (more than 1 field) auto multi_field_type = arrow::struct_( {arrow::field("blob_col1", arrow::binary()), arrow::field("blob_col2", arrow::binary())}); - ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(output_stream_, multi_field_type, file_system_, pool_, nullptr), - "blob data type field number 2 is not 1"); + ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(output_stream_, multi_field_type, + /*write_consumer=*/nullptr, file_system_, pool_), + "blob data type field number 2 is not 1"); // Test with non-blob field (missing blob metadata) auto non_blob_field = arrow::field("regular_col", arrow::binary()); auto non_blob_type = arrow::struct_({non_blob_field}); - ASSERT_NOK_WITH_MSG( - BlobFormatWriter::Create(output_stream_, non_blob_type, file_system_, pool_, nullptr), - "field regular_col: binary is not BLOB"); + ASSERT_NOK_WITH_MSG(BlobFormatWriter::Create(output_stream_, non_blob_type, + /*write_consumer=*/nullptr, file_system_, pool_), + "field regular_col: binary is not BLOB"); } TEST_P(BlobFormatWriterTest, TestInvalidCase) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); // Test nullptr batch ASSERT_NOK_WITH_MSG(writer->AddBatch(nullptr), @@ -243,9 +243,9 @@ TEST_P(BlobFormatWriterTest, TestInvalidCase) { } TEST_P(BlobFormatWriterTest, TestAddBatchWithInvalidBatchLength) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); // Test batch with wrong length (not 1) arrow::StructBuilder struct_builder(struct_type_, 
arrow::default_memory_pool(), @@ -271,9 +271,9 @@ TEST_P(BlobFormatWriterTest, TestAddBatchWithInvalidBatchLength) { } TEST_P(BlobFormatWriterTest, TestReachTargetSize) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); // Initially should not reach target size ASSERT_OK_AND_ASSIGN(bool reached, writer->ReachTargetSize(true, 1000)); @@ -296,9 +296,9 @@ TEST_P(BlobFormatWriterTest, TestReachTargetSize) { } TEST_P(BlobFormatWriterTest, TestGetWriterMetrics) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); auto metrics = writer->GetWriterMetrics(); ASSERT_TRUE(metrics); @@ -306,9 +306,9 @@ TEST_P(BlobFormatWriterTest, TestGetWriterMetrics) { TEST_P(BlobFormatWriterTest, TestEmptyWriter) { // Test creating a writer and finishing without adding any data - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); ASSERT_OK(writer->Flush()); ASSERT_OK(writer->Finish()); @@ -327,9 +327,9 @@ TEST_P(BlobFormatWriterTest, TestEmptyWriter) { } TEST_P(BlobFormatWriterTest, TestLargeBlob) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, 
file_system_, pool_)); // Create a temporary large file for testing std::string large_file_path = dir_->Str() + "/large_test_file.bin"; @@ -382,9 +382,9 @@ TEST_P(BlobFormatWriterTest, TestLargeBlob) { } TEST_P(BlobFormatWriterTest, TestAddBatchWithNullValues) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); // Write one row with child-level null blob arrow::StructBuilder struct_builder(struct_type_, arrow::default_memory_pool(), @@ -430,18 +430,18 @@ TEST_P(BlobFormatWriterTest, TestAddBatchWithNullValues) { ASSERT_TRUE(struct_builder2.Finish(&null_struct_array).ok()); auto null_c_array = std::make_unique(); ASSERT_TRUE(arrow::ExportArray(*null_struct_array, null_c_array.get()).ok()); - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer2, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer2, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); ASSERT_NOK_WITH_MSG(writer2->AddBatch(null_c_array.get()), "BlobFormatWriter does not support struct-level null."); ArrowArrayRelease(null_c_array.get()); } TEST_P(BlobFormatWriterTest, TestAddBatchWithZeroLengthBlob) { - ASSERT_OK_AND_ASSIGN( - std::shared_ptr writer, - BlobFormatWriter::Create(output_stream_, struct_type_, file_system_, pool_, nullptr)); + ASSERT_OK_AND_ASSIGN(std::shared_ptr writer, + BlobFormatWriter::Create(output_stream_, struct_type_, + /*write_consumer=*/nullptr, file_system_, pool_)); // Create a zero-length file std::string zero_file_path = dir_->Str() + "/zero_length_file.bin"; diff --git a/src/paimon/format/blob/blob_writer_builder.h b/src/paimon/format/blob/blob_writer_builder.h index 15f6d47b4..497d1889b 100644 --- 
a/src/paimon/format/blob/blob_writer_builder.h +++ b/src/paimon/format/blob/blob_writer_builder.h @@ -71,7 +71,7 @@ class BlobWriterBuilder : public SpecificFSWriterBuilder { if (fs_ == nullptr) { return Status::Invalid("File system is nullptr. Please call WithFileSystem() first."); } - return BlobFormatWriter::Create(out, data_type_, fs_, pool_, write_consumer_); + return BlobFormatWriter::Create(out, data_type_, write_consumer_, fs_, pool_); } private: diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 3b12b0183..f5cf99b9d 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -164,15 +164,16 @@ class ParquetFileBatchReaderTest : public ::testing::Test, TEST_F(ParquetFileBatchReaderTest, TestReadBinaryWrittenFromBinaryAndLargeBinary) { auto check_binary_read_result = [&](const std::shared_ptr& write_type, const std::string& file_name) { - auto write_field = arrow::field("f0", write_type); - auto write_schema = arrow::schema({write_field}); - auto write_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({write_field}), R"([ + std::string data_json = R"([ ["descriptor-1"], [""], [null], ["descriptor-2"] - ])") + ])"; + auto write_field = arrow::field("f0", write_type); + auto write_schema = arrow::schema({write_field}); + auto write_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({write_field}), data_json) .ValueOrDie()); std::string file_path = PathUtil::JoinPath(dir_->Str(), file_name); @@ -190,12 +191,7 @@ TEST_F(ParquetFileBatchReaderTest, TestReadBinaryWrittenFromBinaryAndLargeBinary ASSERT_TRUE(file_schema->Equals(*read_schema)); auto expected_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({read_field}), R"([ - ["descriptor-1"], - [""], - [null], - 
["descriptor-2"] - ])") + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({read_field}), data_json) .ValueOrDie()); auto expected_chunked_array = std::make_shared(expected_array); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( diff --git a/src/paimon/testing/utils/test_helper.h b/src/paimon/testing/utils/test_helper.h index 93d13161b..78061fd39 100644 --- a/src/paimon/testing/utils/test_helper.h +++ b/src/paimon/testing/utils/test_helper.h @@ -251,63 +251,6 @@ class TestHelper { return new_array; } - Result ReadAndCheckResultForBlobTable( - const std::shared_ptr& all_columns_schema, - const std::vector>& splits, const std::string& main_expected_json, - const std::vector>& expected_blob_descriptors) { - ReadContextBuilder read_context_builder(table_path_); - read_context_builder.SetOptions(options_); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_context, - read_context_builder.Finish()); - PAIMON_ASSIGN_OR_RAISE(auto table_read, TableRead::Create(std::move(read_context))); - PAIMON_ASSIGN_OR_RAISE(auto batch_reader, table_read->CreateReader(splits)); - PAIMON_ASSIGN_OR_RAISE(auto read_result, - ReadResultCollector::CollectResult(batch_reader.get())); - - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto concat_array, - arrow::Concatenate(read_result->chunks())); - PAIMON_ASSIGN_OR_RAISE(auto reconstruct_array, - ReconstructBlobArray(concat_array, all_columns_schema)); - PAIMON_ASSIGN_OR_RAISE(auto separated_array, - BlobUtils::SeparateBlobArray( - std::dynamic_pointer_cast(reconstruct_array), - /*inline_fields=*/{})); - - arrow::EqualOptions equal_options = arrow::EqualOptions::Defaults(); - - // check main columns - auto separated_schema = - BlobUtils::SeparateBlobSchema(all_columns_schema, /*inline_fields=*/{}); - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW( - auto main_expected_array, - arrow::ipc::internal::json::ArrayFromJSON( - arrow::struct_(separated_schema.main_schema->fields()), main_expected_json)); - auto 
main_expected_chunk_array = std::make_shared(main_expected_array); - bool main_equal = main_expected_chunk_array->Equals( - arrow::ChunkedArray(separated_array.main_array), equal_options.diff_sink(&std::cout)); - if (!main_equal) { - std::cout << "[expected_data_type]" << main_expected_chunk_array->type()->ToString() - << std::endl; - std::cout << "[actual_data_type]" << separated_array.main_array->type()->ToString() - << std::endl; - std::cout << "[expected]:" << main_expected_chunk_array->ToString() << std::endl; - std::cout << "[actual]: " << separated_array.main_array->ToString() << std::endl; - } - - // check blob column - std::vector> expected_blobs; - for (const auto& descriptor : expected_blob_descriptors) { - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr blob, - Blob::FromDescriptor(descriptor->data(), descriptor->size())); - expected_blobs.emplace_back(blob); - } - PAIMON_ASSIGN_OR_RAISE(auto result_blobs, ToBlobs(separated_array.blob_array)); - PAIMON_ASSIGN_OR_RAISE(bool blob_equal, CheckBlobsEqual(result_blobs, expected_blobs, fs_)); - - table_read.reset(); - return main_equal && blob_equal; - } - Result ReadAndCheckResult(const std::shared_ptr& data_type, const std::vector>& splits, const std::string& expected_result) { diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index ad4e28283..4e0f9c305 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -78,7 +78,7 @@ class RecordBatch; namespace paimon::test { -struct ScanReadResult { +struct ReadResult { std::unique_ptr batch_reader; std::shared_ptr chunked_array; }; @@ -192,11 +192,11 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter /// Read from table using a pre-scanned plan, returning the ChunkedArray and batch_reader. /// The batch_reader must outlive the returned ChunkedArray (array memory depends on reader). 
- Result ReadTable(const std::string& table_path, - const std::vector& read_schema, - const std::shared_ptr& plan, - const std::shared_ptr& predicate = nullptr, - const std::map& options = {}) const { + Result ReadTable(const std::string& table_path, + const std::vector& read_schema, + const std::shared_ptr& plan, + const std::shared_ptr& predicate = nullptr, + const std::map& options = {}) const { auto splits = plan->Splits(); ReadContextBuilder read_context_builder(table_path); read_context_builder.SetReadSchema(read_schema).SetPredicate(predicate); @@ -209,14 +209,14 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter PAIMON_ASSIGN_OR_RAISE(auto batch_reader, table_read->CreateReader(splits)); PAIMON_ASSIGN_OR_RAISE(auto read_result, ReadResultCollector::CollectResult(batch_reader.get())); - return ScanReadResult{std::move(batch_reader), std::move(read_result)}; + return ReadResult{std::move(batch_reader), std::move(read_result)}; } /// Convenience: scan + read in one call. - Result ScanAndReadResult(const std::string& table_path, - const std::vector& read_schema, - const std::shared_ptr& predicate = nullptr, - const std::vector& row_ranges = {}) const { + Result ScanAndReadResult(const std::string& table_path, + const std::vector& read_schema, + const std::shared_ptr& predicate = nullptr, + const std::vector& row_ranges = {}) const { PAIMON_ASSIGN_OR_RAISE(auto result_plan, ScanTable(table_path, predicate, row_ranges)); return ReadTable(table_path, read_schema, result_plan, predicate); } @@ -278,30 +278,51 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter /// BlobDescriptor bytes. Each raw blob value is written to a temporary file, and /// the corresponding cell is replaced with the serialized BlobDescriptor pointing /// to that file. 
- Result> ConvertRawBlobToDescriptor( - const std::shared_ptr& raw_array, - const std::set& blob_fields) { - auto fs = std::make_shared(); - int64_t num_rows = raw_array->length(); - auto fields = raw_array->type()->fields(); - + /// Common framework for transforming blob fields in a StructArray. + /// Non-blob fields are kept as-is; blob fields are processed row-by-row via `transform_row`. + /// `transform_row` receives (binary_value_view) and returns the transformed bytes via builder. + using BlobRowTransform = + std::function; + + Result> TransformBlobFields( + const std::shared_ptr& input_array, + const std::set& blob_fields, BlobRowTransform transform_row) const { + auto fields = input_array->type()->fields(); arrow::ArrayVector child_arrays; for (const auto& field : fields) { - auto col = raw_array->GetFieldByName(field->name()); + auto col = input_array->GetFieldByName(field->name()); if (blob_fields.count(field->name()) == 0) { child_arrays.push_back(col); continue; } const auto& binary_array = arrow::internal::checked_cast(*col); - arrow::LargeBinaryBuilder desc_builder; - for (int64_t i = 0; i < num_rows; ++i) { + arrow::LargeBinaryBuilder builder; + for (int64_t i = 0; i < binary_array.length(); ++i) { if (binary_array.IsNull(i)) { - PAIMON_RETURN_NOT_OK_FROM_ARROW(desc_builder.AppendNull()); + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder.AppendNull()); continue; } - std::string_view raw_value = binary_array.GetView(i); + PAIMON_RETURN_NOT_OK(transform_row(binary_array.GetView(i), &builder)); + } + std::shared_ptr result_col; + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder.Finish(&result_col)); + child_arrays.push_back(result_col); + } + + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result, + arrow::StructArray::Make(child_arrays, fields)); + return result; + } + + Result> ConvertRawBlobToDescriptor( + const std::shared_ptr& raw_array, + const std::set& blob_fields) { + auto fs = std::make_shared(); + return TransformBlobFields( + raw_array, blob_fields, + [&](const 
std::string_view& raw_value, arrow::LargeBinaryBuilder* builder) -> Status { std::string file_path = blob_dir_->Str() + "/blob_" + std::to_string(blob_file_counter_++) + ".bin"; PAIMON_ASSIGN_OR_RAISE(auto out, fs->Create(file_path, /*overwrite=*/true)); @@ -317,16 +338,9 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter PAIMON_ASSIGN_OR_RAISE(auto blob, Blob::FromPath(file_path)); auto descriptor = blob->ToDescriptor(pool_); PAIMON_RETURN_NOT_OK_FROM_ARROW( - desc_builder.Append(descriptor->data(), descriptor->size())); - } - std::shared_ptr desc_array; - PAIMON_RETURN_NOT_OK_FROM_ARROW(desc_builder.Finish(&desc_array)); - child_arrays.push_back(desc_array); - } - - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result, - arrow::StructArray::Make(child_arrays, fields)); - return result; + builder->Append(descriptor->data(), descriptor->size())); + return Status::OK(); + }); } /// Convert a StructArray with serialized BlobDescriptor bytes back to a StructArray @@ -336,39 +350,16 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter const std::shared_ptr& desc_array, const std::set& blob_fields) const { auto fs = std::make_shared(); - int64_t num_rows = desc_array->length(); - auto fields = desc_array->type()->fields(); - - arrow::ArrayVector child_arrays; - - for (const auto& field : fields) { - auto col = desc_array->GetFieldByName(field->name()); - if (blob_fields.count(field->name()) == 0) { - child_arrays.push_back(col); - continue; - } - const auto& binary_array = - arrow::internal::checked_cast(*col); - arrow::LargeBinaryBuilder raw_builder; - for (int64_t i = 0; i < num_rows; ++i) { - if (binary_array.IsNull(i)) { - PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.AppendNull()); - continue; - } - std::string_view descriptor_bytes = binary_array.GetView(i); + return TransformBlobFields( + desc_array, blob_fields, + [&](const std::string_view& descriptor_bytes, + arrow::LargeBinaryBuilder* builder) -> Status { 
PAIMON_ASSIGN_OR_RAISE(auto blob, Blob::FromDescriptor(descriptor_bytes.data(), descriptor_bytes.size())); PAIMON_ASSIGN_OR_RAISE(auto data, blob->ToData(fs, pool_)); - PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.Append(data->data(), data->size())); - } - std::shared_ptr raw_array_col; - PAIMON_RETURN_NOT_OK_FROM_ARROW(raw_builder.Finish(&raw_array_col)); - child_arrays.push_back(raw_array_col); - } - - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto result, - arrow::StructArray::Make(child_arrays, fields)); - return result; + PAIMON_RETURN_NOT_OK_FROM_ARROW(builder->Append(data->data(), data->size())); + return Status::OK(); + }); } /// Verify DataFileMeta properties from a scan plan. @@ -396,24 +387,12 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter ASSERT_EQ(expected_write_cols.size(), expected_file_count); for (size_t i = 0; i < all_files.size(); ++i) { const auto& file = all_files[i]; - EXPECT_EQ(file->row_count, expected_row_counts[i]) - << "file[" << i << "] row_count mismatch"; - EXPECT_EQ(file->min_sequence_number, expected_min_seqs[i]) - << "file[" << i << "] min_sequence_number mismatch"; - EXPECT_EQ(file->max_sequence_number, expected_max_seqs[i]) - << "file[" << i << "] max_sequence_number mismatch"; - ASSERT_TRUE(file->first_row_id.has_value()) - << "file[" << i << "] first_row_id should not be null"; - EXPECT_EQ(file->first_row_id.value(), expected_first_row_ids[i]) - << "file[" << i << "] first_row_id mismatch"; - EXPECT_EQ(file->write_cols, expected_write_cols[i]) - << "file[" << i << "] write_cols mismatch, actual: " - << (file->write_cols ? fmt::format("[{}]", fmt::join(*file->write_cols, ", ")) - : "nullopt") - << ", expected: " - << (expected_write_cols[i] - ? 
fmt::format("[{}]", fmt::join(*expected_write_cols[i], ", ")) - : "nullopt"); + EXPECT_EQ(file->row_count, expected_row_counts[i]); + EXPECT_EQ(file->min_sequence_number, expected_min_seqs[i]); + EXPECT_EQ(file->max_sequence_number, expected_max_seqs[i]); + ASSERT_TRUE(file->first_row_id.has_value()); + EXPECT_EQ(file->first_row_id.value(), expected_first_row_ids[i]); + EXPECT_EQ(file->write_cols, expected_write_cols[i]); } } @@ -707,7 +686,7 @@ TEST_P(BlobTableInteTest, TestOnlySomeColumns) { ])") .ValueOrDie()); ASSERT_NOK_WITH_MSG(WriteArray(table_path, {}, write_cols1, {src_array1}), - "Can't infer struct array length with 0 child arrays"); + "SeparateBlobArray expects at least one main field, but got none."); } TEST_P(BlobTableInteTest, TestMultipleAppendsDifferentFirstRowIds) { @@ -2211,10 +2190,13 @@ TEST_P(BlobTableInteTest, TestDataEvolutionWithBlobDescriptorField) { } // Test DataEvolution (split-column write) combined with blob descriptor fields. // Schema: f0(int32), b0(blob descriptor inline), b1(blob descriptor+external), b2(blob), - // b3(blob) Commit 1: file A writes (f0, b2, b3) Commit 2: file B writes (f0, b0, b1) with - // SetFirstRowId(0) -> merges with commit 1 Commit 3: file A writes (f0, b0, b1, b3) Commit 4: - // file B writes (b0, b1, b3) with SetFirstRowId(3) -> merges with commit 3 Duplicate columns - // (b0, b1, b3) in commit 4 - newer file (B) takes precedence. 
+ // b3(blob) + // Commit 1: file A writes (f0, b2, b3) + // Commit 2: file B writes (f0, b0, b1) with SetFirstRowId(0) + // -> merges with commit 1 + // Commit 3: file A writes (f0, b0, b1, b3) + // Commit 4: file B writes (b0, b1, b3) with SetFirstRowId(3) + // -> merges with commit 3 arrow::FieldVector fields = { arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), @@ -2299,7 +2281,6 @@ TEST_P(BlobTableInteTest, TestDataEvolutionWithBlobDescriptorField) { ASSERT_OK(Commit(table_path, commit_msgs_b2)); // --- Read all data with full schema --- - // Round 2: b0, b1, b3 come from file B (newer), f0 from file A, b2 not written -> null std::vector read_schema = {"f0", "b0", "b1", "b2", "b3"}; ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); @@ -2327,11 +2308,8 @@ TEST_P(BlobTableInteTest, TestDataEvolutionWithBlobDescriptorField) { // Resolve descriptors back to raw bytes ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); - ASSERT_TRUE(resolved->type()->Equals(expected_with_rk->type())) - << resolved->type()->ToString() << std::endl - << expected_with_rk->type()->ToString(); - ASSERT_TRUE(resolved->Equals(expected_with_rk)) << resolved->ToString() << std::endl - << expected_with_rk->ToString(); + ASSERT_TRUE(resolved->type()->Equals(expected_with_rk->type())); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); } TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWriteRawBytesDirectly) { From b5b630ac7b461ac060c64db52c0cd97befbf7fa6 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Fri, 29 May 2026 13:26:00 +0800 Subject: [PATCH 7/9] fix little --- src/paimon/core/io/rolling_blob_file_writer.cpp | 4 ++-- src/paimon/core/io/rolling_blob_file_writer.h | 2 +- src/paimon/testing/utils/test_helper.h | 12 ------------ 3 files changed, 3 insertions(+), 15 
deletions(-) diff --git a/src/paimon/core/io/rolling_blob_file_writer.cpp b/src/paimon/core/io/rolling_blob_file_writer.cpp index 5c419baa4..41b069b5d 100644 --- a/src/paimon/core/io/rolling_blob_file_writer.cpp +++ b/src/paimon/core/io/rolling_blob_file_writer.cpp @@ -44,13 +44,13 @@ RollingBlobFileWriter::RollingBlobFileWriter( std::function>()> create_file_writer, const std::shared_ptr& blob_schema, MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator, - const std::shared_ptr& data_type, std::set inline_fields) + const std::shared_ptr& data_type, const std::set& inline_fields) : RollingFileWriter<::ArrowArray*, std::shared_ptr>(target_file_size, create_file_writer), blob_schema_(blob_schema), blob_writer_creator_(std::move(blob_writer_creator)), data_type_(data_type), - inline_fields_(std::move(inline_fields)), + inline_fields_(inline_fields), logger_(Logger::GetLogger("RollingBlobFileWriter")) {} Status RollingBlobFileWriter::Write(::ArrowArray* record) { diff --git a/src/paimon/core/io/rolling_blob_file_writer.h b/src/paimon/core/io/rolling_blob_file_writer.h index 859f9589d..b55eacd9f 100644 --- a/src/paimon/core/io/rolling_blob_file_writer.h +++ b/src/paimon/core/io/rolling_blob_file_writer.h @@ -64,7 +64,7 @@ class RollingBlobFileWriter const std::shared_ptr& blob_schema, MultipleBlobFileWriter::BlobWriterCreator blob_writer_creator, const std::shared_ptr& data_type, - std::set inline_fields); + const std::set& inline_fields); ~RollingBlobFileWriter() override = default; Status Write(::ArrowArray* record) override; diff --git a/src/paimon/testing/utils/test_helper.h b/src/paimon/testing/utils/test_helper.h index 78061fd39..a837ec2a5 100644 --- a/src/paimon/testing/utils/test_helper.h +++ b/src/paimon/testing/utils/test_helper.h @@ -239,18 +239,6 @@ class TestHelper { return result_blobs; } - // need to reconstruct the blob array, because the array in read result do not have blob meta - Result> ReconstructBlobArray( - const std::shared_ptr& array, 
const std::shared_ptr& schema) { - ::ArrowArray c_array; - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*array, &c_array)); - ::ArrowSchema new_c_schema; - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, &new_c_schema)); - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(auto new_array, - arrow::ImportArray(&c_array, &new_c_schema)); - return new_array; - } - Result ReadAndCheckResult(const std::shared_ptr& data_type, const std::vector>& splits, const std::string& expected_result) { From 236ff5005449a37faf424815da56c77a5b427210 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Sat, 30 May 2026 11:01:57 +0800 Subject: [PATCH 8/9] fix review with fieldmapping --- src/paimon/common/data/blob_utils.cpp | 25 +++++++ src/paimon/common/data/blob_utils.h | 13 ++++ src/paimon/common/data/blob_utils_test.cpp | 34 ++++++++++ .../core/io/field_mapping_reader_test.cpp | 14 ++-- .../core/operation/abstract_split_read.cpp | 11 +-- src/paimon/core/operation/file_store_scan.cpp | 10 ++- src/paimon/core/utils/field_mapping.cpp | 41 ++--------- src/paimon/core/utils/field_mapping.h | 9 +-- src/paimon/core/utils/field_mapping_test.cpp | 68 ++++--------------- 9 files changed, 112 insertions(+), 113 deletions(-) diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index a3ce07cc8..75ff4302a 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -17,6 +17,7 @@ #include "paimon/common/data/blob_utils.h" #include +#include #include #include "arrow/api.h" @@ -25,6 +26,7 @@ #include "fmt/format.h" #include "paimon/common/data/blob_defs.h" #include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/types/data_field.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/string_utils.h" namespace arrow { @@ -163,4 +165,27 @@ Status BlobUtils::ValidateInlineBlobDescriptors( return Status::OK(); } +std::vector BlobUtils::ConvertBlobInlineDataFields( + const std::vector& 
data_fields, const std::vector& blob_inline_fields) { + if (blob_inline_fields.empty()) { + return data_fields; + } + + std::set blob_inline_field_set(blob_inline_fields.begin(), + blob_inline_fields.end()); + std::vector converted_fields; + converted_fields.reserve(data_fields.size()); + for (const auto& data_field : data_fields) { + if (blob_inline_field_set.find(data_field.Name()) == blob_inline_field_set.end()) { + converted_fields.push_back(data_field); + continue; + } + + auto binary_field = arrow::field(data_field.Name(), arrow::binary(), data_field.Nullable(), + data_field.ArrowField()->metadata()); + converted_fields.emplace_back(data_field.Id(), binary_field, data_field.Description()); + } + return converted_fields; +} + } // namespace paimon diff --git a/src/paimon/common/data/blob_utils.h b/src/paimon/common/data/blob_utils.h index 58c6e95af..211f15f84 100644 --- a/src/paimon/common/data/blob_utils.h +++ b/src/paimon/common/data/blob_utils.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "paimon/result.h" #include "paimon/visibility.h" @@ -31,6 +32,10 @@ class Schema; class StructArray; } // namespace arrow +namespace paimon { +class DataField; +} // namespace paimon + namespace paimon { /// Utils for blob type. class PAIMON_EXPORT BlobUtils { @@ -74,6 +79,14 @@ class PAIMON_EXPORT BlobUtils { static Status ValidateInlineBlobDescriptors( const std::shared_ptr& struct_array, const std::set& inline_descriptor_fields); + + /// Converts inline blob DataFields from large_binary to binary type. + /// Inline blob fields use large_binary in the table schema (because they are BLOB type), + /// but are stored as binary in data files. This conversion aligns the field type with + /// the actual on-disk storage format for correct reading. 
+ static std::vector ConvertBlobInlineDataFields( + const std::vector& data_fields, + const std::vector& blob_inline_fields); }; } // namespace paimon diff --git a/src/paimon/common/data/blob_utils_test.cpp b/src/paimon/common/data/blob_utils_test.cpp index 4171a2ec7..f2a02f10d 100644 --- a/src/paimon/common/data/blob_utils_test.cpp +++ b/src/paimon/common/data/blob_utils_test.cpp @@ -21,6 +21,7 @@ #include "gtest/gtest.h" #include "paimon/common/data/blob_defs.h" #include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/types/data_field.h" #include "paimon/data/blob.h" #include "paimon/memory/memory_pool.h" #include "paimon/testing/utils/testharness.h" @@ -331,4 +332,37 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { "require values to be a BlobDescriptor or BlobViewStruct."); } +TEST_F(BlobUtilsTest, TestConvertBlobInlineDataFields) { + // Schema with a blob field (large_binary with blob metadata) and normal fields. + auto blob_field = BlobUtils::ToArrowField("blob_col", /*nullable=*/true); + std::vector data_fields = {DataField(0, arrow::field("int_col", arrow::int32())), + DataField(1, blob_field), + DataField(2, arrow::field("str_col", arrow::utf8()))}; + + // Without inline fields — blob_col stays as large_binary + { + auto result = BlobUtils::ConvertBlobInlineDataFields(data_fields, {}); + ASSERT_EQ(result.size(), 3); + ASSERT_EQ(result[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); + } + + // With inline fields — blob_col should be converted from large_binary to binary + { + auto result = BlobUtils::ConvertBlobInlineDataFields(data_fields, {"blob_col"}); + ASSERT_EQ(result.size(), 3); + ASSERT_EQ(result[1].ArrowField()->type()->id(), arrow::Type::BINARY); + ASSERT_EQ(result[1].Name(), "blob_col"); + ASSERT_EQ(result[1].Nullable(), true); + // Other fields unchanged + ASSERT_EQ(result[0].ArrowField()->type()->id(), arrow::Type::INT32); + ASSERT_EQ(result[2].ArrowField()->type()->id(), arrow::Type::STRING); + 
} + + // Non-matching inline field name — no conversion should happen + { + auto result = BlobUtils::ConvertBlobInlineDataFields(data_fields, {"non_existent_field"}); + ASSERT_EQ(result[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); + } +} + } // namespace paimon::test diff --git a/src/paimon/core/io/field_mapping_reader_test.cpp b/src/paimon/core/io/field_mapping_reader_test.cpp index a3b2dd09c..491f67246 100644 --- a/src/paimon/core/io/field_mapping_reader_test.cpp +++ b/src/paimon/core/io/field_mapping_reader_test.cpp @@ -122,7 +122,7 @@ class FieldMappingReaderTest : public ::testing::Test { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys_, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields_)); auto arrow_schema = DataField::ConvertDataFieldsToArrowSchema( mapping->non_partition_info.non_partition_data_schema); @@ -155,8 +155,7 @@ class FieldMappingReaderTest : public ::testing::Test { const std::shared_ptr& read_schema, const std::shared_ptr& predicate, const std::vector& partition_keys, const BinaryRow& partition, - const std::shared_ptr& expect_array, - const std::vector& blob_inline_fields = {}) const { + const std::shared_ptr& expect_array) const { auto dir = paimon::test::UniqueTestDirectory::Create(); ASSERT_TRUE(dir); auto fs = dir->GetFileSystem(); @@ -164,8 +163,7 @@ class FieldMappingReaderTest : public ::testing::Test { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, - mapping_builder->CreateFieldMapping(data_schema, blob_inline_fields)); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_schema)); auto arrow_schema = DataField::ConvertDataFieldsToArrowSchema( mapping->non_partition_info.non_partition_data_schema); @@ 
-713,8 +711,9 @@ TEST_F(FieldMappingReaderTest, TestSchemaEvolutionWithDictType) { } TEST_F(FieldMappingReaderTest, TestReadInlineBlobAsBinaryDataFile) { + // data_fields uses binary type because inline blob fields are stored as binary in data files std::vector data_fields = { - DataField(0, BlobUtils::ToArrowField("descriptor", /*nullable=*/true)), + DataField(0, arrow::field("descriptor", arrow::binary(), /*nullable=*/true)), }; auto data_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); std::string json_str = R"([ @@ -735,8 +734,7 @@ TEST_F(FieldMappingReaderTest, TestReadInlineBlobAsBinaryDataFile) { .ValueOrDie()); CheckResult(data_schema, data_array, read_schema, /*predicate=*/nullptr, - /*partition_keys=*/{}, BinaryRow::EmptyRow(), expected, - /*blob_inline_fields=*/{"descriptor"}); + /*partition_keys=*/{}, BinaryRow::EmptyRow(), expected); } TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionRenameCombinedCast) { diff --git a/src/paimon/core/operation/abstract_split_read.cpp b/src/paimon/core/operation/abstract_split_read.cpp index 40d5e41d5..5098e1729 100644 --- a/src/paimon/core/operation/abstract_split_read.cpp +++ b/src/paimon/core/operation/abstract_split_read.cpp @@ -21,6 +21,7 @@ #include #include "arrow/type.h" +#include "paimon/common/data/blob_utils.h" #include "paimon/common/reader/delegating_prefetch_reader.h" #include "paimon/common/reader/predicate_batch_reader.h" #include "paimon/common/reader/prefetch_file_batch_reader_impl.h" @@ -193,14 +194,16 @@ Result> AbstractSplitRead::CreateFieldMappingRe SpecialFields::ValueKind()}; file_fields.insert(file_fields.end(), data_schema->Fields().begin(), data_schema->Fields().end()); - PAIMON_ASSIGN_OR_RAISE(field_mapping, field_mapping_builder->CreateFieldMapping( - file_fields, blob_inline_fields)); + PAIMON_ASSIGN_OR_RAISE(field_mapping, + field_mapping_builder->CreateFieldMapping(file_fields)); } else { PAIMON_ASSIGN_OR_RAISE( std::vector projected_data_fields, 
ProjectFieldsForRowTrackingAndDataEvolution(data_schema, file_meta->write_cols)); - PAIMON_ASSIGN_OR_RAISE(field_mapping, field_mapping_builder->CreateFieldMapping( - projected_data_fields, blob_inline_fields)); + auto converted_fields = + BlobUtils::ConvertBlobInlineDataFields(projected_data_fields, blob_inline_fields); + PAIMON_ASSIGN_OR_RAISE(field_mapping, + field_mapping_builder->CreateFieldMapping(converted_fields)); } auto read_schema = DataField::ConvertDataFieldsToArrowSchema( diff --git a/src/paimon/core/operation/file_store_scan.cpp b/src/paimon/core/operation/file_store_scan.cpp index 61d9cf61a..14b353489 100644 --- a/src/paimon/core/operation/file_store_scan.cpp +++ b/src/paimon/core/operation/file_store_scan.cpp @@ -27,6 +27,7 @@ #include "arrow/type.h" #include "fmt/format.h" #include "paimon/common/data/binary_array.h" +#include "paimon/common/data/blob_utils.h" #include "paimon/common/executor/future.h" #include "paimon/common/predicate/literal_converter.h" #include "paimon/common/types/data_field.h" @@ -354,9 +355,12 @@ Status FileStoreScan::SplitAndSetFilter(const std::vector& partitio PAIMON_ASSIGN_OR_RAISE(std::unique_ptr mapping_builder, FieldMappingBuilder::Create(arrow_schema, partition_keys, scan_filters->GetPredicate())); - PAIMON_ASSIGN_OR_RAISE( - std::unique_ptr mapping, - mapping_builder->CreateFieldMapping(arrow_schema, core_options_.GetBlobInlineFields())); + PAIMON_ASSIGN_OR_RAISE(std::vector data_fields, + DataField::ConvertArrowSchemaToDataFields(arrow_schema)); + auto converted_fields = BlobUtils::ConvertBlobInlineDataFields( + data_fields, core_options_.GetBlobInlineFields()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr mapping, + mapping_builder->CreateFieldMapping(converted_fields)); if (mapping->partition_info != std::nullopt) { const auto& partition_info = mapping->partition_info.value(); partition_schema_ = diff --git a/src/paimon/core/utils/field_mapping.cpp b/src/paimon/core/utils/field_mapping.cpp index 8898ddfd9..e24ee7277 
100644 --- a/src/paimon/core/utils/field_mapping.cpp +++ b/src/paimon/core/utils/field_mapping.cpp @@ -17,7 +17,6 @@ #include "paimon/core/utils/field_mapping.h" #include -#include #include #include "arrow/type.h" @@ -62,24 +61,19 @@ Result> FieldMappingBuilder::Create( } Result> FieldMappingBuilder::CreateFieldMapping( - const std::shared_ptr& data_schema, - const std::vector& blob_inline_fields) const { + const std::shared_ptr& data_schema) const { PAIMON_ASSIGN_OR_RAISE(std::vector data_fields, DataField::ConvertArrowSchemaToDataFields(data_schema)); - return CreateFieldMapping(data_fields, blob_inline_fields); + return CreateFieldMapping(data_fields); } Result> FieldMappingBuilder::CreateFieldMapping( - const std::vector& data_fields, - const std::vector& blob_inline_fields) const { - auto converted_data_fields = ConvertBlobInlineDataFields(data_fields, blob_inline_fields); - + const std::vector& data_fields) const { // generate non-exist field info - std::optional non_exist_field_info = - CreateNonExistFieldInfo(converted_data_fields); + std::optional non_exist_field_info = CreateNonExistFieldInfo(data_fields); // generate exist field info - ExistFieldInfo exist_field_info = CreateExistFieldInfo(converted_data_fields); + ExistFieldInfo exist_field_info = CreateExistFieldInfo(data_fields); // key: partition key, value: partition idx std::map partition_key_to_idx = @@ -87,35 +81,12 @@ Result> FieldMappingBuilder::CreateFieldMapping( PAIMON_ASSIGN_OR_RAISE( NonPartitionInfo non_partition_info, - CreateNonPartitionInfo(converted_data_fields, exist_field_info, partition_key_to_idx)); + CreateNonPartitionInfo(data_fields, exist_field_info, partition_key_to_idx)); PAIMON_ASSIGN_OR_RAISE(std::optional partition_info, CreatePartitionInfo(exist_field_info, partition_key_to_idx)); return std::make_unique(partition_info, non_partition_info, non_exist_field_info); } -std::vector FieldMappingBuilder::ConvertBlobInlineDataFields( - const std::vector& data_fields, const 
std::vector& blob_inline_fields) { - if (blob_inline_fields.empty()) { - return data_fields; - } - - std::set blob_inline_field_set(blob_inline_fields.begin(), - blob_inline_fields.end()); - std::vector converted_fields; - converted_fields.reserve(data_fields.size()); - for (const auto& data_field : data_fields) { - if (blob_inline_field_set.find(data_field.Name()) == blob_inline_field_set.end()) { - converted_fields.push_back(data_field); - continue; - } - - auto binary_field = arrow::field(data_field.Name(), arrow::binary(), data_field.Nullable(), - data_field.ArrowField()->metadata()); - converted_fields.emplace_back(data_field.Id(), binary_field, data_field.Description()); - } - return converted_fields; -} - ExistFieldInfo FieldMappingBuilder::CreateExistFieldInfo( const std::vector& data_fields) const { // key:field id, value: {target_idx, read field} diff --git a/src/paimon/core/utils/field_mapping.h b/src/paimon/core/utils/field_mapping.h index 4dde0c478..0c0abc04b 100644 --- a/src/paimon/core/utils/field_mapping.h +++ b/src/paimon/core/utils/field_mapping.h @@ -56,11 +56,9 @@ class FieldMappingBuilder { const std::shared_ptr& predicate); Result> CreateFieldMapping( - const std::vector& data_fields, - const std::vector& blob_inline_fields) const; + const std::vector& data_fields) const; Result> CreateFieldMapping( - const std::shared_ptr& data_schema, - const std::vector& blob_inline_fields) const; + const std::shared_ptr& data_schema) const; int32_t GetReadFieldCount() const { return read_fields_.size(); @@ -83,9 +81,6 @@ class FieldMappingBuilder { std::optional CreateNonExistFieldInfo( const std::vector& data_fields) const; ExistFieldInfo CreateExistFieldInfo(const std::vector& data_fields) const; - static std::vector ConvertBlobInlineDataFields( - const std::vector& data_fields, - const std::vector& blob_inline_fields); Result CreateNonPartitionInfo( const std::vector& data_fields, const ExistFieldInfo& exist_field_info, diff --git 
a/src/paimon/core/utils/field_mapping_test.cpp b/src/paimon/core/utils/field_mapping_test.cpp index 29e0deee5..4ec1bce2e 100644 --- a/src/paimon/core/utils/field_mapping_test.cpp +++ b/src/paimon/core/utils/field_mapping_test.cpp @@ -91,7 +91,7 @@ TEST_F(FieldMappingTest, TestEmptyPartitionKeys) { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create( schema_, /*partition_keys=*/std::vector(), predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); ASSERT_EQ(mapping->partition_info, std::nullopt); ASSERT_EQ(mapping->non_exist_field_info, std::nullopt); @@ -123,7 +123,7 @@ TEST_F(FieldMappingTest, TestCompoundPartitionPredicate) { std::vector partition_keys = {"f0", "f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[0], fields_[1], fields_[2]}; @@ -156,7 +156,7 @@ TEST_F(FieldMappingTest, TestPartitionKeysEqualSchema) { std::vector partition_keys = {"f0", "f1", "f2", "f3"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = fields_; @@ -182,7 +182,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, 
mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -208,7 +208,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema2) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -245,7 +245,7 @@ TEST_F(FieldMappingTest, TestAllPartitionKeysInSchema3) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(schema_, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[1], fields_[2]}; @@ -288,7 +288,7 @@ TEST_F(FieldMappingTest, TestPartialPartitionKeysInSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = {fields_[2]}; @@ -332,7 +332,7 @@ TEST_F(FieldMappingTest, TestNoPartitionKeysInReadSchema) { std::vector partition_keys = {"f1", "f2"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - 
ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(schema_)); ASSERT_EQ(mapping->partition_info, std::nullopt); @@ -376,7 +376,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolution) { ASSERT_OK_AND_ASSIGN( auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, /*predicate=*/nullptr)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -462,7 +462,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolutionWithPredicate) { std::vector partition_keys = {"key0", "key1"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -564,7 +564,7 @@ TEST_F(FieldMappingTest, TestSchemaEvolutionWithPredicate2) { std::vector partition_keys = {"key0", "key1"}; ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); + ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); PartitionInfo expected_part_info; expected_part_info.partition_read_schema = { @@ -631,7 +631,7 @@ TEST_F(FieldMappingTest, TestCompoundPredicateWithoutPushDown) { ASSERT_OK_AND_ASSIGN(auto mapping_builder, FieldMappingBuilder::Create(read_schema, partition_keys, predicate)); ASSERT_TRUE(mapping_builder); - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); + 
ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields)); ASSERT_FALSE(mapping->partition_info); NonPartitionInfo expected_non_part_info; @@ -643,48 +643,4 @@ TEST_F(FieldMappingTest, TestCompoundPredicateWithoutPushDown) { CheckNonPartitionInfo(mapping->non_partition_info, expected_non_part_info); } -TEST_F(FieldMappingTest, TestBlobInlineFieldConversion) { - // Schema with a blob field (large_binary with blob metadata) and a normal field. - auto blob_field = BlobUtils::ToArrowField("blob_col", /*nullable=*/true); - std::vector data_fields = {DataField(0, arrow::field("int_col", arrow::int32())), - DataField(1, blob_field), - DataField(2, arrow::field("str_col", arrow::utf8()))}; - - auto read_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); - ASSERT_OK_AND_ASSIGN( - auto mapping_builder, - FieldMappingBuilder::Create(read_schema, /*partition_keys=*/{}, /*predicate=*/nullptr)); - - // Without inline fields — blob_col stays as large_binary - { - ASSERT_OK_AND_ASSIGN(auto mapping, mapping_builder->CreateFieldMapping(data_fields, {})); - auto& data_schema = mapping->non_partition_info.non_partition_data_schema; - ASSERT_EQ(data_schema.size(), 3); - ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); - } - - // With inline fields — blob_col should be converted from large_binary to binary - { - ASSERT_OK_AND_ASSIGN(auto mapping, - mapping_builder->CreateFieldMapping(data_fields, {"blob_col"})); - auto& data_schema = mapping->non_partition_info.non_partition_data_schema; - ASSERT_EQ(data_schema.size(), 3); - // blob_col converted to binary - ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::BINARY); - ASSERT_EQ(data_schema[1].Name(), "blob_col"); - ASSERT_EQ(data_schema[1].Nullable(), true); - // Other fields unchanged - ASSERT_EQ(data_schema[0].ArrowField()->type()->id(), arrow::Type::INT32); - ASSERT_EQ(data_schema[2].ArrowField()->type()->id(), arrow::Type::STRING); - } - - // 
Non-matching inline field name — no conversion should happen - { - ASSERT_OK_AND_ASSIGN( - auto mapping, mapping_builder->CreateFieldMapping(data_fields, {"non_existent_field"})); - auto& data_schema = mapping->non_partition_info.non_partition_data_schema; - ASSERT_EQ(data_schema[1].ArrowField()->type()->id(), arrow::Type::LARGE_BINARY); - } -} - } // namespace paimon::test From d5096b8e9f05565d999e91f17b059e7472cc17b6 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 1 Jun 2026 14:43:50 +0800 Subject: [PATCH 9/9] fix review and add test --- src/paimon/core/utils/field_mapping_test.cpp | 2 - src/paimon/format/blob/blob_format_writer.cpp | 2 +- test/inte/blob_table_inte_test.cpp | 85 +++++++++++++------ 3 files changed, 62 insertions(+), 27 deletions(-) diff --git a/src/paimon/core/utils/field_mapping_test.cpp b/src/paimon/core/utils/field_mapping_test.cpp index 4ec1bce2e..c74083e26 100644 --- a/src/paimon/core/utils/field_mapping_test.cpp +++ b/src/paimon/core/utils/field_mapping_test.cpp @@ -18,10 +18,8 @@ #include -#include "arrow/type.h" #include "arrow/type_fwd.h" #include "gtest/gtest.h" -#include "paimon/common/data/blob_utils.h" #include "paimon/common/predicate/leaf_predicate_impl.h" #include "paimon/common/predicate/predicate_filter.h" #include "paimon/data/decimal.h" diff --git a/src/paimon/format/blob/blob_format_writer.cpp b/src/paimon/format/blob/blob_format_writer.cpp index 4a6256a71..a21299d77 100644 --- a/src/paimon/format/blob/blob_format_writer.cpp +++ b/src/paimon/format/blob/blob_format_writer.cpp @@ -127,7 +127,7 @@ Status BlobFormatWriter::AddBatch(ArrowArray* batch) { PAIMON_RETURN_NOT_OK(Flush()); } } else { - // TODO(xinyu.lxy): Java does not flush when writeConsumer is null. + // Java does not flush when writeConsumer is null. 
PAIMON_RETURN_NOT_OK(Flush()); } return Status::OK(); diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index 4e0f9c305..75010ebc0 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -2150,23 +2150,25 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorMultiCommitAndShuffledReadSchema) { WriteArray(table_path, {}, schema->field_names(), {desc_array_3})); ASSERT_OK(Commit(table_path, commit_msgs_3)); - // --- Read with shuffled schema: b3, b2, b1, b0, f0 --- - std::vector shuffled_read_schema = {"b3", "b2", "b1", "b0", "f0"}; - ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); - - std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; - ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, shuffled_read_schema, plan, - /*predicate=*/nullptr, read_options)); - ASSERT_TRUE(result.chunked_array); - auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); - auto read_struct = std::dynamic_pointer_cast(read_concat); - - // Build expected array in shuffled order from all 3 batches - arrow::FieldVector shuffled_fields = { - BlobUtils::ToArrowField("b3", true), BlobUtils::ToArrowField("b2", true), - BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b0", true), - arrow::field("f0", arrow::int32())}; - std::string expected_json = R"([ + // test read + { + // --- Read with shuffled schema: b3, b2, b1, b0, f0 --- + std::vector shuffled_read_schema = {"b3", "b2", "b1", "b0", "f0"}; + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, shuffled_read_schema, plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // Build expected array in shuffled 
order from all 3 batches + arrow::FieldVector shuffled_fields = { + BlobUtils::ToArrowField("b3", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b0", true), + arrow::field("f0", arrow::int32())}; + std::string expected_json = R"([ ["raw_3_0", "raw_2_0", null, "img_0", 1], [null, "raw_2_1", "vid_1", "img_1", 2], ["raw_3_2", "raw_2_2", "vid_2", "img_2", 3], @@ -2174,14 +2176,49 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorMultiCommitAndShuffledReadSchema) { [null, "raw_2_4", null, "img_4", 5], ["raw_3_5", null, "vid_5", "img_5", 6] ])"; - auto expected_array = std::dynamic_pointer_cast( - arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(shuffled_fields), expected_json) - .ValueOrDie()); + auto expected_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(shuffled_fields), + expected_json) + .ValueOrDie()); - // Resolve descriptors (b0, b1 are descriptor fields) back to raw bytes - ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); - ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); - ASSERT_TRUE(resolved->Equals(expected_with_rk)); + // Resolve descriptors (b0, b1 are descriptor fields) back to raw bytes + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + } + { + // test scan and read with GlobalIndexResult + std::vector shuffled_read_schema = {"b3", "b2", "b1", "b0", "f0"}; + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path, /*predicate=*/nullptr, + /*row_ranges=*/{Range(1, 3), Range(5, 5)})); + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, shuffled_read_schema, plan, + /*predicate=*/nullptr, read_options)); + 
ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // Build expected array in shuffled order, restricted to the rows selected by row_ranges + arrow::FieldVector shuffled_fields = { + BlobUtils::ToArrowField("b3", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b0", true), + arrow::field("f0", arrow::int32())}; + std::string expected_json = R"([ + [null, "raw_2_1", "vid_1", "img_1", 2], + ["raw_3_2", "raw_2_2", "vid_2", "img_2", 3], + ["raw_3_3", "raw_2_3", "vid_3", null, 4], + ["raw_3_5", null, "vid_5", "img_5", 6] + ])"; + auto expected_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(shuffled_fields), + expected_json) + .ValueOrDie()); + + // Resolve descriptors (b0, b1 are descriptor fields) back to raw bytes + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); + } } TEST_P(BlobTableInteTest, TestDataEvolutionWithBlobDescriptorField) {