From b8e1b5e65be1f4d30118e88fc2f5d1e15283aa1b Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Fri, 13 Mar 2026 08:43:28 +0000 Subject: [PATCH 1/8] feat(compaction): support multi-level lookup in LSM tree --- include/paimon/data/timestamp.h | 4 + include/paimon/disk/io_manager.h | 38 ++ include/paimon/reader/file_batch_reader.h | 2 +- src/paimon/CMakeLists.txt | 6 + src/paimon/common/data/timestamp_test.cpp | 2 +- .../bitmap/apply_bitmap_index_batch_reader.h | 26 +- .../reader/delegating_prefetch_reader.h | 2 +- .../prefetch_file_batch_reader_impl.cpp | 5 +- .../reader/prefetch_file_batch_reader_impl.h | 2 +- .../prefetch_file_batch_reader_impl_test.cpp | 34 +- src/paimon/common/sst/block_reader.h | 2 +- src/paimon/common/sst/sst_file_reader.cpp | 2 +- src/paimon/common/utils/arrow/arrow_utils.cpp | 18 + src/paimon/common/utils/arrow/arrow_utils.h | 3 + .../common/utils/arrow/arrow_utils_test.cpp | 67 +++ .../utils/binary_row_partition_computer.cpp | 25 + .../utils/binary_row_partition_computer.h | 4 + .../binary_row_partition_computer_test.cpp | 41 ++ src/paimon/common/utils/object_utils.h | 13 + src/paimon/common/utils/object_utils_test.cpp | 24 + .../apply_deletion_vector_batch_reader.h | 28 +- src/paimon/core/disk/io_manager.cpp | 47 ++ .../complete_row_tracking_fields_reader.cpp | 3 +- .../io/complete_row_tracking_fields_reader.h | 2 +- src/paimon/core/io/data_file_meta.cpp | 4 + src/paimon/core/io/data_file_meta.h | 1 + src/paimon/core/io/data_file_meta_test.cpp | 2 + src/paimon/core/io/field_mapping_reader.cpp | 3 +- src/paimon/core/io/field_mapping_reader.h | 29 +- .../io/key_value_data_file_record_reader.cpp | 12 +- .../io/key_value_data_file_record_reader.h | 11 +- ...key_value_data_file_record_reader_test.cpp | 57 +++ src/paimon/core/key_value.h | 1 + .../compact/merge_tree_compact_rewriter.cpp | 12 +- .../compact/merge_tree_compact_rewriter.h | 2 +- .../merge_tree_compact_rewriter_test.cpp | 2 +- src/paimon/core/mergetree/level_sorted_run.h | 7 + 
src/paimon/core/mergetree/levels.cpp | 206 ++++++++ src/paimon/core/mergetree/levels.h | 91 ++++ src/paimon/core/mergetree/levels_test.cpp | 167 +++++++ .../default_lookup_serializer_factory.h | 10 +- src/paimon/core/mergetree/lookup_file.h | 105 +++++ .../core/mergetree/lookup_file_test.cpp | 106 +++++ src/paimon/core/mergetree/lookup_levels.cpp | 269 +++++++++++ src/paimon/core/mergetree/lookup_levels.h | 115 +++++ .../core/mergetree/lookup_levels_test.cpp | 439 ++++++++++++++++++ src/paimon/core/mergetree/lookup_utils.h | 119 +++++ src/paimon/core/mergetree/sorted_run.h | 37 ++ src/paimon/core/mergetree/sorted_run_test.cpp | 39 ++ .../core/operation/abstract_split_read.cpp | 14 +- .../core/operation/abstract_split_read.h | 20 +- .../operation/data_evolution_split_read.cpp | 14 +- .../operation/data_evolution_split_read.h | 2 +- .../core/operation/merge_file_split_read.cpp | 39 +- .../core/operation/merge_file_split_read.h | 2 +- .../core/operation/raw_file_split_read.cpp | 15 +- .../core/operation/raw_file_split_read.h | 2 +- .../format/avro/avro_file_batch_reader.h | 2 +- .../avro/avro_file_batch_reader_test.cpp | 13 +- .../format/blob/blob_file_batch_reader.h | 8 +- .../blob/blob_file_batch_reader_test.cpp | 46 +- .../format/lance/lance_file_batch_reader.h | 8 +- src/paimon/format/orc/orc_file_batch_reader.h | 2 +- .../format/orc/orc_file_batch_reader_test.cpp | 14 +- .../format/parquet/file_reader_wrapper.h | 2 +- .../parquet/file_reader_wrapper_test.cpp | 28 +- .../parquet/parquet_file_batch_reader.h | 2 +- .../parquet_file_batch_reader_test.cpp | 14 +- .../testing/mock/mock_file_batch_reader.h | 2 +- .../mock_key_value_data_file_record_reader.h | 2 +- 70 files changed, 2317 insertions(+), 180 deletions(-) create mode 100644 include/paimon/disk/io_manager.h create mode 100644 src/paimon/core/disk/io_manager.cpp create mode 100644 src/paimon/core/mergetree/levels.cpp create mode 100644 src/paimon/core/mergetree/levels.h create mode 100644 
src/paimon/core/mergetree/levels_test.cpp create mode 100644 src/paimon/core/mergetree/lookup_file.h create mode 100644 src/paimon/core/mergetree/lookup_file_test.cpp create mode 100644 src/paimon/core/mergetree/lookup_levels.cpp create mode 100644 src/paimon/core/mergetree/lookup_levels.h create mode 100644 src/paimon/core/mergetree/lookup_levels_test.cpp create mode 100644 src/paimon/core/mergetree/lookup_utils.h diff --git a/include/paimon/data/timestamp.h b/include/paimon/data/timestamp.h index cc4c092a..dddbff7a 100644 --- a/include/paimon/data/timestamp.h +++ b/include/paimon/data/timestamp.h @@ -100,6 +100,10 @@ class PAIMON_EXPORT Timestamp { nano_of_millisecond_ == other.nano_of_millisecond_; } + bool operator!=(const Timestamp& other) const { + return !(*this == other); + } + bool operator<(const Timestamp& other) const { if (millisecond_ == other.millisecond_) { return nano_of_millisecond_ < other.nano_of_millisecond_; diff --git a/include/paimon/disk/io_manager.h b/include/paimon/disk/io_manager.h new file mode 100644 index 00000000..f5bdb527 --- /dev/null +++ b/include/paimon/disk/io_manager.h @@ -0,0 +1,38 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "paimon/result.h" +#include "paimon/visibility.h" + +namespace paimon { +/// The facade for the provided disk I/O services. 
+class PAIMON_EXPORT IOManager { + public: + virtual ~IOManager() = default; + static std::unique_ptr Create(const std::string& tmp_dir); + + /// @return Temp directory path. + virtual const std::string& GetTempDir() const = 0; + + virtual Result GenerateTempFilePath(const std::string& prefix) const = 0; +}; +} // namespace paimon diff --git a/include/paimon/reader/file_batch_reader.h b/include/paimon/reader/file_batch_reader.h index 708c8b23..272de3c8 100644 --- a/include/paimon/reader/file_batch_reader.h +++ b/include/paimon/reader/file_batch_reader.h @@ -47,7 +47,7 @@ class PAIMON_EXPORT FileBatchReader : public BatchReader { using BatchReader::NextBatchWithBitmap; /// Get the row number of the first row in the previously read batch. - virtual uint64_t GetPreviousBatchFirstRowNumber() const = 0; + virtual Result GetPreviousBatchFirstRowNumber() const = 0; /// Get the number of rows in the file. virtual Result GetNumberOfRows() const = 0; diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index bfa73af4..2083d053 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -144,6 +144,7 @@ set(PAIMON_COMMON_SRCS common/utils/string_utils.cpp) set(PAIMON_CORE_SRCS + core/disk/io_manager.cpp core/append/append_only_writer.cpp core/append/bucketed_append_compact_manager.cpp core/casting/binary_to_string_cast_executor.cpp @@ -233,6 +234,8 @@ set(PAIMON_CORE_SRCS core/mergetree/compact/sort_merge_reader_with_loser_tree.cpp core/mergetree/compact/sort_merge_reader_with_min_heap.cpp core/mergetree/merge_tree_writer.cpp + core/mergetree/levels.cpp + core/mergetree/lookup_levels.cpp core/migrate/file_meta_utils.cpp core/operation/data_evolution_file_store_scan.cpp core/operation/data_evolution_split_read.cpp @@ -536,6 +539,9 @@ if(PAIMON_BUILD_TESTS) core/manifest/partition_entry_test.cpp core/manifest/file_entry_test.cpp core/manifest/index_manifest_entry_serializer_test.cpp + core/mergetree/levels_test.cpp + 
core/mergetree/lookup_file_test.cpp + core/mergetree/lookup_levels_test.cpp core/mergetree/compact/aggregate/aggregate_merge_function_test.cpp core/mergetree/compact/aggregate/field_aggregator_factory_test.cpp core/mergetree/compact/aggregate/field_bool_agg_test.cpp diff --git a/src/paimon/common/data/timestamp_test.cpp b/src/paimon/common/data/timestamp_test.cpp index 46544844..3cf16536 100644 --- a/src/paimon/common/data/timestamp_test.cpp +++ b/src/paimon/common/data/timestamp_test.cpp @@ -68,7 +68,7 @@ TEST_F(TimestampTest, EqualityOperator) { Timestamp ts3(1622547800000, 654321); ASSERT_EQ(ts1, ts1); ASSERT_EQ(ts1, ts2); - ASSERT_FALSE(ts1 == ts3); + ASSERT_NE(ts1, ts3); } TEST_F(TimestampTest, LessThanOperator) { diff --git a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h index 035aefe7..7e2c9338 100644 --- a/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h +++ b/src/paimon/common/file_index/bitmap/apply_bitmap_index_batch_reader.h @@ -25,7 +25,6 @@ #include "arrow/c/helpers.h" #include "paimon/common/reader/reader_utils.h" #include "paimon/file_index/bitmap_index_result.h" -#include "paimon/reader/batch_reader.h" #include "paimon/reader/file_batch_reader.h" #include "paimon/result.h" #include "paimon/status.h" @@ -34,7 +33,7 @@ namespace paimon { class Metrics; -class ApplyBitmapIndexBatchReader : public BatchReader { +class ApplyBitmapIndexBatchReader : public FileBatchReader { public: ApplyBitmapIndexBatchReader(std::unique_ptr&& reader, RoaringBitmap32&& bitmap) : reader_(std::move(reader)), bitmap_(std::move(bitmap)) { @@ -72,10 +71,31 @@ class ApplyBitmapIndexBatchReader : public BatchReader { return reader_->GetReaderMetrics(); } + Result> GetFileSchema() const override { + return reader_->GetFileSchema(); + } + + Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, + const std::optional& selection_bitmap) 
override { + return Status::Invalid("ApplyBitmapIndexBatchReader does not support SetReadSchema"); + } + + Result GetPreviousBatchFirstRowNumber() const override { + return reader_->GetPreviousBatchFirstRowNumber(); + } + + Result GetNumberOfRows() const override { + return reader_->GetNumberOfRows(); + } + + bool SupportPreciseBitmapSelection() const override { + return reader_->SupportPreciseBitmapSelection(); + } + private: Result Filter(int32_t batch_size) const { RoaringBitmap32 is_valid; - int32_t start_pos = reader_->GetPreviousBatchFirstRowNumber(); + PAIMON_ASSIGN_OR_RAISE(int32_t start_pos, reader_->GetPreviousBatchFirstRowNumber()); int32_t length = batch_size; for (auto iter = bitmap_.EqualOrLarger(start_pos); iter != bitmap_.End() && *iter < start_pos + length; ++iter) { diff --git a/src/paimon/common/reader/delegating_prefetch_reader.h b/src/paimon/common/reader/delegating_prefetch_reader.h index 432ddfd5..fe2e3eda 100644 --- a/src/paimon/common/reader/delegating_prefetch_reader.h +++ b/src/paimon/common/reader/delegating_prefetch_reader.h @@ -54,7 +54,7 @@ class DelegatingPrefetchReader : public FileBatchReader { return prefetch_reader_->SetReadSchema(read_schema, predicate, selection_bitmap); } - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { return GetReader()->GetPreviousBatchFirstRowNumber(); } diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp index 63c8d614..da74f348 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.cpp @@ -409,7 +409,8 @@ Status PrefetchFileBatchReaderImpl::EnsureReaderPosition( Status PrefetchFileBatchReaderImpl::HandleReadResult( size_t reader_idx, const std::pair& read_range, ReadBatchWithBitmap&& read_batch_with_bitmap) { - uint64_t first_row_number = 
readers_[reader_idx]->GetPreviousBatchFirstRowNumber(); + PAIMON_ASSIGN_OR_RAISE(uint64_t first_row_number, + readers_[reader_idx]->GetPreviousBatchFirstRowNumber()); auto& prefetch_queue = prefetch_queues_[reader_idx]; if (!BatchReader::IsEofBatch(read_batch_with_bitmap)) { auto& [read_batch, bitmap] = read_batch_with_bitmap; @@ -570,7 +571,7 @@ Result> PrefetchFileBatchReaderImpl::GetFileSchem return readers_[0]->GetFileSchema(); } -uint64_t PrefetchFileBatchReaderImpl::GetPreviousBatchFirstRowNumber() const { +Result PrefetchFileBatchReaderImpl::GetPreviousBatchFirstRowNumber() const { return previous_batch_first_row_num_; } diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h index 06ec45ac..5ed9fb35 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl.h +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl.h @@ -76,7 +76,7 @@ class PrefetchFileBatchReaderImpl : public PrefetchFileBatchReader { const std::optional& selection_bitmap) override; Status SeekToRow(uint64_t row_number) override; - uint64_t GetPreviousBatchFirstRowNumber() const override; + Result GetPreviousBatchFirstRowNumber() const override; Result GetNumberOfRows() const override; uint64_t GetNextRowToRead() const override; void Close() override; diff --git a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp index 2f6a7c9f..6cfd6133 100644 --- a/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp +++ b/src/paimon/common/reader/prefetch_file_batch_reader_impl_test.cpp @@ -206,11 +206,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestSimple) { /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + 
ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 101); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 101); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -396,11 +396,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestReadWithLargeBatchSize) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 101); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 101); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); } @@ -424,11 +424,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPartialReaderSuccessRead) { } arrow::ArrayVector result_array_vector; - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto batch_with_bitmap, reader->NextBatchWithBitmap()); auto& [batch, bitmap] = batch_with_bitmap; ASSERT_EQ(batch.first->length, bitmap.Cardinality()); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 0); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 0); ASSERT_OK_AND_ASSIGN(auto array, ReadResultCollector::GetArray(std::move(batch))); result_array_vector.push_back(array); ASSERT_OK(prefetch_reader->GetReadStatus()); @@ -469,9 +469,9 @@ 
TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { ->SetNextBatchStatus(Status::IOError("mock error")); } - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); auto batch_result = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_FALSE(batch_result.ok()); ASSERT_TRUE(batch_result.status().IsIOError()); ASSERT_FALSE(prefetch_reader->is_shutdown_); @@ -480,7 +480,7 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestAllReaderFailedWithIOError) { // call NextBatch again, will still return error status auto batch_result2 = reader->NextBatchWithBitmap(); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_FALSE(batch_result2.ok()); ASSERT_TRUE(batch_result2.status().IsIOError()); } @@ -497,11 +497,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestPrefetchWithEmptyData) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 0); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 0); ASSERT_FALSE(result_array); } @@ -517,11 +517,11 @@ TEST_F(PrefetchFileBatchReaderImplTest, TestCallNextBatchAfterReadingEof) { prefetch_max_parallel_num * 2, /*enable_adaptive_prefetch_strategy=*/false, executor_, /*initialize_read_ranges=*/true, /*prefetch_cache_mode=*/PrefetchCacheMode::ALWAYS, CacheConfig(), GetDefaultPool())); - 
ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 10); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 10); auto expected_array = std::make_shared(data_array); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -623,11 +623,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, TestPrefetchWithPredicatePushdownWithCom PreparePrefetchReader(file_format, schema.get(), predicate, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 90); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 90); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 30)); @@ -659,11 +659,11 @@ TEST_P(PrefetchFileBatchReaderImplTest, /*selection_bitmap=*/std::nullopt, /*batch_size=*/10, /*prefetch_max_parallel_num=*/3, cache_mode); ASSERT_OK(reader->RefreshReadRanges()); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, ReadResultCollector::CollectResult( reader.get(), /*max simulated data processing time*/ 100)); - ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber(), 90); + ASSERT_EQ(reader->GetPreviousBatchFirstRowNumber().value(), 90); arrow::ArrayVector expected_array_vector; expected_array_vector.push_back(data_array->Slice(0, 20)); diff --git a/src/paimon/common/sst/block_reader.h 
b/src/paimon/common/sst/block_reader.h index ec417105..542c5c24 100644 --- a/src/paimon/common/sst/block_reader.h +++ b/src/paimon/common/sst/block_reader.h @@ -74,7 +74,7 @@ class AlignedBlockReader : public BlockReader { class UnAlignedBlockReader : public BlockReader { public: UnAlignedBlockReader(const std::shared_ptr& data, - std::shared_ptr& index, + const std::shared_ptr& index, MemorySlice::SliceComparator comparator) : BlockReader(data, index->Length() / 4, std::move(comparator)), index_(index) {} diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index c197b4cb..f313b224 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -90,7 +90,7 @@ Result> SstFileReader::Lookup(const std::shared_ptrIterator(); - PAIMON_ASSIGN_OR_RAISE(bool _, index_block_iterator->SeekTo(key_slice)); + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool _, index_block_iterator->SeekTo(key_slice)); // if indexIterator does not have a next, it means the key does not exist in this iterator if (index_block_iterator->HasNext()) { // seek the current iterator to the key diff --git a/src/paimon/common/utils/arrow/arrow_utils.cpp b/src/paimon/common/utils/arrow/arrow_utils.cpp index 5cbbce64..c27b6c4b 100644 --- a/src/paimon/common/utils/arrow/arrow_utils.cpp +++ b/src/paimon/common/utils/arrow/arrow_utils.cpp @@ -91,6 +91,24 @@ void ArrowUtils::TraverseArray(const std::shared_ptr& array) { } } +bool ArrowUtils::EqualsIgnoreNullable(const std::shared_ptr& type, + const std::shared_ptr& other_type) { + if (type->id() != other_type->id() || type->num_fields() != other_type->num_fields()) { + return false; + } + for (int32_t i = 0; i < type->num_fields(); ++i) { + const auto& field = type->field(i); + const auto& other_field = other_type->field(i); + if (field->name() != other_field->name()) { + return false; + } + if (!EqualsIgnoreNullable(field->type(), other_field->type())) { + return false; + } + 
} + return true; +} + Status ArrowUtils::InnerCheckNullabilityMatch(const std::shared_ptr& field, const std::shared_ptr& data) { if (PAIMON_UNLIKELY(!field->nullable() && data->null_count() != 0)) { diff --git a/src/paimon/common/utils/arrow/arrow_utils.h b/src/paimon/common/utils/arrow/arrow_utils.h index 4e49c913..a3638c38 100644 --- a/src/paimon/common/utils/arrow/arrow_utils.h +++ b/src/paimon/common/utils/arrow/arrow_utils.h @@ -46,6 +46,9 @@ class PAIMON_EXPORT ArrowUtils { static Result> RemoveFieldFromStructArray( const std::shared_ptr& struct_array, const std::string& field_name); + static bool EqualsIgnoreNullable(const std::shared_ptr& type, + const std::shared_ptr& other_type); + private: static Status InnerCheckNullabilityMatch(const std::shared_ptr& field, const std::shared_ptr& data); diff --git a/src/paimon/common/utils/arrow/arrow_utils_test.cpp b/src/paimon/common/utils/arrow/arrow_utils_test.cpp index ba27cbcc..e91cd71d 100644 --- a/src/paimon/common/utils/arrow/arrow_utils_test.cpp +++ b/src/paimon/common/utils/arrow/arrow_utils_test.cpp @@ -378,4 +378,71 @@ TEST(ArrowUtilsTest, TestRemoveFieldFromStructArraySuccess) { ASSERT_TRUE(result->Equals(expected_struct_array)); } +TEST(ArrowUtilsTest, TestEqualsIgnoreNullable) { + { + // test simple + ASSERT_FALSE(ArrowUtils::EqualsIgnoreNullable(arrow::int32(), arrow::int64())); + ASSERT_TRUE(ArrowUtils::EqualsIgnoreNullable(arrow::int32(), arrow::int32())); + } + { + // test struct + auto child1 = arrow::field("child1", arrow::int32(), /*nullable=*/false); + auto child2 = arrow::field("child2", arrow::int32(), /*nullable=*/false); + auto child3 = arrow::field("child1", arrow::int32(), /*nullable=*/true); + auto struct_type1 = arrow::struct_({child1}); + auto struct_type2 = arrow::struct_({child2}); + auto struct_type3 = arrow::struct_({child3}); + auto struct_type4 = arrow::struct_({child3, child1}); + ASSERT_FALSE(ArrowUtils::EqualsIgnoreNullable(struct_type1, struct_type2)); + 
ASSERT_TRUE(ArrowUtils::EqualsIgnoreNullable(struct_type1, struct_type3)); + ASSERT_FALSE(ArrowUtils::EqualsIgnoreNullable(struct_type1, struct_type4)); + } + { + // test complex + auto key_field = arrow::field("key", arrow::int32(), /*nullable=*/false); + auto value_field = arrow::field("value", arrow::int32(), /*nullable=*/false); + auto inner_child1 = arrow::field( + "inner1", + arrow::map(arrow::utf8(), arrow::field("inner_list", arrow::list(value_field), + /*nullable=*/true)), + /*nullable=*/false); + auto inner_child2 = arrow::field( + "inner2", + arrow::map(arrow::utf8(), + arrow::field("inner_map", arrow::map(arrow::utf8(), value_field), + /*nullable=*/true)), + /*nullable=*/false); + auto inner_child3 = arrow::field( + "inner3", + arrow::map(arrow::utf8(), + arrow::field("inner_struct", arrow::struct_({key_field, value_field}), + /*nullable=*/true)), + /*nullable=*/false); + auto struct_type1 = arrow::struct_({inner_child1, inner_child2, inner_child3}); + + auto key_field_other = arrow::field("key", arrow::int32(), /*nullable=*/true); + auto value_field_other = arrow::field("value", arrow::int32(), /*nullable=*/true); + auto inner_child1_other = arrow::field( + "inner1", + arrow::map(arrow::utf8(), arrow::field("inner_list", arrow::list(value_field_other), + /*nullable=*/false)), + /*nullable=*/true); + auto inner_child2_other = arrow::field( + "inner2", + arrow::map(arrow::utf8(), + arrow::field("inner_map", arrow::map(arrow::utf8(), value_field_other), + /*nullable=*/false)), + /*nullable=*/true); + auto inner_child3_other = arrow::field( + "inner3", + arrow::map( + arrow::utf8(), + arrow::field("inner_struct", arrow::struct_({key_field_other, value_field_other}), + /*nullable=*/false)), + /*nullable=*/true); + auto struct_type2 = + arrow::struct_({inner_child1_other, inner_child2_other, inner_child3_other}); + ASSERT_TRUE(ArrowUtils::EqualsIgnoreNullable(struct_type1, struct_type2)); + } +} } // namespace paimon::test diff --git 
a/src/paimon/common/utils/binary_row_partition_computer.cpp b/src/paimon/common/utils/binary_row_partition_computer.cpp index 5a6d2a0a..88f77407 100644 --- a/src/paimon/common/utils/binary_row_partition_computer.cpp +++ b/src/paimon/common/utils/binary_row_partition_computer.cpp @@ -21,6 +21,7 @@ #include "arrow/type.h" #include "fmt/format.h" +#include "fmt/ranges.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/data/binary_row_writer.h" #include "paimon/common/utils/string_utils.h" @@ -133,4 +134,28 @@ Result BinaryRowPartitionComputer::GetTypeFromArrowSchema( return field->type()->id(); } +Result BinaryRowPartitionComputer::PartToSimpleString( + const std::shared_ptr& partition_type, const BinaryRow& partition, + const std::string& delimiter, int32_t max_length) { + std::vector partition_converters; + partition_converters.reserve(partition_type->num_fields()); + for (const auto& field : partition_type->fields()) { + PAIMON_ASSIGN_OR_RAISE(DataConverterUtils::BinaryRowFieldToStrConverter converter, + DataConverterUtils::CreateBinaryRowFieldToStringConverter( + field->type()->id(), /*legacy_partition_name_enabled=*/true)); + partition_converters.emplace_back(converter); + } + std::vector partition_vec; + partition_vec.reserve(partition_converters.size()); + for (size_t field_idx = 0; field_idx < partition_converters.size(); field_idx++) { + const auto& to_str = partition_converters[field_idx]; + if (partition.IsNullAt(field_idx)) { + partition_vec.push_back("null"); + } else { + PAIMON_ASSIGN_OR_RAISE(std::string partition_field_str, to_str(partition, field_idx)); + partition_vec.push_back(partition_field_str); + } + } + return fmt::format("{}", fmt::join(partition_vec, delimiter)).substr(0, max_length); +} } // namespace paimon diff --git a/src/paimon/common/utils/binary_row_partition_computer.h b/src/paimon/common/utils/binary_row_partition_computer.h index 3778ad7d..e42b7e7c 100644 --- 
a/src/paimon/common/utils/binary_row_partition_computer.h +++ b/src/paimon/common/utils/binary_row_partition_computer.h @@ -58,6 +58,10 @@ class BinaryRowPartitionComputer { return partition_keys_; } + static Result PartToSimpleString( + const std::shared_ptr& partition_type, const BinaryRow& partition, + const std::string& delimiter, int32_t max_length); + private: BinaryRowPartitionComputer(const std::vector& partition_keys, const std::shared_ptr& schema, diff --git a/src/paimon/common/utils/binary_row_partition_computer_test.cpp b/src/paimon/common/utils/binary_row_partition_computer_test.cpp index c1e3add0..6a94e8bf 100644 --- a/src/paimon/common/utils/binary_row_partition_computer_test.cpp +++ b/src/paimon/common/utils/binary_row_partition_computer_test.cpp @@ -282,4 +282,45 @@ TEST(BinaryRowPartitionComputerTest, TestNullOrWhitespaceOnlyStr) { {"f0", "__DEFAULT_PARTITION__"}, {"f1", "__DEFAULT_PARTITION__"}, {"f2", "ab "}}; ASSERT_EQ(partition_key_values, expected); } + +TEST(BinaryRowPartitionComputerTest, TestPartToSimpleString) { + auto pool = GetDefaultPool(); + { + auto schema = arrow::schema({}); + auto partition = BinaryRow::EmptyRow(); + ASSERT_OK_AND_ASSIGN(std::string ret, BinaryRowPartitionComputer::PartToSimpleString( + schema, partition, "-", 30)); + ASSERT_EQ(ret, ""); + } + { + auto schema = arrow::schema({ + arrow::field("f0", arrow::utf8()), + arrow::field("f1", arrow::int32()), + }); + auto partition = BinaryRowGenerator::GenerateRow({"20240731", 10}, pool.get()); + ASSERT_OK_AND_ASSIGN(std::string ret, BinaryRowPartitionComputer::PartToSimpleString( + schema, partition, "-", 30)); + ASSERT_EQ(ret, "20240731-10"); + } + { + auto schema = arrow::schema({ + arrow::field("f0", arrow::utf8()), + arrow::field("f1", arrow::int32()), + }); + auto partition = BinaryRowGenerator::GenerateRow({NullType(), 10}, pool.get()); + ASSERT_OK_AND_ASSIGN(std::string ret, BinaryRowPartitionComputer::PartToSimpleString( + schema, partition, "-", 30)); + 
ASSERT_EQ(ret, "null-10"); + } + { + auto schema = arrow::schema({ + arrow::field("f0", arrow::utf8()), + arrow::field("f1", arrow::int32()), + }); + auto partition = BinaryRowGenerator::GenerateRow({"20240731", 10}, pool.get()); + ASSERT_OK_AND_ASSIGN(std::string ret, BinaryRowPartitionComputer::PartToSimpleString( + schema, partition, "-", 5)); + ASSERT_EQ(ret, "20240"); + } +} } // namespace paimon::test diff --git a/src/paimon/common/utils/object_utils.h b/src/paimon/common/utils/object_utils.h index fd039192..008b0d21 100644 --- a/src/paimon/common/utils/object_utils.h +++ b/src/paimon/common/utils/object_utils.h @@ -132,5 +132,18 @@ class ObjectUtils { } return index_map; } + + /// Precondition: U can be moved to T. + template + static std::vector MoveVector(std::vector&& input) { + static_assert(std::is_constructible_v, "U cannot be moved to T"); + std::vector result; + result.reserve(input.size()); + for (auto& item : input) { + result.push_back(std::move(item)); + } + input.clear(); + return result; + } }; } // namespace paimon diff --git a/src/paimon/common/utils/object_utils_test.cpp b/src/paimon/common/utils/object_utils_test.cpp index b2e144f9..9d052399 100644 --- a/src/paimon/common/utils/object_utils_test.cpp +++ b/src/paimon/common/utils/object_utils_test.cpp @@ -86,5 +86,29 @@ TEST(ObjectUtilsTest, TestCreateIdentifierToIndexMap) { ASSERT_EQ(expected_map, result_map); } } +TEST(ObjectUtilsTest, TestMoveVector) { + struct Base { + virtual ~Base() = default; + virtual int32_t Value() const = 0; + }; + + struct Derived : Base { + explicit Derived(int32_t v) : val(v) {} + int32_t Value() const override { + return val; + } + int32_t val; + }; + std::vector> derived_vec; + derived_vec.push_back(std::make_unique(10)); + derived_vec.push_back(std::make_unique(20)); + derived_vec.push_back(std::make_unique(30)); + auto base_vec = paimon::ObjectUtils::MoveVector>(std::move(derived_vec)); + + ASSERT_TRUE(derived_vec.empty()); + ASSERT_EQ(base_vec[0]->Value(), 
10); + ASSERT_EQ(base_vec[1]->Value(), 20); + ASSERT_EQ(base_vec[2]->Value(), 30); +} } // namespace paimon::test diff --git a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h index 5f7b403a..724be9dc 100644 --- a/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h +++ b/src/paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h @@ -26,7 +26,6 @@ #include "paimon/common/reader/reader_utils.h" #include "paimon/core/deletionvectors/deletion_vector.h" #include "paimon/memory/memory_pool.h" -#include "paimon/reader/batch_reader.h" #include "paimon/reader/file_batch_reader.h" #include "paimon/result.h" #include "paimon/status.h" @@ -35,7 +34,7 @@ namespace paimon { class Metrics; -class ApplyDeletionVectorBatchReader : public BatchReader { +class ApplyDeletionVectorBatchReader : public FileBatchReader { public: ApplyDeletionVectorBatchReader(std::unique_ptr&& reader, PAIMON_UNIQUE_PTR&& deletion_vector) @@ -74,9 +73,32 @@ class ApplyDeletionVectorBatchReader : public BatchReader { return reader_->GetReaderMetrics(); } + Result> GetFileSchema() const override { + return reader_->GetFileSchema(); + } + + Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, + const std::optional& selection_bitmap) override { + return Status::Invalid("ApplyDeletionVectorBatchReader does not support SetReadSchema"); + } + + Result GetPreviousBatchFirstRowNumber() const override { + return reader_->GetPreviousBatchFirstRowNumber(); + } + + Result GetNumberOfRows() const override { + return reader_->GetNumberOfRows(); + } + + bool SupportPreciseBitmapSelection() const override { + return reader_->SupportPreciseBitmapSelection(); + } + private: Result Filter(int32_t batch_size) const { - return deletion_vector_->IsValid(reader_->GetPreviousBatchFirstRowNumber(), batch_size); + PAIMON_ASSIGN_OR_RAISE(uint64_t previous_batch_first_row_number, + 
reader_->GetPreviousBatchFirstRowNumber()); + return deletion_vector_->IsValid(previous_batch_first_row_number, batch_size); } private: diff --git a/src/paimon/core/disk/io_manager.cpp b/src/paimon/core/disk/io_manager.cpp new file mode 100644 index 00000000..cc1586f3 --- /dev/null +++ b/src/paimon/core/disk/io_manager.cpp @@ -0,0 +1,47 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "paimon/disk/io_manager.h" + +#include "paimon/common/utils/path_util.h" +#include "paimon/common/utils/uuid.h" + +namespace paimon { +class IOManagerImpl : public IOManager { + public: + explicit IOManagerImpl(const std::string& tmp_dir) : tmp_dir_(tmp_dir) {} + + const std::string& GetTempDir() const override { + return tmp_dir_; + } + + Result GenerateTempFilePath(const std::string& prefix) const override { + std::string uuid; + if (!UUID::Generate(&uuid)) { + return Status::Invalid("generate uuid for io manager tmp path failed."); + } + return PathUtil::JoinPath(tmp_dir_, prefix + "-" + uuid + std::string(kSuffix)); + } + + private: + static constexpr char kSuffix[] = ".channel"; + std::string tmp_dir_; +}; + +std::unique_ptr IOManager::Create(const std::string& tmp_dir) { + return std::make_unique(tmp_dir); +} + +} // namespace paimon diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp index d9efd487..2aef9b29 100644 --- 
a/src/paimon/core/io/complete_row_tracking_fields_reader.cpp +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.cpp @@ -86,7 +86,8 @@ CompleteRowTrackingFieldsBatchReader::NextBatchWithBitmap() { std::string row_id_field_name = SpecialFields::RowId().Name(); if (read_schema_->GetFieldIndex(row_id_field_name) != -1) { row_id_array = src_struct_array->GetFieldByName(row_id_field_name); - uint64_t previous_batch_first_row_number = reader_->GetPreviousBatchFirstRowNumber(); + PAIMON_ASSIGN_OR_RAISE(uint64_t previous_batch_first_row_number, + reader_->GetPreviousBatchFirstRowNumber()); auto row_id_convert_func = [previous_batch_first_row_number, this](int32_t idx_in_array) -> Result { if (first_row_id_ == std::nullopt) { diff --git a/src/paimon/core/io/complete_row_tracking_fields_reader.h b/src/paimon/core/io/complete_row_tracking_fields_reader.h index 812ae5d5..cc2f9f7b 100644 --- a/src/paimon/core/io/complete_row_tracking_fields_reader.h +++ b/src/paimon/core/io/complete_row_tracking_fields_reader.h @@ -60,7 +60,7 @@ class CompleteRowTrackingFieldsBatchReader : public FileBatchReader { reader_->Close(); } - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { return reader_->GetPreviousBatchFirstRowNumber(); } diff --git a/src/paimon/core/io/data_file_meta.cpp b/src/paimon/core/io/data_file_meta.cpp index 319b89fd..c2bc7611 100644 --- a/src/paimon/core/io/data_file_meta.cpp +++ b/src/paimon/core/io/data_file_meta.cpp @@ -182,6 +182,10 @@ bool DataFileMeta::operator==(const DataFileMeta& other) const { write_cols == other.write_cols; } +bool DataFileMeta::operator!=(const DataFileMeta& other) const { + return !(*this == other); +} + bool DataFileMeta::TEST_Equal(const DataFileMeta& other) const { if (this == &other) { return true; diff --git a/src/paimon/core/io/data_file_meta.h b/src/paimon/core/io/data_file_meta.h index ee89df3e..487b7445 100644 --- a/src/paimon/core/io/data_file_meta.h +++ 
b/src/paimon/core/io/data_file_meta.h @@ -88,6 +88,7 @@ struct DataFileMeta { std::optional ExternalPathDir() const; bool operator==(const DataFileMeta& other) const; + bool operator!=(const DataFileMeta& other) const; bool TEST_Equal(const DataFileMeta& other) const; std::string ToString() const; diff --git a/src/paimon/core/io/data_file_meta_test.cpp b/src/paimon/core/io/data_file_meta_test.cpp index 80756442..434a0e23 100644 --- a/src/paimon/core/io/data_file_meta_test.cpp +++ b/src/paimon/core/io/data_file_meta_test.cpp @@ -100,6 +100,8 @@ TEST(DataFileMetaTest, TestGetMaxSequenceNumber) { ASSERT_EQ(4, DataFileMeta::GetMaxSequenceNumber({file_meta1})); ASSERT_EQ(10, DataFileMeta::GetMaxSequenceNumber({file_meta1, file_meta2})); ASSERT_EQ(-1, DataFileMeta::GetMaxSequenceNumber({})); + ASSERT_EQ(file_meta1, file_meta1); + ASSERT_NE(file_meta1, file_meta2); } TEST(DataFileMetaTest, TestNonNullFirstRowId) { diff --git a/src/paimon/core/io/field_mapping_reader.cpp b/src/paimon/core/io/field_mapping_reader.cpp index 427e7741..286778b2 100644 --- a/src/paimon/core/io/field_mapping_reader.cpp +++ b/src/paimon/core/io/field_mapping_reader.cpp @@ -41,7 +41,8 @@ namespace paimon { class MemoryPool; -FieldMappingReader::FieldMappingReader(int32_t field_count, std::unique_ptr&& reader, +FieldMappingReader::FieldMappingReader(int32_t field_count, + std::unique_ptr&& reader, const BinaryRow& partition, std::unique_ptr&& mapping, const std::shared_ptr& pool) diff --git a/src/paimon/core/io/field_mapping_reader.h b/src/paimon/core/io/field_mapping_reader.h index d23bb60c..ffd18bd6 100644 --- a/src/paimon/core/io/field_mapping_reader.h +++ b/src/paimon/core/io/field_mapping_reader.h @@ -30,7 +30,7 @@ #include "paimon/common/utils/arrow/mem_utils.h" #include "paimon/core/partition/partition_info.h" #include "paimon/core/utils/field_mapping.h" -#include "paimon/reader/batch_reader.h" +#include "paimon/reader/file_batch_reader.h" #include "paimon/result.h" #include 
"paimon/status.h" @@ -44,9 +44,9 @@ class MemoryPool; class Metrics; struct FieldMapping; -class FieldMappingReader : public BatchReader { +class FieldMappingReader : public FileBatchReader { public: - FieldMappingReader(int32_t field_count, std::unique_ptr&& reader, + FieldMappingReader(int32_t field_count, std::unique_ptr&& reader, const BinaryRow& partition, std::unique_ptr&& mapping, const std::shared_ptr& pool); @@ -65,6 +65,27 @@ class FieldMappingReader : public BatchReader { reader_->Close(); } + Result> GetFileSchema() const override { + return Status::Invalid("FieldMappingReader does not support GetFileSchema"); + } + + Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, + const std::optional& selection_bitmap) override { + return Status::Invalid("FieldMappingReader does not support SetReadSchema"); + } + + Result GetPreviousBatchFirstRowNumber() const override { + return reader_->GetPreviousBatchFirstRowNumber(); + } + + Result GetNumberOfRows() const override { + return reader_->GetNumberOfRows(); + } + + bool SupportPreciseBitmapSelection() const override { + return reader_->SupportPreciseBitmapSelection(); + } + private: Result> GenerateSinglePartitionArray(int32_t idx, int32_t batch_size) const; @@ -86,7 +107,7 @@ class FieldMappingReader : public BatchReader { bool need_casting_ = false; int32_t field_count_; std::shared_ptr arrow_pool_; - std::unique_ptr reader_; + std::unique_ptr reader_; BinaryRow partition_ = BinaryRow::EmptyRow(); std::optional partition_info_; diff --git a/src/paimon/core/io/key_value_data_file_record_reader.cpp b/src/paimon/core/io/key_value_data_file_record_reader.cpp index 35a7aaef..457bcc55 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.cpp +++ b/src/paimon/core/io/key_value_data_file_record_reader.cpp @@ -38,7 +38,7 @@ namespace paimon { class MemoryPool; KeyValueDataFileRecordReader::KeyValueDataFileRecordReader( - std::unique_ptr&& reader, int32_t key_arity, + 
std::unique_ptr&& reader, int32_t key_arity, const std::shared_ptr& value_schema, int32_t level, const std::shared_ptr& pool) : key_arity_(key_arity), @@ -80,10 +80,17 @@ Result KeyValueDataFileRecordReader::Iterator::Next() { return KeyValue(row_kind, sequence_number, reader_->level_, std::move(key), std::move(value)); } +Result> KeyValueDataFileRecordReader::Iterator::NextWithFilePos() { + PAIMON_ASSIGN_OR_RAISE(KeyValue kv, Next()); + return std::make_pair(previous_batch_first_row_number_ + cursor_ - 1, std::move(kv)); +} + Result> KeyValueDataFileRecordReader::NextBatch() { Reset(); PAIMON_ASSIGN_OR_RAISE(BatchReader::ReadBatchWithBitmap batch_with_bitmap, reader_->NextBatchWithBitmap()); + PAIMON_ASSIGN_OR_RAISE(int64_t previous_batch_first_row_number, + reader_->GetPreviousBatchFirstRowNumber()); if (BatchReader::IsEofBatch(batch_with_bitmap)) { // reader eof, just return return std::unique_ptr(); @@ -135,7 +142,8 @@ Result> KeyValueDataFileRecordRe key_ctx_ = std::make_shared(key_fields, pool_); value_ctx_ = std::make_shared(value_fields, pool_); ArrowUtils::TraverseArray(data_batch); - return std::make_unique(this); + return std::make_unique( + this, previous_batch_first_row_number); } void KeyValueDataFileRecordReader::Reset() { diff --git a/src/paimon/core/io/key_value_data_file_record_reader.h b/src/paimon/core/io/key_value_data_file_record_reader.h index c08ea177..6669e1ad 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader.h +++ b/src/paimon/core/io/key_value_data_file_record_reader.h @@ -24,7 +24,7 @@ #include "arrow/type_fwd.h" #include "paimon/core/io/key_value_record_reader.h" #include "paimon/core/key_value.h" -#include "paimon/reader/batch_reader.h" +#include "paimon/reader/file_batch_reader.h" #include "paimon/result.h" #include "paimon/utils/roaring_bitmap32.h" @@ -47,17 +47,20 @@ struct ColumnarBatchContext; // VALUE_KIND columns) class KeyValueDataFileRecordReader : public KeyValueRecordReader { public: - 
KeyValueDataFileRecordReader(std::unique_ptr&& reader, int32_t key_arity, + KeyValueDataFileRecordReader(std::unique_ptr&& reader, int32_t key_arity, const std::shared_ptr& value_schema, int32_t level, const std::shared_ptr& pool); class Iterator : public KeyValueRecordReader::Iterator { public: - explicit Iterator(KeyValueDataFileRecordReader* reader) : reader_(reader) {} + Iterator(KeyValueDataFileRecordReader* reader, int64_t previous_batch_first_row_number) + : previous_batch_first_row_number_(previous_batch_first_row_number), reader_(reader) {} bool HasNext() const override; Result Next() override; + Result> NextWithFilePos(); private: + int64_t previous_batch_first_row_number_; mutable int64_t cursor_ = 0; KeyValueDataFileRecordReader* reader_ = nullptr; }; @@ -80,7 +83,7 @@ class KeyValueDataFileRecordReader : public KeyValueRecordReader { int32_t key_arity_; int32_t level_; std::shared_ptr pool_; - std::unique_ptr reader_; + std::unique_ptr reader_; std::shared_ptr value_schema_; std::vector value_names_; RoaringBitmap32 selection_bitmap_; diff --git a/src/paimon/core/io/key_value_data_file_record_reader_test.cpp b/src/paimon/core/io/key_value_data_file_record_reader_test.cpp index d56f8780..e4aa3a58 100644 --- a/src/paimon/core/io/key_value_data_file_record_reader_test.cpp +++ b/src/paimon/core/io/key_value_data_file_record_reader_test.cpp @@ -244,6 +244,63 @@ TEST_F(KeyValueDataFileRecordReaderTest, TestWithSelectedBitmap) { check_result({3}); } +TEST_F(KeyValueDataFileRecordReaderTest, TestWithSelectedBitmapWithFilePos) { + arrow::FieldVector fields = {arrow::field("_SEQUENCE_NUMBER", arrow::int64()), + arrow::field("_VALUE_KIND", arrow::int8()), + arrow::field("k0", arrow::int32()), + arrow::field("k1", arrow::int32()), + arrow::field("v0", arrow::int32()), + arrow::field("v1", arrow::int32()), + arrow::field("v2", arrow::int32())}; + + std::shared_ptr value_schema = + arrow::schema(arrow::FieldVector({fields[2], fields[3], fields[4], fields[5], 
fields[6]})); + std::shared_ptr src_type = arrow::struct_(fields); + auto src_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(src_type, R"([ + [0, 0, 1, 1, 10, 20, 30], + [1, 0, 1, 2, 11, 21, 31], + [2, 1, 2, 2, 12, 22, 32], + [3, 2, 2, 3, 13, 23, 33], + [5, 0, 3, 3, 14, 24, 34], + [8, 0, 3, 4, 15, 25, 35], + [4, 1, 5, 4, 16, 26, 36], + [6, 3, 6, 5, 17, 27, 37] + ])") + .ValueOrDie()); + + RoaringBitmap32 selected_bitmap = RoaringBitmap32::From({1, 2, 4, 6}); + auto file_batch_reader = std::make_unique( + src_array, src_type, /*bitmap=*/selected_bitmap, /*batch_size=*/4); + file_batch_reader->EnableRandomizeBatchSize(false); + auto record_reader = std::make_unique( + std::move(file_batch_reader), /*key_arity=*/2, value_schema, /*level=*/2, pool_); + + auto check_result = [](const std::vector& expected_pos_vector, + KeyValueRecordReader::Iterator* iter) { + auto typed_iter = dynamic_cast(iter); + ASSERT_TRUE(typed_iter); + size_t pos_iter = 0; + while (iter->HasNext()) { + ASSERT_OK_AND_ASSIGN(auto kv_and_pos, typed_iter->NextWithFilePos()); + const auto& [pos, kv] = kv_and_pos; + ASSERT_EQ(pos, expected_pos_vector[pos_iter++]); + } + ASSERT_EQ(pos_iter, expected_pos_vector.size()); + }; + + // first read row 1, 2 + ASSERT_OK_AND_ASSIGN(auto iter, record_reader->NextBatch()); + check_result({1, 2}, iter.get()); + + // second read row 4, 6 + ASSERT_OK_AND_ASSIGN(iter, record_reader->NextBatch()); + check_result({4, 6}, iter.get()); + + // eof + ASSERT_OK_AND_ASSIGN(iter, record_reader->NextBatch()); + ASSERT_FALSE(iter); +} TEST_F(KeyValueDataFileRecordReaderTest, TestEmptyReader) { arrow::FieldVector fields = {arrow::field("_SEQUENCE_NUMBER", arrow::int64()), arrow::field("_VALUE_KIND", arrow::int8()), diff --git a/src/paimon/core/key_value.h b/src/paimon/core/key_value.h index 0b32ea39..e110cde4 100644 --- a/src/paimon/core/key_value.h +++ b/src/paimon/core/key_value.h @@ -35,6 +35,7 @@ namespace paimon { struct KeyValue { static 
constexpr int32_t UNKNOWN_LEVEL = -1; static constexpr int32_t UNKNOWN_SEQUENCE = -1; + KeyValue() = default; KeyValue(const RowKind* _value_kind, int64_t _sequence_number, int32_t _level, std::shared_ptr&& _key, std::unique_ptr&& _value) diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.cpp b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.cpp index a7b236a4..3411d04f 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.cpp +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.cpp @@ -18,6 +18,7 @@ #include "arrow/c/bridge.h" #include "arrow/c/helpers.h" #include "paimon/common/table/special_fields.h" +#include "paimon/common/utils/scope_guard.h" #include "paimon/core/io/key_value_data_file_writer.h" #include "paimon/core/io/key_value_meta_projection_consumer.h" #include "paimon/core/io/key_value_record_reader.h" @@ -28,7 +29,6 @@ #include "paimon/format/file_format.h" #include "paimon/format/writer_builder.h" #include "paimon/read_context.h" - namespace paimon { MergeTreeCompactRewriter::MergeTreeCompactRewriter( const BinaryRow& partition, int64_t schema_id, @@ -51,17 +51,15 @@ MergeTreeCompactRewriter::MergeTreeCompactRewriter( Result> MergeTreeCompactRewriter::Create( int32_t bucket, const BinaryRow& partition, const std::shared_ptr& table_schema, const std::shared_ptr& path_factory, const CoreOptions& options, - const std::shared_ptr& pool, const std::shared_ptr& executor) { + const std::shared_ptr& pool) { PAIMON_ASSIGN_OR_RAISE(std::vector trimmed_primary_keys, table_schema->TrimmedPrimaryKeys()); auto data_schema = DataField::ConvertDataFieldsToArrowSchema(table_schema->Fields()); auto write_schema = SpecialFields::CompleteSequenceAndValueKindField(data_schema); + // TODO(xinyu.lxy): set executor ReadContextBuilder read_context_builder(path_factory->RootPath()); - read_context_builder.SetOptions(options.ToMap()) - .EnablePrefetch(true) - .WithMemoryPool(pool) - 
.WithExecutor(executor); + read_context_builder.SetOptions(options.ToMap()).EnablePrefetch(true).WithMemoryPool(pool); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr read_context, read_context_builder.Finish()); @@ -70,7 +68,7 @@ Result> MergeTreeCompactRewriter::Crea InternalReadContext::Create(read_context, table_schema, options.ToMap())); PAIMON_ASSIGN_OR_RAISE( std::unique_ptr merge_file_split_read, - MergeFileSplitRead::Create(path_factory, internal_context, pool, executor)); + MergeFileSplitRead::Create(path_factory, internal_context, pool, CreateDefaultExecutor())); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr data_file_path_factory, path_factory->CreateDataFilePathFactory(partition, bucket)); diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.h b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.h index ea4f8529..cd44c7c9 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.h +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter.h @@ -35,7 +35,7 @@ class MergeTreeCompactRewriter : public CompactRewriter { int32_t bucket, const BinaryRow& partition, const std::shared_ptr& table_schema, const std::shared_ptr& path_factory, const CoreOptions& options, - const std::shared_ptr& memory_pool, const std::shared_ptr& executor); + const std::shared_ptr& memory_pool); Result Rewrite(int32_t output_level, bool drop_delete, const std::vector>& sections) override; diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter_test.cpp b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter_test.cpp index d8c3f7a6..f4e439d5 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter_test.cpp +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_rewriter_test.cpp @@ -54,7 +54,7 @@ class MergeTreeCompactRewriterTest : public testing::Test { global_index_external_path, options.IndexFileInDataFileDir(), pool_)); return MergeTreeCompactRewriter::Create(bucket, partition, 
table_schema, path_factory, - options, pool_, CreateDefaultExecutor()); + options, pool_); } Result>> GenerateSortedRuns( diff --git a/src/paimon/core/mergetree/level_sorted_run.h b/src/paimon/core/mergetree/level_sorted_run.h index 485fae3d..ff6fdd20 100644 --- a/src/paimon/core/mergetree/level_sorted_run.h +++ b/src/paimon/core/mergetree/level_sorted_run.h @@ -28,6 +28,13 @@ struct LevelSortedRun { return fmt::format("LevelSortedRun{{ level={}, run={} }}", level, run.ToString()); } + bool operator==(const LevelSortedRun& other) const { + if (this == &other) { + return true; + } + return level == other.level && run == other.run; + } + int32_t level; SortedRun run; }; diff --git a/src/paimon/core/mergetree/levels.cpp b/src/paimon/core/mergetree/levels.cpp new file mode 100644 index 00000000..ba3f5e94 --- /dev/null +++ b/src/paimon/core/mergetree/levels.cpp @@ -0,0 +1,206 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "paimon/core/mergetree/levels.h" +namespace paimon { +bool Levels::Level0Comparator::operator()(const std::shared_ptr& a, + const std::shared_ptr& b) const { + if (a->max_sequence_number != b->max_sequence_number) { + // file with larger sequence number should be in front + return a->max_sequence_number > b->max_sequence_number; + } else { + // When two or more jobs are writing the same merge tree, it is + // possible that multiple files have the same maxSequenceNumber. 
In + // this case we have to compare their file names so that files with + // same maxSequenceNumber won't be "de-duplicated" by the tree set. + int64_t min_seq_a = a->min_sequence_number; + int64_t min_seq_b = b->min_sequence_number; + if (min_seq_a != min_seq_b) { + return min_seq_a < min_seq_b; + } + // If minSequenceNumber is also the same, use creation time + Timestamp time_a = a->creation_time; + Timestamp time_b = b->creation_time; + if (time_a != time_b) { + return time_a < time_b; + } + // Final fallback: filename (to ensure uniqueness in set) + return a->file_name < b->file_name; + } +} + +Result> Levels::Create( + const std::shared_ptr& key_comparator, + const std::vector>& input_files, int32_t num_levels) { + // in case the num of levels is not specified explicitly + int32_t restored_num_levels = -1; + for (const auto& file : input_files) { + if (file->level > restored_num_levels) { + restored_num_levels = file->level; + } + } + restored_num_levels = std::max(restored_num_levels + 1, num_levels); + if (restored_num_levels <= 1) { + return Status::Invalid("Number of levels must be at least 2."); + } + + std::set, Levels::Level0Comparator> level0; + std::vector levels; + levels.reserve(restored_num_levels - 1); + for (int32_t i = 1; i < restored_num_levels; ++i) { + levels.push_back(SortedRun::Empty()); + } + auto level_map = GroupByLevel(input_files); + for (auto& [level, files] : level_map) { + PAIMON_RETURN_NOT_OK( + UpdateLevel(level, /*before=*/{}, /*after=*/files, key_comparator, &levels, &level0)); + } + + size_t total_file_num = level0.size(); + for (const auto& run : levels) { + total_file_num += run.Files().size(); + } + if (total_file_num != input_files.size()) { + return Status::Invalid( + "Number of files stored in Levels does not equal to the size of inputFiles. 
This " + "is unexpected."); + } + return std::unique_ptr(new Levels(key_comparator, level0, levels)); +} + +int32_t Levels::NumberOfSortedRuns() const { + int32_t number_of_runs = level0_.size(); + for (const auto& run : levels_) { + if (!run.IsEmpty()) { + number_of_runs++; + } + } + return number_of_runs; +} + +Status Levels::AddLevel0File(const std::shared_ptr& file) { + if (file->level != 0) { + return Status::Invalid("must add level0 file in AddLevel0File"); + } + level0_.insert(file); + return Status::OK(); +} + +int32_t Levels::NonEmptyHighestLevel() const { + for (int32_t i = levels_.size() - 1; i >= 0; i--) { + if (!levels_[i].IsEmpty()) { + return i + 1; + } + } + return level0_.empty() ? -1 : 0; +} + +int64_t Levels::TotalFileSize() const { + int64_t total_size = 0; + for (const auto& file : level0_) { + total_size += file->file_size; + } + for (const auto& run : levels_) { + total_size += run.TotalSize(); + } + return total_size; +} + +std::vector> Levels::AllFiles() const { + std::vector> all_files; + auto runs = LevelSortedRuns(); + for (const auto& run : runs) { + all_files.insert(all_files.end(), run.run.Files().begin(), run.run.Files().end()); + } + return all_files; +} + +std::vector Levels::LevelSortedRuns() const { + std::vector runs; + for (const auto& file : level0_) { + runs.emplace_back(/*level=*/0, SortedRun::FromSingle(file)); + } + for (int32_t i = 0; i < static_cast(levels_.size()); i++) { + const auto& run = levels_[i]; + if (!run.IsEmpty()) { + runs.emplace_back(/*level=*/i + 1, run); + } + } + return runs; +} + +Status Levels::Update(const std::vector>& before, + const std::vector>& after) { + auto grouped_before = GroupByLevel(before); + auto grouped_after = GroupByLevel(after); + int32_t number_of_levels = NumberOfLevels(); + for (int32_t i = 0; i < number_of_levels; i++) { + PAIMON_RETURN_NOT_OK(UpdateLevel(i, grouped_before[i], grouped_after[i], key_comparator_, + &levels_, &level0_)); + } + return Status::OK(); + // 
TODO(lisizhuo.lsz): dropFileCallbacks +} + +Status Levels::UpdateLevel(int32_t level, const std::vector>& before, + const std::vector>& after, + const std::shared_ptr& key_comparator, + std::vector* levels, + std::set, Level0Comparator>* level0) { + if (before.empty() && after.empty()) { + return Status::OK(); + } + if (level == 0) { + for (const auto& file : before) { + level0->erase(file); + } + for (const auto& file : after) { + level0->insert(file); + } + } else { + PAIMON_ASSIGN_OR_RAISE(SortedRun run, RunOfLevel(level, *levels)); + std::vector> files = run.Files(); + for (const auto& before_file : before) { + auto iter = std::find_if(files.begin(), files.end(), [&before_file](const auto& cur) { + return before_file->file_name == cur->file_name; + }); + if (iter != files.end()) { + files.erase(iter); + } + } + files.insert(files.end(), after.begin(), after.end()); + PAIMON_ASSIGN_OR_RAISE((*levels)[level - 1], + SortedRun::FromUnsorted(files, key_comparator)); + } + return Status::OK(); +} + +Result Levels::RunOfLevel(int32_t level, const std::vector& levels) { + if (level <= 0) { + return Status::Invalid("Level0 does not have one single sorted run."); + } + return levels[level - 1]; +} + +std::map>> Levels::GroupByLevel( + const std::vector>& files) { + std::map>> level_map; + for (const auto& file : files) { + level_map[file->level].push_back(file); + } + return level_map; +} + +} // namespace paimon diff --git a/src/paimon/core/mergetree/levels.h b/src/paimon/core/mergetree/levels.h new file mode 100644 index 00000000..b8174cd9 --- /dev/null +++ b/src/paimon/core/mergetree/levels.h @@ -0,0 +1,91 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "paimon/core/io/data_file_meta.h" +#include "paimon/core/mergetree/level_sorted_run.h" +#include "paimon/core/mergetree/sorted_run.h" +#include "paimon/core/utils/fields_comparator.h" +#include "paimon/result.h" +namespace paimon { +/// A class which stores all level files of merge tree. +class Levels { + public: + struct Level0Comparator { + bool operator()(const std::shared_ptr& a, + const std::shared_ptr& b) const; + }; + + static Result> Create( + const std::shared_ptr& key_comparator, + const std::vector>& input_files, int32_t num_levels); + + static Result RunOfLevel(int32_t level, const std::vector& levels); + + const std::set, Level0Comparator>& GetLevel0() const { + return level0_; + } + + const std::vector& GetLevels() const { + return levels_; + } + int32_t NumberOfLevels() const { + return levels_.size() + 1; + } + + int32_t MaxLevel() const { + return levels_.size(); + } + + int32_t NumberOfSortedRuns() const; + + Status AddLevel0File(const std::shared_ptr& file); + + /// @return the highest non-empty level or -1 if all levels empty. 
+ int32_t NonEmptyHighestLevel() const; + + int64_t TotalFileSize() const; + + std::vector> AllFiles() const; + + std::vector LevelSortedRuns() const; + + Status Update(const std::vector>& before, + const std::vector>& after); + + private: + Levels(const std::shared_ptr& key_comparator, + const std::set, Level0Comparator>& level0, + const std::vector& levels) + : key_comparator_(key_comparator), level0_(level0), levels_(levels) {} + + static Status UpdateLevel(int32_t level, + const std::vector>& before, + const std::vector>& after, + const std::shared_ptr& key_comparator, + std::vector* levels, + std::set, Level0Comparator>* level0); + + static std::map>> GroupByLevel( + const std::vector>& files); + + private: + std::shared_ptr key_comparator_; + std::set, Level0Comparator> level0_; + std::vector levels_; + // TODO(lisizhuo.lsz): DropFileCallback? +}; +} // namespace paimon diff --git a/src/paimon/core/mergetree/levels_test.cpp b/src/paimon/core/mergetree/levels_test.cpp new file mode 100644 index 00000000..1449b556 --- /dev/null +++ b/src/paimon/core/mergetree/levels_test.cpp @@ -0,0 +1,167 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/mergetree/levels.h" + +#include "arrow/api.h" +#include "gtest/gtest.h" +#include "paimon/common/utils/uuid.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/result.h" +#include "paimon/status.h" +#include "paimon/testing/utils/binary_row_generator.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class LevelsTest : public testing::Test { + public: + std::shared_ptr CreateDataFileMeta(int32_t level, int64_t min_sequence_number, + int64_t max_sequence_number, + int64_t ts_second) const { + std::string uuid; + EXPECT_TRUE(UUID::Generate(&uuid)); + return std::make_shared( + /*file_name=*/uuid, /*file_size=*/1, + /*row_count=*/max_sequence_number - min_sequence_number + 1, + BinaryRowGenerator::GenerateRow({min_sequence_number}, pool_.get()), + BinaryRowGenerator::GenerateRow({max_sequence_number}, pool_.get()), + SimpleStats::EmptyStats(), SimpleStats::EmptyStats(), min_sequence_number, + max_sequence_number, + /*schema_id=*/0, level, std::vector>(), + Timestamp(ts_second, 0l), std::nullopt, nullptr, FileSource::Append(), std::nullopt, + std::nullopt, std::nullopt, std::nullopt); + } + + std::shared_ptr CreateComparator() const { + std::vector data_fields; + data_fields.emplace_back(/*id=*/0, arrow::field("f0", arrow::int32())); + EXPECT_OK_AND_ASSIGN(auto cmp, + FieldsComparator::Create(data_fields, /*is_ascending_order=*/true, + /*use_view=*/false)); + return cmp; + } + + private: + std::shared_ptr pool_ = GetDefaultPool(); +}; + +TEST_F(LevelsTest, TestNonEmptyHighestLevelNo) { + std::vector> input_files; + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, /*num_levels=*/3)); + ASSERT_EQ(levels->NonEmptyHighestLevel(), -1); +} + +TEST_F(LevelsTest, TestNonEmptyHighestLevel0) { + std::vector> input_files = {CreateDataFileMeta(0, 0, 1, 0), + CreateDataFileMeta(0, 2, 3, 0)}; + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, 
/*num_levels=*/3)); + ASSERT_EQ(levels->NonEmptyHighestLevel(), 0); +} + +TEST_F(LevelsTest, TestNonEmptyHighestLevel1) { + std::vector> input_files = {CreateDataFileMeta(0, 0, 1, 0), + CreateDataFileMeta(1, 2, 3, 0)}; + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, /*num_levels=*/3)); + ASSERT_EQ(levels->NonEmptyHighestLevel(), 1); + ASSERT_EQ(levels->NumberOfLevels(), 3); + ASSERT_EQ(levels->MaxLevel(), 2); +} + +TEST_F(LevelsTest, TestNonEmptyHighestLevel2) { + std::vector> input_files = { + CreateDataFileMeta(0, 0, 100, 0), CreateDataFileMeta(0, 100, 200, 0), + CreateDataFileMeta(0, 0, 200, 0), CreateDataFileMeta(0, 0, 200, 10), + CreateDataFileMeta(1, 0, 500, 0), CreateDataFileMeta(2, 0, 1000, 0)}; + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, /*num_levels=*/3)); + ASSERT_EQ(levels->NonEmptyHighestLevel(), 2); + ASSERT_EQ(levels->TotalFileSize(), 6); + + std::vector expected_sorted_run = { + LevelSortedRun(0, SortedRun::FromSingle(input_files[2])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[3])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[1])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[0])), + LevelSortedRun(1, SortedRun::FromSingle(input_files[4])), + LevelSortedRun(2, SortedRun::FromSingle(input_files[5])), + }; + + ASSERT_EQ(levels->LevelSortedRuns(), expected_sorted_run); + ASSERT_EQ(levels->NumberOfSortedRuns(), 6); +} + +TEST_F(LevelsTest, TestAddLevel0File) { + std::vector> input_files = { + CreateDataFileMeta(0, 100, 200, 0), CreateDataFileMeta(0, 0, 200, 0), + CreateDataFileMeta(0, 0, 200, 10), CreateDataFileMeta(1, 0, 500, 0), + CreateDataFileMeta(2, 0, 1000, 0)}; + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, /*num_levels=*/3)); + ASSERT_EQ(levels->TotalFileSize(), 5); + + auto new_level0 = CreateDataFileMeta(0, 0, 100, 0); + levels->AddLevel0File(new_level0); + ASSERT_EQ(levels->TotalFileSize(), 6); 
+ std::vector expected_sorted_run = { + LevelSortedRun(0, SortedRun::FromSingle(input_files[1])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[2])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[0])), + LevelSortedRun(0, SortedRun::FromSingle(new_level0)), + LevelSortedRun(1, SortedRun::FromSingle(input_files[3])), + LevelSortedRun(2, SortedRun::FromSingle(input_files[4])), + }; + + ASSERT_EQ(levels->LevelSortedRuns(), expected_sorted_run); + ASSERT_EQ(levels->NumberOfSortedRuns(), 6); +} + +TEST_F(LevelsTest, TestUpdate) { + std::vector> input_files = { + CreateDataFileMeta(0, 100, 200, 0), CreateDataFileMeta(0, 0, 200, 0), + CreateDataFileMeta(0, 0, 200, 10), CreateDataFileMeta(1, 0, 500, 0), + CreateDataFileMeta(1, 600, 1000, 0)}; + + ASSERT_OK_AND_ASSIGN(auto levels, + Levels::Create(CreateComparator(), input_files, /*num_levels=*/3)); + ASSERT_EQ(levels->TotalFileSize(), 5); + ASSERT_EQ(levels->NumberOfSortedRuns(), 4); + + std::vector> before = { + input_files[1], + input_files[3], + }; + + std::vector> after = {CreateDataFileMeta(0, 0, 100, 0), + CreateDataFileMeta(1, 0, 550, 0)}; + + ASSERT_OK(levels->Update(before, after)); + + std::vector expected_sorted_run = { + LevelSortedRun(0, SortedRun::FromSingle(input_files[2])), + LevelSortedRun(0, SortedRun::FromSingle(input_files[0])), + LevelSortedRun(0, SortedRun::FromSingle(after[0])), + LevelSortedRun(1, SortedRun::FromSorted({after[1], input_files[4]})), + }; + + ASSERT_EQ(levels->LevelSortedRuns(), expected_sorted_run); + ASSERT_EQ(levels->NumberOfSortedRuns(), 4); +} + +} // namespace paimon::test diff --git a/src/paimon/core/mergetree/lookup/default_lookup_serializer_factory.h b/src/paimon/core/mergetree/lookup/default_lookup_serializer_factory.h index efc971f8..2ad37d27 100644 --- a/src/paimon/core/mergetree/lookup/default_lookup_serializer_factory.h +++ b/src/paimon/core/mergetree/lookup/default_lookup_serializer_factory.h @@ -15,8 +15,8 @@ */ #pragma once - #include 
"paimon/common/data/serializer/row_compacted_serializer.h" +#include "paimon/common/utils/arrow/arrow_utils.h" #include "paimon/core/mergetree/lookup/lookup_serializer_factory.h" namespace paimon { /// A `LookupSerializerFactory` using `RowCompactedSerializer`. @@ -48,10 +48,12 @@ class DefaultLookupSerializerFactory : public LookupSerializerFactory { "file_ser_version {} mismatch DefaultLookupSerializerFactory version {}", file_ser_version, Version())); } - if (!file_schema->Equals(current_schema)) { - // TODO(xinyu.lxy): support EqualsIgnoreNullable + if (!ArrowUtils::EqualsIgnoreNullable(arrow::struct_(file_schema->fields()), + arrow::struct_(current_schema->fields()))) { return Status::Invalid( - "current_schema and file_schema must be equal in DefaultLookupSerializerFactory"); + fmt::format("current_schema {} must be equal with file_schema {} in " + "DefaultLookupSerializerFactory", + current_schema->ToString(), file_schema->ToString())); } PAIMON_ASSIGN_OR_RAISE(std::shared_ptr serializer, RowCompactedSerializer::Create(current_schema, pool)); diff --git a/src/paimon/core/mergetree/lookup_file.h b/src/paimon/core/mergetree/lookup_file.h new file mode 100644 index 00000000..469109bc --- /dev/null +++ b/src/paimon/core/mergetree/lookup_file.h @@ -0,0 +1,105 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once +#include "paimon/common/data/binary_row.h" +#include "paimon/common/lookup/lookup_store_factory.h" +#include "paimon/common/utils/binary_row_partition_computer.h" +#include "paimon/fs/file_system.h" + +namespace paimon { +/// Lookup file for cache remote file to local. +class LookupFile { + public: + LookupFile(const std::shared_ptr& fs, const std::string& local_file, int32_t level, + int64_t schema_id, const std::string& ser_version, + std::unique_ptr&& reader) + : fs_(fs), + local_file_(local_file), + level_(level), + schema_id_(schema_id), + ser_version_(ser_version), + reader_(std::move(reader)) {} + + ~LookupFile() { + [[maybe_unused]] auto status = Close(); + } + const std::string& LocalFile() const { + return local_file_; + } + + int64_t SchemaId() const { + return schema_id_; + } + + const std::string& SerVersion() const { + return ser_version_; + } + + int32_t Level() const { + return level_; + } + + bool IsClosed() const { + return closed_; + } + + Result> GetResult(const std::shared_ptr& key) { + if (closed_) { + return Status::Invalid("GetResult failed in LookupFile, reader is closed"); + } + request_count_++; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr res, reader_->Lookup(key)); + if (res) { + hit_count_++; + } + return res; + } + + Status Close() { + PAIMON_RETURN_NOT_OK(reader_->Close()); + closed_ = true; + // TODO(lisizhuo.lsz): callback + return fs_->Delete(local_file_, /*recursive=*/false); + } + + static Result LocalFilePrefix(const std::shared_ptr& partition_type, + const BinaryRow& partition, int32_t bucket, + const std::string& remote_file_name) { + if (partition.GetFieldCount() == 0) { + return fmt::format("{}-{}", std::to_string(bucket), remote_file_name); + } else { + PAIMON_ASSIGN_OR_RAISE( + std::string part_str, + BinaryRowPartitionComputer::PartToSimpleString( + partition_type, partition, /*delimiter=*/"-", /*max_length=*/20)); + return fmt::format("{}-{}-{}", part_str, bucket, remote_file_name); + } + } + + 
private: + std::shared_ptr fs_; + std::string local_file_; + int32_t level_; + int64_t schema_id_; + std::string ser_version_; + std::unique_ptr reader_; + int64_t request_count_ = 0; + int64_t hit_count_ = 0; + bool closed_ = false; + // TODO(lisizhuo.lsz): callback? +}; +} // namespace paimon diff --git a/src/paimon/core/mergetree/lookup_file_test.cpp b/src/paimon/core/mergetree/lookup_file_test.cpp new file mode 100644 index 00000000..7aeaf1ff --- /dev/null +++ b/src/paimon/core/mergetree/lookup_file_test.cpp @@ -0,0 +1,106 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/mergetree/lookup_file.h" + +#include "gtest/gtest.h" +#include "paimon/testing/utils/binary_row_generator.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { +TEST(LookupFileTest, TestSimple) { + class FakeLookupStoreReader : public LookupStoreReader { + FakeLookupStoreReader(const std::map& kvs, + std::shared_ptr& pool) + : pool_(pool), kvs_(kvs) {} + Result> Lookup(const std::shared_ptr& key) const override { + auto iter = kvs_.find(std::string(key->data(), key->size())); + if (iter == kvs_.end()) { + return std::shared_ptr(); + } + return std::make_shared(iter->second, pool_.get()); + } + Status Close() override { + return Status::OK(); + } + + private: + std::shared_ptr pool_; + std::map kvs_; + }; + auto pool = GetDefaultPool(); + auto tmp_dir = UniqueTestDirectory::Create("local"); + ASSERT_TRUE(tmp_dir); + auto fs = tmp_dir->GetFileSystem(); + std::string local_file = tmp_dir->Str() + "/test.file"; + ASSERT_OK(fs->WriteFile(local_file, "testdata", /*overwrite=*/false)); + ASSERT_TRUE(fs->Exists(local_file).value()); + + std::map kvs = {{"aa", "aa1"}, {"bb", "bb1"}}; + auto lookup_file = std::make_shared( + fs, local_file, /*level=*/3, /*schema_id=*/1, + /*ser_version=*/"v1", std::make_unique(kvs, pool)); + ASSERT_EQ(lookup_file->LocalFile(), local_file); + ASSERT_EQ(lookup_file->Level(), 3); + ASSERT_EQ(lookup_file->SchemaId(), 1); + ASSERT_EQ(lookup_file->SerVersion(), "v1"); + { + ASSERT_OK_AND_ASSIGN(auto value, + lookup_file->GetResult(std::make_shared("aa", pool.get()))); + ASSERT_TRUE(value); + ASSERT_EQ(std::string(value->data(), value->size()), "aa1"); + } + { + ASSERT_OK_AND_ASSIGN(auto value, + lookup_file->GetResult(std::make_shared("bb", pool.get()))); + ASSERT_TRUE(value); + ASSERT_EQ(std::string(value->data(), value->size()), "bb1"); + } + { + ASSERT_OK_AND_ASSIGN( + auto value, lookup_file->GetResult(std::make_shared("non-exist", pool.get()))); + ASSERT_FALSE(value); + } + 
ASSERT_FALSE(lookup_file->IsClosed()); + ASSERT_EQ(lookup_file->request_count_, 3); + ASSERT_EQ(lookup_file->hit_count_, 2); + + ASSERT_OK(lookup_file->Close()); + ASSERT_TRUE(lookup_file->IsClosed()); + ASSERT_FALSE(fs->Exists(local_file).value()); +} + +TEST(LookupFileTest, TestLocalFilePrefix) { + auto pool = GetDefaultPool(); + { + auto schema = arrow::schema({ + arrow::field("f0", arrow::utf8()), + arrow::field("f1", arrow::int32()), + }); + auto partition = BinaryRowGenerator::GenerateRow({"20240731", 10}, pool.get()); + ASSERT_OK_AND_ASSIGN(std::string ret, LookupFile::LocalFilePrefix( + schema, partition, /*bucket=*/3, "test.orc")); + ASSERT_EQ(ret, "20240731-10-3-test.orc"); + } + { + auto schema = arrow::schema({}); + auto partition = BinaryRow::EmptyRow(); + ASSERT_OK_AND_ASSIGN(std::string ret, LookupFile::LocalFilePrefix( + schema, partition, /*bucket=*/3, "test.orc")); + ASSERT_EQ(ret, "3-test.orc"); + } +} +} // namespace paimon::test diff --git a/src/paimon/core/mergetree/lookup_levels.cpp b/src/paimon/core/mergetree/lookup_levels.cpp new file mode 100644 index 00000000..c3f73a21 --- /dev/null +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -0,0 +1,269 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/mergetree/lookup_levels.h" + +#include "paimon/common/table/special_fields.h" +#include "paimon/common/utils/scope_guard.h" +#include "paimon/core/io/key_value_data_file_record_reader.h" +#include "paimon/core/mergetree/lookup/file_position.h" +#include "paimon/core/mergetree/lookup/positioned_key_value.h" +#include "paimon/core/mergetree/lookup_utils.h" +#include "paimon/core/operation/internal_read_context.h" +#include "paimon/result.h" +namespace paimon { +template +Result>> LookupLevels::Create( + const std::shared_ptr& fs, const BinaryRow& partition, int32_t bucket, + const CoreOptions& options, const std::shared_ptr& schema_manager, + const std::shared_ptr& io_manager, + const std::shared_ptr& path_factory, + const std::shared_ptr& table_schema, std::unique_ptr&& levels, + const std::unordered_map& deletion_file_map, + const std::shared_ptr::Factory>& processor_factory, + const std::shared_ptr& serializer_factory, + const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& pool) { + PAIMON_ASSIGN_OR_RAISE(std::vector trimmed_pk, table_schema->TrimmedPrimaryKeys()); + PAIMON_ASSIGN_OR_RAISE(std::vector pk_fields, table_schema->GetFields(trimmed_pk)); + + auto pk_schema = DataField::ConvertDataFieldsToArrowSchema(pk_fields); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr key_serializer, + RowCompactedSerializer::Create(pk_schema, pool)); + PAIMON_ASSIGN_OR_RAISE( + std::unique_ptr key_comparator, + FieldsComparator::Create(pk_fields, /*is_ascending_order=*/true, /*use_view=*/false)); + + PAIMON_ASSIGN_OR_RAISE(std::vector partition_fields, + table_schema->GetFields(table_schema->PartitionKeys())); + auto partition_schema = DataField::ConvertDataFieldsToArrowSchema(partition_fields); + + // TODO(xinyu.lxy): set executor + ReadContextBuilder read_context_builder(path_factory->RootPath()); + read_context_builder.SetOptions(options.ToMap()).EnablePrefetch(true).WithMemoryPool(pool); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr 
read_context, + read_context_builder.Finish()); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr internal_read_context, + InternalReadContext::Create(read_context, table_schema, options.ToMap())); + auto split_read = std::make_unique(path_factory, internal_read_context, pool, + CreateDefaultExecutor()); + + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr data_file_path_factory, + path_factory->CreateDataFilePathFactory(partition, bucket)); + + return std::unique_ptr(new LookupLevels( + fs, partition, bucket, options, schema_manager, io_manager, std::move(key_comparator), + data_file_path_factory, std::move(split_read), table_schema, partition_schema, + std::move(levels), deletion_file_map, processor_factory, std::move(key_serializer), + serializer_factory, lookup_store_factory, pool)); +} +template +Result> LookupLevels::Lookup(const std::shared_ptr& key, + int32_t start_level) { + auto lookup = [this](const std::shared_ptr& key, + const SortedRun& level) -> Result> { + return this->Lookup(key, level); + }; + auto lookup_level0 = + [this](const std::shared_ptr& key, + const std::set, Levels::Level0Comparator>& level0) + -> Result> { return this->lookupLevel0(key, level0); }; + return LookupUtils::Lookup(*levels_, key, start_level, std::function(lookup), + std::function(lookup_level0)); +} + +template +Result> LookupLevels::lookupLevel0( + const std::shared_ptr& key, + const std::set, Levels::Level0Comparator>& level0) { + auto lookup = [this](const std::shared_ptr& key, + const std::shared_ptr& file) -> Result> { + return this->Lookup(key, file); + }; + return LookupUtils::LookupLevel0(key_comparator_, key, level0, std::function(lookup)); +} + +template +Result> LookupLevels::Lookup(const std::shared_ptr& key, + const SortedRun& level) { + auto lookup = [this](const std::shared_ptr& key, + const std::shared_ptr& file) -> Result> { + return this->Lookup(key, file); + }; + return LookupUtils::Lookup(key_comparator_, key, level, std::function(lookup)); +} + +template 
+LookupLevels::LookupLevels( + const std::shared_ptr& fs, const BinaryRow& partition, int32_t bucket, + const CoreOptions& options, const std::shared_ptr& schema_manager, + const std::shared_ptr& io_manager, + std::unique_ptr&& key_comparator, + const std::shared_ptr& data_file_path_factory, + std::unique_ptr&& split_read, + const std::shared_ptr& table_schema, + const std::shared_ptr& partition_schema, std::unique_ptr&& levels, + const std::unordered_map& deletion_file_map, + const std::shared_ptr::Factory>& processor_factory, + std::unique_ptr&& key_serializer, + const std::shared_ptr& serializer_factory, + const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& pool) + : pool_(pool), + fs_(fs), + partition_(partition), + bucket_(bucket), + options_(options), + schema_manager_(schema_manager), + io_manager_(io_manager), + key_comparator_(std::move(key_comparator)), + data_file_path_factory_(data_file_path_factory), + split_read_(std::move(split_read)), + table_schema_(table_schema), + levels_(std::move(levels)), + deletion_file_map_(deletion_file_map), + processor_factory_(processor_factory), + key_serializer_(std::move(key_serializer)), + serializer_factory_(serializer_factory), + lookup_store_factory_(lookup_store_factory) { + value_schema_ = DataField::ConvertDataFieldsToArrowSchema(table_schema->Fields()); + read_schema_ = SpecialFields::CompleteSequenceAndValueKindField(value_schema_); +} +template +Result> LookupLevels::Lookup(const std::shared_ptr& key, + const std::shared_ptr& file) { + auto iter = lookup_file_cache_.find(file->file_name); + std::shared_ptr lookup_file; + if (iter == lookup_file_cache_.end()) { + PAIMON_ASSIGN_OR_RAISE(lookup_file, CreateLookupFile(file)); + lookup_file_cache_[file->file_name] = lookup_file; + } else { + lookup_file = iter->second; + } + + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr key_bytes, + key_serializer_->SerializeToBytes(*key)); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr value_bytes, 
lookup_file->GetResult(key_bytes)); + if (!value_bytes) { + return std::optional(); + } + + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr> processor, + GetOrCreateProcessor(lookup_file->SchemaId(), lookup_file->SerVersion())); + PAIMON_ASSIGN_OR_RAISE( + T result, processor->ReadFromDisk(key, lookup_file->Level(), value_bytes, file->file_name)); + return std::optional(std::move(result)); +} +template +Result> LookupLevels::CreateLookupFile( + const std::shared_ptr& file) { + PAIMON_ASSIGN_OR_RAISE( + std::string prefix, + LookupFile::LocalFilePrefix(partition_schema_, partition_, bucket_, file->file_name)); + PAIMON_ASSIGN_OR_RAISE(std::string kv_file_path, io_manager_->GenerateTempFilePath(prefix)); + // TODO(lisizhuo.lsz): support DownloadRemoteSst + PAIMON_RETURN_NOT_OK(CreateSstFileFromDataFile(file, kv_file_path)); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr reader, + lookup_store_factory_->CreateReader(fs_, kv_file_path, pool_)); + return std::make_shared(fs_, kv_file_path, file->level, table_schema_->Id(), + serializer_factory_->Version(), std::move(reader)); +} +template +Status LookupLevels::CreateSstFileFromDataFile(const std::shared_ptr& file, + const std::string& kv_file_path) { + // Prepare reader to iterate KeyValue + PAIMON_ASSIGN_OR_RAISE( + std::vector> raw_readers, + split_read_->CreateRawFileReaders(partition_, {file}, read_schema_, + /*predicate=*/nullptr, deletion_file_map_, + /*row_ranges=*/std::nullopt, data_file_path_factory_)); + if (raw_readers.size() != 1) { + return Status::Invalid("Unexpected, CreateSstFileFromDataFile only create single reader"); + } + auto& raw_reader = raw_readers[0]; + PAIMON_ASSIGN_OR_RAISE(std::vector trimmed_pk, + table_schema_->TrimmedPrimaryKeys()); + auto reader = std::make_unique( + std::move(raw_reader), trimmed_pk.size(), value_schema_, file->level, pool_); + + // Create processor to persist value + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr> processor, + GetOrCreateProcessor(table_schema_->Id(), 
serializer_factory_->Version())); + + // Prepare writer to write lookup file + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr bloom_filter, + LookupStoreFactory::BfGenerator(file->row_count, options_, pool_.get())); + PAIMON_ASSIGN_OR_RAISE( + std::unique_ptr kv_writer, + lookup_store_factory_->CreateWriter(fs_, kv_file_path, bloom_filter, pool_)); + + ScopeGuard write_guard([&]() -> void { + [[maybe_unused]] auto status = fs_->Delete(kv_file_path, /*recursive=*/false); + }); + + // Read each KeyValue and write to lookup file with or without position. + while (true) { + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr iter, + reader->NextBatch()); + if (iter == nullptr) { + break; + } + auto typed_iter = dynamic_cast(iter.get()); + assert(typed_iter); + while (typed_iter->HasNext()) { + std::pair kv_and_pos; + PAIMON_ASSIGN_OR_RAISE(kv_and_pos, typed_iter->NextWithFilePos()); + const auto& [file_pos, kv] = kv_and_pos; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr key_bytes, + key_serializer_->SerializeToBytes(*(kv.key))); + std::shared_ptr value_bytes; + if (processor->WithPosition()) { + PAIMON_ASSIGN_OR_RAISE(value_bytes, processor->PersistToDisk(kv, file_pos)); + } else { + PAIMON_ASSIGN_OR_RAISE(value_bytes, processor->PersistToDisk(kv)); + } + PAIMON_RETURN_NOT_OK(kv_writer->Put(std::move(key_bytes), std::move(value_bytes))); + } + } + kv_writer->Close(); + write_guard.Release(); + return Status::OK(); +} +template +Result>> LookupLevels::GetOrCreateProcessor( + int64_t schema_id, const std::string& ser_version) { + auto key = std::make_pair(schema_id, ser_version); + auto iter = schema_id_and_ser_version_to_processors_.find(key); + if (iter != schema_id_and_ser_version_to_processors_.end()) { + return iter->second; + } + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr table_schema, + schema_manager_->ReadSchema(schema_id)); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr> processor, + processor_factory_->Create(ser_version, serializer_factory_, value_schema_, pool_)); + 
schema_id_and_ser_version_to_processors_[key] = processor; + return processor; +} + +template class LookupLevels; +template class LookupLevels; +template class LookupLevels; +template class LookupLevels; +} // namespace paimon diff --git a/src/paimon/core/mergetree/lookup_levels.h b/src/paimon/core/mergetree/lookup_levels.h new file mode 100644 index 00000000..cd5281ca --- /dev/null +++ b/src/paimon/core/mergetree/lookup_levels.h @@ -0,0 +1,115 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include "paimon/common/data/serializer/row_compacted_serializer.h" +#include "paimon/core/io/key_value_data_file_record_reader.h" +#include "paimon/core/mergetree/lookup/lookup_serializer_factory.h" +#include "paimon/core/mergetree/lookup/persist_processor.h" +#include "paimon/core/mergetree/lookup_file.h" +#include "paimon/core/mergetree/lookup_utils.h" +#include "paimon/core/operation/raw_file_split_read.h" +#include "paimon/core/schema/schema_manager.h" +#include "paimon/disk/io_manager.h" +#include "paimon/result.h" + +namespace paimon { +/// Provide lookup by key. 
+template +class LookupLevels { + public: + static Result>> Create( + const std::shared_ptr& fs, const BinaryRow& partition, int32_t bucket, + const CoreOptions& options, const std::shared_ptr& schema_manager, + const std::shared_ptr& io_manager, + const std::shared_ptr& path_factory, + const std::shared_ptr& table_schema, std::unique_ptr&& levels, + const std::unordered_map& deletion_file_map, + const std::shared_ptr::Factory>& processor_factory, + const std::shared_ptr& serializer_factory, + const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& pool); + + const std::unique_ptr& GetLevels() const { + return levels_; + } + + Result> Lookup(const std::shared_ptr& key, int32_t start_level); + + Result> lookupLevel0( + const std::shared_ptr& key, + const std::set, Levels::Level0Comparator>& level0); + + Result> Lookup(const std::shared_ptr& key, + const SortedRun& level); + + private: + LookupLevels(const std::shared_ptr& fs, const BinaryRow& partition, int32_t bucket, + const CoreOptions& options, const std::shared_ptr& schema_manager, + const std::shared_ptr& io_manager, + std::unique_ptr&& key_comparator, + const std::shared_ptr& data_file_path_factory, + std::unique_ptr&& split_read, + const std::shared_ptr& table_schema, + const std::shared_ptr& partition_schema, + std::unique_ptr&& levels, + const std::unordered_map& deletion_file_map, + const std::shared_ptr::Factory>& processor_factory, + std::unique_ptr&& key_serializer, + const std::shared_ptr& serializer_factory, + const std::shared_ptr& lookup_store_factory, + const std::shared_ptr& pool); + + Result> Lookup(const std::shared_ptr& key, + const std::shared_ptr& file); + + Result> CreateLookupFile(const std::shared_ptr& file); + + Status CreateSstFileFromDataFile(const std::shared_ptr& file, + const std::string& kv_file_path); + + Result>> GetOrCreateProcessor( + int64_t schema_id, const std::string& ser_version); + + private: + std::shared_ptr pool_; + std::shared_ptr fs_; + BinaryRow 
partition_; + int32_t bucket_; + CoreOptions options_; + std::shared_ptr schema_manager_; + std::shared_ptr io_manager_; + std::shared_ptr key_comparator_; + std::shared_ptr data_file_path_factory_; + std::unique_ptr split_read_; + + std::shared_ptr table_schema_; + std::shared_ptr partition_schema_; + std::shared_ptr read_schema_; + std::shared_ptr value_schema_; + std::unique_ptr levels_; + std::unordered_map deletion_file_map_; + + std::shared_ptr::Factory> processor_factory_; + std::unique_ptr key_serializer_; + std::shared_ptr serializer_factory_; + std::shared_ptr lookup_store_factory_; + + std::map> lookup_file_cache_; + std::map, std::shared_ptr>> + schema_id_and_ser_version_to_processors_; +}; +} // namespace paimon diff --git a/src/paimon/core/mergetree/lookup_levels_test.cpp b/src/paimon/core/mergetree/lookup_levels_test.cpp new file mode 100644 index 00000000..1242b43f --- /dev/null +++ b/src/paimon/core/mergetree/lookup_levels_test.cpp @@ -0,0 +1,439 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "paimon/core/mergetree/lookup_levels.h" + +#include "arrow/api.h" +#include "arrow/c/abi.h" +#include "arrow/c/bridge.h" +#include "arrow/ipc/json_simple.h" +#include "gtest/gtest.h" +#include "paimon/catalog/catalog.h" +#include "paimon/common/utils/path_util.h" +#include "paimon/core/core_options.h" +#include "paimon/core/io/data_file_path_factory.h" +#include "paimon/core/mergetree/compact/deduplicate_merge_function.h" +#include "paimon/core/mergetree/compact/reducer_merge_function_wrapper.h" +#include "paimon/core/mergetree/lookup/default_lookup_serializer_factory.h" +#include "paimon/core/mergetree/lookup/persist_value_and_pos_processor.h" +#include "paimon/core/mergetree/lookup/positioned_key_value.h" +#include "paimon/core/mergetree/merge_tree_writer.h" +#include "paimon/core/schema/schema_manager.h" +#include "paimon/core/utils/fields_comparator.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/record_batch.h" +#include "paimon/testing/utils/binary_row_generator.h" +#include "paimon/testing/utils/testharness.h" +namespace paimon::test { +class LookupLevelsTest : public testing::Test { + public: + void SetUp() override { + pool_ = GetDefaultPool(); + arrow::FieldVector fields = { + arrow::field("key", arrow::int32()), + arrow::field("value", arrow::int32()), + }; + arrow_schema_ = arrow::schema(fields); + key_schema_ = arrow::schema({fields[0]}); + tmp_dir_ = UniqueTestDirectory::Create("local"); + dir_ = UniqueTestDirectory::Create("local"); + fs_ = dir_->GetFileSystem(); + } + + void TearDown() override {} + + Result> NewFiles(int32_t level, int64_t last_sequence_number, + const std::string& table_path, + const CoreOptions& options, + const std::string& src_array_str) const { + std::shared_ptr src_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(arrow_schema_->fields()), + src_array_str) + .ValueOrDie(); + + // prepare writer + PAIMON_ASSIGN_OR_RAISE(auto path_factory, CreateFileStorePathFactory(table_path, 
options)); + PAIMON_ASSIGN_OR_RAISE(auto data_path_factory, path_factory->CreateDataFilePathFactory( + BinaryRow::EmptyRow(), /*bucket=*/0)); + PAIMON_ASSIGN_OR_RAISE(auto key_comparator, CreateKeyComparator()); + auto mfunc = std::make_unique(/*ignore_delete=*/false); + auto merge_function_wrapper = + std::make_shared(std::move(mfunc)); + + auto writer = std::make_shared( + /*last_sequence_number=*/last_sequence_number, std::vector({"key"}), + data_path_factory, key_comparator, + /*user_defined_seq_comparator=*/nullptr, merge_function_wrapper, /*schema_id=*/0, + arrow_schema_, options, pool_); + + // write data + ArrowArray c_src_array; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*src_array, &c_src_array)); + RecordBatchBuilder batch_builder(&c_src_array); + batch_builder.SetBucket(0); + PAIMON_ASSIGN_OR_RAISE(auto batch, batch_builder.Finish()); + PAIMON_RETURN_NOT_OK(writer->Write(std::move(batch))); + // get file meta + PAIMON_ASSIGN_OR_RAISE(auto commit_increment, + writer->PrepareCommit(/*wait_compaction=*/false)); + const auto& file_metas = commit_increment.GetNewFilesIncrement().NewFiles(); + EXPECT_EQ(file_metas.size(), 1); + auto file_meta = file_metas[0]; + file_meta->level = level; + return file_meta; + } + + Result> CreateKeyComparator() const { + std::vector key_fields = {DataField(0, key_schema_->field(0))}; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr key_comparator, + FieldsComparator::Create(key_fields, + /*is_ascending_order=*/true, + /*use_view=*/false)); + return key_comparator; + } + + Result CreateTable(const std::map& options) const { + ::ArrowSchema c_schema; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*arrow_schema_, &c_schema)); + + PAIMON_ASSIGN_OR_RAISE(auto catalog, Catalog::Create(dir_->Str(), {})); + PAIMON_RETURN_NOT_OK(catalog->CreateDatabase("foo", {}, /*ignore_if_exists=*/false)); + PAIMON_RETURN_NOT_OK(catalog->CreateTable(Identifier("foo", "bar"), &c_schema, + /*partition_keys=*/{}, + /*primary_keys=*/{"key"}, 
options, + /*ignore_if_exists=*/false)); + return PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + } + + Result> CreateFileStorePathFactory( + const std::string& table_path, const CoreOptions& options) const { + PAIMON_ASSIGN_OR_RAISE(std::vector external_paths, + options.CreateExternalPaths()); + PAIMON_ASSIGN_OR_RAISE(std::optional global_index_external_path, + options.CreateGlobalIndexExternalPath()); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr path_factory, + FileStorePathFactory::Create( + table_path, arrow_schema_, /*partition_keys=*/{}, options.GetPartitionDefaultName(), + options.GetWriteFileFormat()->Identifier(), options.DataFilePrefix(), + options.LegacyPartitionNameEnabled(), external_paths, global_index_external_path, + options.IndexFileInDataFileDir(), pool_)); + return path_factory; + } + + Result>> CreateLookupLevels( + const std::string& table_path, std::unique_ptr&& levels) const { + auto schema_manager = std::make_shared(fs_, table_path); + PAIMON_ASSIGN_OR_RAISE(auto table_schema, schema_manager->ReadSchema(0)); + PAIMON_ASSIGN_OR_RAISE(CoreOptions options, CoreOptions::FromMap(table_schema->Options())); + + auto io_manager = IOManager::Create(tmp_dir_->Str()); + + auto arrow_schema = DataField::ConvertDataFieldsToArrowSchema(table_schema->Fields()); + auto processor_factory = + std::make_shared(arrow_schema_); + auto serializer_factory = std::make_shared(); + PAIMON_ASSIGN_OR_RAISE(auto key_comparator, + RowCompactedSerializer::CreateSliceComparator(key_schema_, pool_)); + PAIMON_ASSIGN_OR_RAISE(auto lookup_store_factory, + LookupStoreFactory::Create(key_comparator, options)); + PAIMON_ASSIGN_OR_RAISE(auto path_factory, CreateFileStorePathFactory(table_path, options)); + return LookupLevels::Create( + fs_, BinaryRow::EmptyRow(), /*bucket=*/0, options, schema_manager, + std::move(io_manager), path_factory, table_schema, std::move(levels), + /*deletion_file_map=*/{}, processor_factory, serializer_factory, lookup_store_factory, + pool_); + } + + 
private: + std::shared_ptr pool_; + std::shared_ptr arrow_schema_; + std::shared_ptr key_schema_; + std::unique_ptr tmp_dir_; + std::unique_ptr dir_; + std::shared_ptr fs_; +}; + +TEST_F(LookupLevelsTest, TestMultiLevels) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/2, /*last_sequence_number=*/3, table_path, + core_options, "[[2, 22], [5, 55]]")); + std::vector> files = {file0, file1}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + // only in level 1 + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 1); + ASSERT_EQ(positioned_kv.value().key_value.level, 1); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 11); + + // only in level 2 + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({2}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 4); + ASSERT_EQ(positioned_kv.value().key_value.level, 2); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 22); + + // both in level 1 and level 2 + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + 
ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 3); + ASSERT_EQ(positioned_kv.value().key_value.level, 1); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 5); + + // no exists + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({4}, pool_.get()), + /*start_level=*/1)); + ASSERT_FALSE(positioned_kv); + + ASSERT_EQ(lookup_levels->lookup_file_cache_.size(), 2); + ASSERT_EQ(lookup_levels->schema_id_and_ser_version_to_processors_.size(), 1); + // TODO(lisizhuo.lsz): test lookuplevels close +} + +TEST_F(LookupLevelsTest, TestMultiFiles) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [2, 22]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/1, /*last_sequence_number=*/2, table_path, + core_options, "[[4, 44], [5, 55]]")); + ASSERT_OK_AND_ASSIGN(auto file2, NewFiles(/*level=*/1, /*last_sequence_number=*/4, table_path, + core_options, "[[7, 77], [8, 88]]")); + ASSERT_OK_AND_ASSIGN(auto file3, NewFiles(/*level=*/1, /*last_sequence_number=*/6, table_path, + core_options, "[[10, 1010], [11, 1111]]")); + + std::vector> files = {file0, file1, file2, file3}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + std::map contains = {{1, 11}, {2, 22}, {4, 44}, {5, 55}, + {7, 77}, {8, 88}, {10, 1010}, {11, 1111}}; + for (const auto& [key, value] : contains) { + ASSERT_OK_AND_ASSIGN( + auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({key}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + 
ASSERT_EQ(positioned_kv.value().key_value.level, 1); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), value); + } + + std::vector not_contains = {0, 3, 6, 9, 12}; + for (const auto& key : not_contains) { + ASSERT_OK_AND_ASSIGN( + auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({key}, pool_.get()), + /*start_level=*/1)); + ASSERT_FALSE(positioned_kv); + } +} + +TEST_F(LookupLevelsTest, TestLookupEmptyLevel) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/3, /*last_sequence_number=*/3, table_path, + core_options, "[[2, 22], [5, 55]]")); + std::vector> files = {file0, file1}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({2}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 4); + ASSERT_EQ(positioned_kv.value().key_value.level, 3); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 22); +} + +TEST_F(LookupLevelsTest, TestLookupLevel0) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/0, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 0]]")); + 
ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/1, /*last_sequence_number=*/1, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file2, NewFiles(/*level=*/2, /*last_sequence_number=*/4, table_path, + core_options, "[[2, 22], [5, 55]]")); + + std::vector> files = {file0, file1, file2}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/0)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 1); + ASSERT_EQ(positioned_kv.value().key_value.level, 0); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 0); +} + +TEST_F(LookupLevelsTest, TestLookupLevel0NotInLevel0) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/2, /*last_sequence_number=*/3, table_path, + core_options, "[[2, 22], [5, 55]]")); + std::vector> files = {file0, file1}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/0)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 1); + ASSERT_EQ(positioned_kv.value().key_value.level, 1); + 
ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 11); +} + +TEST_F(LookupLevelsTest, TestLookupLevel0WithMultipleFiles) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/0, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 0], [4, 44]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/0, /*last_sequence_number=*/2, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file2, NewFiles(/*level=*/2, /*last_sequence_number=*/5, table_path, + core_options, "[[2, 22], [5, 55]]")); + + std::vector> files = {file0, file1, file2}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/0)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 3); + ASSERT_EQ(positioned_kv.value().key_value.level, 0); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 11); + + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({3}, pool_.get()), + /*start_level=*/0)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 4); + ASSERT_EQ(positioned_kv.value().key_value.level, 0); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 33); + + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({4}, pool_.get()), + /*start_level=*/0)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 2); + 
ASSERT_EQ(positioned_kv.value().key_value.level, 0); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 44); + + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/2)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().key_value.sequence_number, 7); + ASSERT_EQ(positioned_kv.value().key_value.level, 2); + ASSERT_EQ(positioned_kv.value().key_value.value->GetInt(1), 55); + + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({4}, pool_.get()), + /*start_level=*/2)); + ASSERT_FALSE(positioned_kv); +} + +TEST_F(LookupLevelsTest, TestWithPosistion) { + std::map options = {}; + ASSERT_OK_AND_ASSIGN(CoreOptions core_options, CoreOptions::FromMap(options)); + ASSERT_OK_AND_ASSIGN(auto table_path, CreateTable(options)); + ASSERT_OK_AND_ASSIGN(auto key_comparator, CreateKeyComparator()); + + ASSERT_OK_AND_ASSIGN(auto file0, NewFiles(/*level=*/1, /*last_sequence_number=*/0, table_path, + core_options, "[[1, 11], [3, 33], [5, 5]]")); + ASSERT_OK_AND_ASSIGN(auto file1, NewFiles(/*level=*/2, /*last_sequence_number=*/3, table_path, + core_options, "[[2, 22], [5, 55]]")); + std::vector> files = {file0, file1}; + ASSERT_OK_AND_ASSIGN(auto levels, Levels::Create(key_comparator, files, /*num_levels=*/3)); + + ASSERT_OK_AND_ASSIGN(auto lookup_levels, CreateLookupLevels(table_path, std::move(levels))); + + // only in level 1 + ASSERT_OK_AND_ASSIGN(auto positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({1}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().row_position, 0); + + // only in level 2 + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({2}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().row_position, 0); + + // both in level 1 and level 2 + 
ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({5}, pool_.get()), + /*start_level=*/1)); + ASSERT_TRUE(positioned_kv); + ASSERT_EQ(positioned_kv.value().row_position, 2); + + // no exists + ASSERT_OK_AND_ASSIGN(positioned_kv, + lookup_levels->Lookup(BinaryRowGenerator::GenerateRowPtr({4}, pool_.get()), + /*start_level=*/1)); + ASSERT_FALSE(positioned_kv); +} + +} // namespace paimon::test diff --git a/src/paimon/core/mergetree/lookup_utils.h b/src/paimon/core/mergetree/lookup_utils.h new file mode 100644 index 00000000..44abd141 --- /dev/null +++ b/src/paimon/core/mergetree/lookup_utils.h @@ -0,0 +1,119 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +#include "paimon/core/mergetree/levels.h" +#include "paimon/core/mergetree/sorted_run.h" +#include "paimon/core/utils/fields_comparator.h" +#include "paimon/result.h" + +namespace paimon { +/// Utils for lookup. 
+class LookupUtils { + public: + LookupUtils() = delete; + ~LookupUtils() = delete; + + template + static Result> Lookup( + const Levels& levels, const std::shared_ptr& key, int32_t start_level, + std::function>(const std::shared_ptr&, + const SortedRun&)> + lookup, + std::function>( + const std::shared_ptr&, + const std::set, Levels::Level0Comparator>&)> + level0_lookup) { + std::optional result; + for (int32_t i = start_level; i < levels.NumberOfLevels(); ++i) { + if (i == 0) { + PAIMON_ASSIGN_OR_RAISE(result, level0_lookup(key, levels.GetLevel0())); + } else { + PAIMON_ASSIGN_OR_RAISE(SortedRun level, Levels::RunOfLevel(i, levels.GetLevels())); + PAIMON_ASSIGN_OR_RAISE(result, lookup(key, level)); + } + if (result) { + break; + } + } + return result; + } + + template + static Result> LookupLevel0( + const std::shared_ptr& key_comparator, + const std::shared_ptr& target, + const std::set, Levels::Level0Comparator>& level0, + std::function>(const std::shared_ptr&, + const std::shared_ptr&)> + lookup) { + std::optional result; + for (const auto& file : level0) { + if (key_comparator->CompareTo(file->max_key, *target) >= 0 && + key_comparator->CompareTo(file->min_key, *target) <= 0) { + PAIMON_ASSIGN_OR_RAISE(result, lookup(target, file)); + if (result) { + break; + } + } + } + return result; + } + + template + static Result> Lookup( + const std::shared_ptr& key_comparator, + const std::shared_ptr& target, const SortedRun& level, + std::function>(const std::shared_ptr&, + const std::shared_ptr&)> + lookup) { + if (level.IsEmpty()) { + return std::optional(); + } + const auto& files = level.Files(); + int32_t left = 0; + auto right = static_cast(files.size()) - 1; + + // binary search restart positions to find the restart position immediately before the + // target key + while (left < right) { + int32_t mid = (left + right) / 2; + if (key_comparator->CompareTo(files[mid]->max_key, *target) < 0) { + // Key at "mid.max" < "target". 
Therefore all + // files at or before "mid" are uninteresting. + left = mid + 1; + } else { + // Key at "mid.max" >= "target". Therefore all files + // after "mid" are uninteresting. + right = mid; + } + } + int32_t index = right; + // if the index is now pointing to the last file, check if the largest key in the block is + // smaller than the target key. If so, we need to seek beyond the end of this file + if (index == static_cast(files.size() - 1) && + key_comparator->CompareTo(files[index]->max_key, *target) < 0) { + index++; + } + + // if files does not have a next, it means the key does not exist in this level + return index < static_cast(files.size()) ? lookup(target, files[index]) + : std::optional(); + } +}; +} // namespace paimon diff --git a/src/paimon/core/mergetree/sorted_run.h b/src/paimon/core/mergetree/sorted_run.h index 45583950..ca16155b 100644 --- a/src/paimon/core/mergetree/sorted_run.h +++ b/src/paimon/core/mergetree/sorted_run.h @@ -32,12 +32,34 @@ namespace paimon { /// of these files do not overlap. 
class SortedRun { public: + static SortedRun Empty() { + return SortedRun({}); + } static SortedRun FromSingle(const std::shared_ptr& meta) { return SortedRun({meta}); } static SortedRun FromSorted(const std::vector>& meta) { return SortedRun(meta); } + static Result FromUnsorted(const std::vector>& meta, + const std::shared_ptr& comparator) { + std::vector> unsorted = meta; + std::sort(unsorted.begin(), unsorted.end(), + [comparator](const std::shared_ptr& m1, + const std::shared_ptr& m2) { + return comparator->CompareTo(m1->min_key, m2->min_key) < 0; + }); + SortedRun sorted_run(unsorted); + if (!sorted_run.IsValid(comparator)) { + return Status::Invalid("from unsorted validate failed"); + } + return sorted_run; + } + + bool IsEmpty() const { + return files_.empty(); + } + const std::vector>& Files() const& { return files_; } @@ -59,6 +81,21 @@ class SortedRun { return true; } + bool operator==(const SortedRun& other) const { + if (this == &other) { + return true; + } + if (files_.size() != other.Files().size()) { + return false; + } + for (size_t i = 0; i < files_.size(); i++) { + if (*files_[i] != *(other.Files()[i])) { + return false; + } + } + return total_size_ == other.TotalSize(); + } + std::string ToString() const { std::vector files_str; files_str.reserve(files_.size()); diff --git a/src/paimon/core/mergetree/sorted_run_test.cpp b/src/paimon/core/mergetree/sorted_run_test.cpp index 9beabf23..33b4a705 100644 --- a/src/paimon/core/mergetree/sorted_run_test.cpp +++ b/src/paimon/core/mergetree/sorted_run_test.cpp @@ -87,6 +87,44 @@ TEST_F(SortedRunTest, TestSortedRunIsValid) { } } +TEST_F(SortedRunTest, TestFromUnsorted) { + ASSERT_OK_AND_ASSIGN( + std::shared_ptr comparator, + FieldsComparator::Create({DataField(0, arrow::field("test", arrow::int32()))}, + /*is_ascending_order=*/true, /*use_view=*/false)); + + // m1 [10, 20] + auto m1 = CreateDataFileMeta(10, 20); + + // m2 [30, 40] + auto m2 = CreateDataFileMeta(30, 40); + + // m3 [15, 35] + auto m3 = 
CreateDataFileMeta(15, 35); + + ASSERT_OK_AND_ASSIGN(auto run1, SortedRun::FromUnsorted({m2, m1}, comparator)); + auto run2 = SortedRun::FromSorted({m1, m2}); + ASSERT_EQ(run1, run2); + + ASSERT_NOK_WITH_MSG(SortedRun::FromUnsorted({m2, m1, m3}, comparator), + "from unsorted validate failed"); +} + +TEST_F(SortedRunTest, TestEqual) { + auto empty = SortedRun::Empty(); + auto m1 = CreateDataFileMeta(10, 20); + auto run1 = SortedRun::FromSingle({m1}); + auto other_run1 = SortedRun::FromSingle({m1}); + + auto m2 = CreateDataFileMeta(100, 200); + auto run2 = SortedRun::FromSingle({m2}); + + ASSERT_EQ(run1, run1); + ASSERT_EQ(run1, other_run1); + ASSERT_FALSE(run1 == run2); + ASSERT_FALSE(empty == run2); +} + TEST_F(SortedRunTest, TestSortedRunToString) { auto m1 = CreateDataFileMeta(10, 20); auto m2 = CreateDataFileMeta(30, 40); @@ -98,4 +136,5 @@ TEST_F(SortedRunTest, TestSortedRunToString) { std::string::npos); ASSERT_TRUE(level_sorted_run_str.find(sorted_run_str) != std::string::npos); } + } // namespace paimon::test diff --git a/src/paimon/core/operation/abstract_split_read.cpp b/src/paimon/core/operation/abstract_split_read.cpp index 349f8a3d..be0ea416 100644 --- a/src/paimon/core/operation/abstract_split_read.cpp +++ b/src/paimon/core/operation/abstract_split_read.cpp @@ -61,20 +61,20 @@ AbstractSplitRead::AbstractSplitRead(const std::shared_ptr context_(context), schema_manager_(std::move(schema_manager)) {} -Result>> AbstractSplitRead::CreateRawFileReaders( +Result>> AbstractSplitRead::CreateRawFileReaders( const BinaryRow& partition, const std::vector>& data_files, const std::shared_ptr& read_schema, const std::shared_ptr& predicate, const std::unordered_map& deletion_file_map, const std::optional>& row_ranges, const std::shared_ptr& data_file_path_factory) const { if (data_files.empty()) { - return std::vector>(); + return std::vector>(); } PAIMON_ASSIGN_OR_RAISE( std::unique_ptr field_mapping_builder, FieldMappingBuilder::Create(read_schema, 
context_->GetPartitionKeys(), predicate)); - std::vector> raw_file_readers; + std::vector> raw_file_readers; raw_file_readers.reserve(data_files.size()); for (const auto& file : data_files) { auto data_file_path = data_file_path_factory->ToPath(file); @@ -82,7 +82,7 @@ Result>> AbstractSplitRead::CreateRawFi PAIMON_ASSIGN_OR_RAISE(std::unique_ptr reader_builder, PrepareReaderBuilder(data_file_identifier)); PAIMON_ASSIGN_OR_RAISE( - std::unique_ptr file_reader, + std::unique_ptr file_reader, CreateFieldMappingReader(data_file_path, file, partition, reader_builder.get(), field_mapping_builder.get(), deletion_file_map, row_ranges, data_file_path_factory)); @@ -170,7 +170,7 @@ Result> AbstractSplitRead::CreateFileBatchReade } } -Result> AbstractSplitRead::CreateFieldMappingReader( +Result> AbstractSplitRead::CreateFieldMappingReader( const std::string& data_file_path, const std::shared_ptr& file_meta, const BinaryRow& partition, const ReaderBuilder* reader_builder, const FieldMappingBuilder* field_mapping_builder, @@ -213,13 +213,13 @@ Result> AbstractSplitRead::CreateFieldMappingReader const auto& predicate = field_mapping->non_partition_info.non_partition_filter; auto all_data_schema = DataField::ConvertDataFieldsToArrowSchema(data_schema->Fields()); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr final_reader, + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr final_reader, ApplyIndexAndDvReaderIfNeeded( std::move(file_reader), file_meta, all_data_schema, read_schema, predicate, deletion_file_map, row_ranges, data_file_path_factory)); if (!final_reader) { // file is skipped by index or dv - return std::unique_ptr(); + return std::unique_ptr(); } return std::make_unique(field_mapping_builder->GetReadFieldCount(), diff --git a/src/paimon/core/operation/abstract_split_read.h b/src/paimon/core/operation/abstract_split_read.h index 73b0549a..20db532b 100644 --- a/src/paimon/core/operation/abstract_split_read.h +++ b/src/paimon/core/operation/abstract_split_read.h @@ -59,14 +59,7 @@ class 
AbstractSplitRead : public SplitRead { public: ~AbstractSplitRead() override = default; - protected: - AbstractSplitRead(const std::shared_ptr& path_factory, - const std::shared_ptr& context, - std::unique_ptr&& schema_manager, - const std::shared_ptr& memory_pool, - const std::shared_ptr& executor); - - Result>> CreateRawFileReaders( + Result>> CreateRawFileReaders( const BinaryRow& partition, const std::vector>& data_files, const std::shared_ptr& read_schema, const std::shared_ptr& predicate, @@ -74,6 +67,13 @@ class AbstractSplitRead : public SplitRead { const std::optional>& row_ranges, const std::shared_ptr& data_file_path_factory) const; + protected: + AbstractSplitRead(const std::shared_ptr& path_factory, + const std::shared_ptr& context, + std::unique_ptr&& schema_manager, + const std::shared_ptr& memory_pool, + const std::shared_ptr& executor); + static std::unordered_map CreateDeletionFileMap( const DataSplitImpl& data_split); @@ -86,7 +86,7 @@ class AbstractSplitRead : public SplitRead { protected: // return nullptr if file is skipped by index or dv - virtual Result> ApplyIndexAndDvReaderIfNeeded( + virtual Result> ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, @@ -111,7 +111,7 @@ class AbstractSplitRead : public SplitRead { const ReaderBuilder* reader_builder) const; // return nullptr if data file is skipped by index or dv - Result> CreateFieldMappingReader( + Result> CreateFieldMappingReader( const std::string& data_file_path, const std::shared_ptr& file_meta, const BinaryRow& partition, const ReaderBuilder* reader_builder, const FieldMappingBuilder* field_mapping_builder, diff --git a/src/paimon/core/operation/data_evolution_split_read.cpp b/src/paimon/core/operation/data_evolution_split_read.cpp index 7a55ec40..afda12bd 100644 --- a/src/paimon/core/operation/data_evolution_split_read.cpp +++ 
b/src/paimon/core/operation/data_evolution_split_read.cpp @@ -25,10 +25,10 @@ #include "paimon/common/reader/complete_row_kind_batch_reader.h" #include "paimon/common/reader/concat_batch_reader.h" #include "paimon/common/table/special_fields.h" +#include "paimon/common/utils/object_utils.h" #include "paimon/common/utils/range_helper.h" #include "paimon/core/core_options.h" #include "paimon/core/global_index/indexed_split_impl.h" - namespace paimon { Status DataEvolutionSplitRead::BlobBunch::Add(const std::shared_ptr& file) { if (!BlobUtils::IsBlobFile(file->file_name)) { @@ -155,7 +155,7 @@ Result> DataEvolutionSplitRead::InnerCreateReader( if (need_merge_files.size() == 1) { // No need to merge fields, just create a single file reader PAIMON_ASSIGN_OR_RAISE( - std::vector> raw_file_readers, + std::vector> raw_file_readers, CreateRawFileReaders(split_impl->Partition(), need_merge_files, raw_read_schema_, /*predicate=*/nullptr, /*deletion_file_map=*/{}, row_ranges, data_file_path_factory)); @@ -174,7 +174,7 @@ Result> DataEvolutionSplitRead::InnerCreateReader( ApplyPredicateFilterIfNeeded(std::move(concat_batch_reader), context_->GetPredicate())); return std::make_unique(std::move(batch_reader), pool_); } -Result> DataEvolutionSplitRead::ApplyIndexAndDvReaderIfNeeded( +Result> DataEvolutionSplitRead::ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, const std::shared_ptr& predicate, @@ -196,7 +196,7 @@ Result> DataEvolutionSplitRead::ApplyIndexAndDvRead PAIMON_RETURN_NOT_OK( file_reader->SetReadSchema(&c_read_schema, /*predicate=*/nullptr, selection_row_ids)); - std::unique_ptr reader; + std::unique_ptr reader; if (!file_reader->SupportPreciseBitmapSelection() && selection_row_ids) { // several format(e.g. 
lance, blob) will return accurate batch result, where // ApplyBitmapIndexBatchReader is not necessary @@ -335,16 +335,18 @@ Result> DataEvolutionSplitRead::CreateU // create new FieldMappingReader for read partial fields auto file_read_schema = DataField::ConvertDataFieldsToArrowSchema(read_fields_in_file); PAIMON_ASSIGN_OR_RAISE( - std::vector> file_readers, + std::vector> file_readers, CreateRawFileReaders(partition, bunch->Files(), file_read_schema, /*predicate=*/nullptr, /*deletion_file_map=*/{}, row_ranges, data_file_path_factory)); if (file_readers.size() == 1) { file_batch_readers[file_idx] = std::move(file_readers[0]); } else { + auto raw_readers = + ObjectUtils::MoveVector>(std::move(file_readers)); // Concat multiple blob files that map to the same data file. file_batch_readers[file_idx] = - std::make_unique(std::move(file_readers), pool_); + std::make_unique(std::move(raw_readers), pool_); } } } diff --git a/src/paimon/core/operation/data_evolution_split_read.h b/src/paimon/core/operation/data_evolution_split_read.h index 5b8c700a..bd3e8a4c 100644 --- a/src/paimon/core/operation/data_evolution_split_read.h +++ b/src/paimon/core/operation/data_evolution_split_read.h @@ -71,7 +71,7 @@ class DataEvolutionSplitRead : public AbstractSplitRead { Result Match(const std::shared_ptr& split, bool force_keep_delete) const override; - Result> ApplyIndexAndDvReaderIfNeeded( + Result> ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, diff --git a/src/paimon/core/operation/merge_file_split_read.cpp b/src/paimon/core/operation/merge_file_split_read.cpp index 4c003c0f..1f613312 100644 --- a/src/paimon/core/operation/merge_file_split_read.cpp +++ b/src/paimon/core/operation/merge_file_split_read.cpp @@ -34,8 +34,10 @@ #include "paimon/common/types/data_field.h" #include "paimon/common/utils/arrow/arrow_utils.h" #include 
"paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/object_utils.h" #include "paimon/core/core_options.h" #include "paimon/core/deletionvectors/apply_deletion_vector_batch_reader.h" +#include "paimon/core/deletionvectors/bitmap_deletion_vector.h" #include "paimon/core/deletionvectors/deletion_vector.h" #include "paimon/core/io/async_key_value_projection_reader.h" #include "paimon/core/io/concat_key_value_record_reader.h" @@ -171,7 +173,7 @@ MergeFileSplitRead::CreateMergeFunctionWrapper(const CoreOptions& core_options, return std::make_shared(std::move(merge_function)); } -Result> MergeFileSplitRead::ApplyIndexAndDvReaderIfNeeded( +Result> MergeFileSplitRead::ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, const std::shared_ptr& predicate, @@ -185,15 +187,33 @@ Result> MergeFileSplitRead::ApplyIndexAndDvReaderIf PAIMON_ASSIGN_OR_RAISE(deletion_vector, DeletionVector::Read(options_.GetFileSystem().get(), dv_iter->second, pool_.get())); } + + const RoaringBitmap32* deletion = nullptr; + if (auto* bitmap_dv = dynamic_cast(deletion_vector.get())) { + deletion = bitmap_dv->GetBitmap(); + } + + std::optional actual_selection; + if (deletion) { + actual_selection = *deletion; + PAIMON_ASSIGN_OR_RAISE(uint64_t num_rows, file_reader->GetNumberOfRows()); + actual_selection.value().Flip(0, num_rows); + } + ::ArrowSchema c_read_schema; PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*read_schema, &c_read_schema)); - PAIMON_RETURN_NOT_OK( - file_reader->SetReadSchema(&c_read_schema, predicate, /*selection_bitmap=*/std::nullopt)); - // TODO(xinyu.lxy): may push down bitmap - if (deletion_vector && !deletion_vector->IsEmpty()) { + + PAIMON_RETURN_NOT_OK(file_reader->SetReadSchema(&c_read_schema, predicate, actual_selection)); + + if (!file_reader->SupportPreciseBitmapSelection() && actual_selection) { return 
std::make_unique(std::move(file_reader), std::move(deletion_vector)); } + if (deletion_vector && !deletion && !deletion_vector->IsEmpty()) { + // TODO(xinyu.lxy): if deletion vector is bitmap64, use ApplyBitmapIndexBatchReader to + // filter result + return Status::NotImplemented("Only support BitmapDeletionVector"); + } return std::move(file_reader); } @@ -227,13 +247,14 @@ Result> MergeFileSplitRead::CreateNoMergeReader( PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr read_schema, raw_read_schema_->AddField(0, row_kind_field)); PAIMON_ASSIGN_OR_RAISE( - std::vector> raw_file_readers, + std::vector> raw_file_readers, CreateRawFileReaders(data_split->Partition(), data_split->DataFiles(), read_schema, only_filter_key ? predicate_for_keys_ : context_->GetPredicate(), deletion_file_map, /*row_ranges=*/{}, data_file_path_factory)); - auto concat_batch_reader = - std::make_unique(std::move(raw_file_readers), pool_); + auto raw_readers = + ObjectUtils::MoveVector>(std::move(raw_file_readers)); + auto concat_batch_reader = std::make_unique(std::move(raw_readers), pool_); return AbstractSplitRead::ApplyPredicateFilterIfNeeded(std::move(concat_batch_reader), context_->GetPredicate()); } @@ -433,7 +454,7 @@ Result> MergeFileSplitRead::CreateReaderFo // no overlap in a run const auto& data_files = sorted_run.Files(); PAIMON_ASSIGN_OR_RAISE( - std::vector> raw_file_readers, + std::vector> raw_file_readers, CreateRawFileReaders(partition, data_files, read_schema_, predicate, deletion_file_map, /*row_ranges=*/{}, data_file_path_factory)); diff --git a/src/paimon/core/operation/merge_file_split_read.h b/src/paimon/core/operation/merge_file_split_read.h index 471a1e44..a1d5e396 100644 --- a/src/paimon/core/operation/merge_file_split_read.h +++ b/src/paimon/core/operation/merge_file_split_read.h @@ -84,7 +84,7 @@ class MergeFileSplitRead : public AbstractSplitRead { Result Match(const std::shared_ptr& split, bool force_keep_delete) const override; - Result> 
ApplyIndexAndDvReaderIfNeeded( + Result> ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, diff --git a/src/paimon/core/operation/raw_file_split_read.cpp b/src/paimon/core/operation/raw_file_split_read.cpp index cbc72111..415ef1e2 100644 --- a/src/paimon/core/operation/raw_file_split_read.cpp +++ b/src/paimon/core/operation/raw_file_split_read.cpp @@ -26,6 +26,7 @@ #include "paimon/common/reader/complete_row_kind_batch_reader.h" #include "paimon/common/reader/concat_batch_reader.h" #include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/object_utils.h" #include "paimon/core/core_options.h" #include "paimon/core/deletionvectors/bitmap_deletion_vector.h" #include "paimon/core/deletionvectors/deletion_vector.h" @@ -80,11 +81,13 @@ Result> RawFileSplitRead::CreateReader( auto deletion_file_map = CreateDeletionFileMap(data_files, deletion_files); PAIMON_ASSIGN_OR_RAISE( - std::vector> raw_file_readers, + std::vector> raw_file_readers, CreateRawFileReaders(partition, data_files, raw_read_schema_, predicate, deletion_file_map, /*row_ranges=*/{}, data_file_path_factory)); - auto concat_batch_reader = - std::make_unique(std::move(raw_file_readers), pool_); + + auto raw_readers = + ObjectUtils::MoveVector>(std::move(raw_file_readers)); + auto concat_batch_reader = std::make_unique(std::move(raw_readers), pool_); PAIMON_ASSIGN_OR_RAISE(std::unique_ptr batch_reader, ApplyPredicateFilterIfNeeded(std::move(concat_batch_reader), predicate)); return std::make_unique(std::move(batch_reader), pool_); @@ -115,7 +118,7 @@ Result RawFileSplitRead::Match(const std::shared_ptr& split, return matched; } -Result> RawFileSplitRead::ApplyIndexAndDvReaderIfNeeded( +Result> RawFileSplitRead::ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, 
const std::shared_ptr& predicate, @@ -171,7 +174,7 @@ Result> RawFileSplitRead::ApplyIndexAndDvReaderIfNe PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*read_schema, &c_read_schema)); PAIMON_RETURN_NOT_OK(file_reader->SetReadSchema(&c_read_schema, predicate, actual_selection)); - std::unique_ptr reader; + std::unique_ptr reader; if (!file_reader->SupportPreciseBitmapSelection() && actual_selection) { // several format(e.g. lance, blob) will return accurate batch result, where // ApplyBitmapIndexBatchReader is not necessary @@ -182,7 +185,7 @@ Result> RawFileSplitRead::ApplyIndexAndDvReaderIfNe } if (deletion_vector && !deletion && !deletion_vector->IsEmpty()) { - // TODO(xinyu.lxy): if deletion vector is bitmap, use ApplyBitmapIndexBatchReader to + // TODO(xinyu.lxy): if deletion vector is bitmap64, use ApplyBitmapIndexBatchReader to // filter result return Status::NotImplemented("Only support BitmapDeletionVector"); } diff --git a/src/paimon/core/operation/raw_file_split_read.h b/src/paimon/core/operation/raw_file_split_read.h index 7c822c65..b7438f8a 100644 --- a/src/paimon/core/operation/raw_file_split_read.h +++ b/src/paimon/core/operation/raw_file_split_read.h @@ -69,7 +69,7 @@ class RawFileSplitRead : public AbstractSplitRead { Result Match(const std::shared_ptr& split, bool force_keep_delete) const override; - Result> ApplyIndexAndDvReaderIfNeeded( + Result> ApplyIndexAndDvReaderIfNeeded( std::unique_ptr&& file_reader, const std::shared_ptr& file, const std::shared_ptr& data_schema, const std::shared_ptr& read_schema, diff --git a/src/paimon/format/avro/avro_file_batch_reader.h b/src/paimon/format/avro/avro_file_batch_reader.h index 38aa054c..98d5deed 100644 --- a/src/paimon/format/avro/avro_file_batch_reader.h +++ b/src/paimon/format/avro/avro_file_batch_reader.h @@ -45,7 +45,7 @@ class AvroFileBatchReader : public FileBatchReader { Status SetReadSchema(::ArrowSchema* read_schema, const std::shared_ptr& predicate, const std::optional& 
selection_bitmap) override; - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { return previous_first_row_; } diff --git a/src/paimon/format/avro/avro_file_batch_reader_test.cpp b/src/paimon/format/avro/avro_file_batch_reader_test.cpp index 285b8c27..15ae3a90 100644 --- a/src/paimon/format/avro/avro_file_batch_reader_test.cpp +++ b/src/paimon/format/avro/avro_file_batch_reader_test.cpp @@ -320,25 +320,26 @@ TEST_F(AvroFileBatchReaderTest, TestGetPreviousBatchFirstRowNumber) { ASSERT_OK_AND_ASSIGN(auto num_rows, reader->GetNumberOfRows()); ASSERT_EQ(4, num_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(std::numeric_limits::max(), + reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber().value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber().value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber().value()); ArrowArrayRelease(batch4.first.get()); ArrowSchemaRelease(batch4.second.get()); ASSERT_OK_AND_ASSIGN(auto batch5, reader->NextBatch()); - ASSERT_EQ(4, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(4, 
reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch5)); } diff --git a/src/paimon/format/blob/blob_file_batch_reader.h b/src/paimon/format/blob/blob_file_batch_reader.h index d2a76673..d71b8425 100644 --- a/src/paimon/format/blob/blob_file_batch_reader.h +++ b/src/paimon/format/blob/blob_file_batch_reader.h @@ -98,7 +98,13 @@ class BlobFileBatchReader : public FileBatchReader { Result NextBatch() override; - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { + if (all_blob_lengths_.size() != target_blob_lengths_.size()) { + return Status::Invalid( + "Cannot call GetPreviousBatchFirstRowNumber in BlobFileBatchReader because, after " + "bitmap pushdown, rows in the array returned by NextBatch are no longer " + "contiguous."); + } return previous_batch_first_row_number_; } diff --git a/src/paimon/format/blob/blob_file_batch_reader_test.cpp b/src/paimon/format/blob/blob_file_batch_reader_test.cpp index c4376a92..dbc0f392 100644 --- a/src/paimon/format/blob/blob_file_batch_reader_test.cpp +++ b/src/paimon/format/blob/blob_file_batch_reader_test.cpp @@ -169,56 +169,25 @@ TEST_F(BlobFileBatchReaderTest, TestRowNumbers) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(std::numeric_limits::max(), + reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); ArrowArrayRelease(batch1.first.get()); ArrowSchemaRelease(batch1.second.get()); - ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch2, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1, 
reader->GetPreviousBatchFirstRowNumber().value()); ArrowArrayRelease(batch2.first.get()); ArrowSchemaRelease(batch2.second.get()); ASSERT_OK_AND_ASSIGN(auto batch3, reader->NextBatch()); - ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(2, reader->GetPreviousBatchFirstRowNumber().value()); ArrowArrayRelease(batch3.first.get()); ArrowSchemaRelease(batch3.second.get()); ASSERT_OK_AND_ASSIGN(auto batch4, reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); } -TEST_F(BlobFileBatchReaderTest, TestRowNumbersWithBitmap) { - auto schema = arrow::schema({BlobUtils::ToArrowField("my_blob_field", false)}); - ::ArrowSchema c_schema; - ASSERT_TRUE(arrow::ExportSchema(*schema, &c_schema).ok()); - - std::string test_data_path = paimon::test::GetDataDir() + "/db_with_blob.db/table_with_blob/"; - auto dir = paimon::test::UniqueTestDirectory::Create(); - std::string table_path = dir->Str(); - ASSERT_TRUE(paimon::test::TestUtil::CopyDirectory(test_data_path, table_path)); - - std::shared_ptr fs = std::make_shared(); - ASSERT_OK_AND_ASSIGN( - std::shared_ptr input_stream, - fs->Open(table_path + "/bucket-0/data-d7816e8e-6c6d-4e28-9137-837cdf706350-1.blob")); - ASSERT_OK_AND_ASSIGN(auto reader, BlobFileBatchReader::Create( - input_stream, - /*batch_size=*/1, /*blob_as_descriptor=*/true, pool_)); - RoaringBitmap32 roaring; - roaring.Add(1); - ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, roaring)); - ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); - ASSERT_EQ(3, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFirstRowNumber()); - ASSERT_OK_AND_ASSIGN(auto batch1, reader->NextBatch()); - ASSERT_EQ(1, reader->GetPreviousBatchFirstRowNumber()); - ArrowArrayRelease(batch1.first.get()); - ArrowSchemaRelease(batch1.second.get()); - ASSERT_OK_AND_ASSIGN(auto batch2, 
reader->NextBatch()); - ASSERT_EQ(3, reader->GetPreviousBatchFirstRowNumber()); - ASSERT_TRUE(BatchReader::IsEofBatch(batch2)); -} - TEST_F(BlobFileBatchReaderTest, InvalidScenario) { auto dir = paimon::test::UniqueTestDirectory::Create(); ASSERT_TRUE(dir); @@ -287,7 +256,8 @@ TEST_P(BlobFileBatchReaderTest, EmptyFile) { ASSERT_OK(reader->SetReadSchema(&c_schema, nullptr, std::nullopt)); ASSERT_OK_AND_ASSIGN(auto number_of_rows, reader->GetNumberOfRows()); ASSERT_EQ(0, number_of_rows); - ASSERT_EQ(std::numeric_limits::max(), reader->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(std::numeric_limits::max(), + reader->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch, reader->NextBatch()); ASSERT_TRUE(BatchReader::IsEofBatch(batch)); } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index 903c72af..1cb8fe41 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -41,10 +41,12 @@ class LanceFileBatchReader : public FileBatchReader { Result NextBatch() override; - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { // TODO(xinyu.lxy): support function - assert(false); - return -1; + return Status::Invalid( + "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after " + "bitmap pushdown, rows in the array returned by NextBatch are no longer " + "contiguous."); } Result GetNumberOfRows() const override { diff --git a/src/paimon/format/orc/orc_file_batch_reader.h b/src/paimon/format/orc/orc_file_batch_reader.h index ca4043b3..429e7680 100644 --- a/src/paimon/format/orc/orc_file_batch_reader.h +++ b/src/paimon/format/orc/orc_file_batch_reader.h @@ -62,7 +62,7 @@ class OrcFileBatchReader : public PrefetchFileBatchReader { // OrcFileBatchReader. Therefore, we need to hold BatchReader when using output ArrowArray. 
Result NextBatch() override; - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { return reader_->GetRowNumber(); } diff --git a/src/paimon/format/orc/orc_file_batch_reader_test.cpp b/src/paimon/format/orc/orc_file_batch_reader_test.cpp index ea6b1c07..d43c9300 100644 --- a/src/paimon/format/orc/orc_file_batch_reader_test.cpp +++ b/src/paimon/format/orc/orc_file_batch_reader_test.cpp @@ -356,10 +356,10 @@ TEST_P(OrcFileBatchReaderTest, TestNextBatchSimple) { for (auto batch_size : {1, 2, 3, 5, 8, 10}) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, batch_size, natural_read_size); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto result_array, paimon::test::ReadResultCollector::CollectResult( orc_batch_reader.get())); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 8); orc_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -630,18 +630,18 @@ TEST_F(OrcFileBatchReaderTest, TestReadNoField) { auto orc_batch_reader = PrepareOrcFileBatchReader(file_name, &read_schema, /*batch_size=*/3, /*natural_read_size=*/10); // read 3 rows - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), -1); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), -1); ASSERT_OK_AND_ASSIGN(auto batch1, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), 0); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 0); // read 3 rows ASSERT_OK_AND_ASSIGN(auto batch2, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), 3); + 
ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 3); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), 6); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, orc_batch_reader->NextBatch()); - ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber(), 8); + ASSERT_EQ(orc_batch_reader->GetPreviousBatchFirstRowNumber().value(), 8); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); orc_batch_reader->Close(); diff --git a/src/paimon/format/parquet/file_reader_wrapper.h b/src/paimon/format/parquet/file_reader_wrapper.h index becadb7d..d79e46fe 100644 --- a/src/paimon/format/parquet/file_reader_wrapper.h +++ b/src/paimon/format/parquet/file_reader_wrapper.h @@ -51,7 +51,7 @@ class FileReaderWrapper { Result> Next(); - uint64_t GetPreviousBatchFirstRowNumber() const { + Result GetPreviousBatchFirstRowNumber() const { return previous_first_row_; } diff --git a/src/paimon/format/parquet/file_reader_wrapper_test.cpp b/src/paimon/format/parquet/file_reader_wrapper_test.cpp index 9b2a6155..fccfc60f 100644 --- a/src/paimon/format/parquet/file_reader_wrapper_test.cpp +++ b/src/paimon/format/parquet/file_reader_wrapper_test.cpp @@ -178,9 +178,9 @@ TEST_F(FileReaderWrapperTest, EmptyFile) { ASSERT_EQ(0, reader_wrapper->GetNumberOfRowGroups()); ASSERT_EQ(std::numeric_limits::max(), reader_wrapper->GetNextRowToRead()); ASSERT_EQ(std::numeric_limits::max(), - reader_wrapper->GetPreviousBatchFirstRowNumber()); + reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto batch, reader_wrapper->Next()); - ASSERT_EQ(0, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(0, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_EQ(0, reader_wrapper->GetNextRowToRead()); ASSERT_TRUE(reader_wrapper->GetAllRowGroupRanges().empty()); 
ASSERT_OK_AND_ASSIGN(std::shared_ptr record_batch, reader_wrapper->Next()); @@ -200,7 +200,7 @@ TEST_F(FileReaderWrapperTest, Simple) { ASSERT_EQ(6, reader_wrapper->GetNumberOfRowGroups()); ASSERT_EQ(std::numeric_limits::max(), reader_wrapper->GetNextRowToRead()); ASSERT_EQ(std::numeric_limits::max(), - reader_wrapper->GetPreviousBatchFirstRowNumber()); + reader_wrapper->GetPreviousBatchFirstRowNumber().value()); std::vector> expected_all_row_group_ranges = { {0, 1000}, {1000, 2000}, {2000, 3000}, {3000, 4000}, {4000, 5000}, {5000, 5500}}; ASSERT_EQ(expected_all_row_group_ranges, reader_wrapper->GetAllRowGroupRanges()); @@ -208,17 +208,17 @@ TEST_F(FileReaderWrapperTest, Simple) { ASSERT_TRUE(record_batch); ASSERT_EQ(512, record_batch->num_rows()); ASSERT_EQ(512, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(0, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(0, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_TRUE(record_batch); ASSERT_EQ(488, record_batch->num_rows()); ASSERT_EQ(1000, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(512, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(512, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_TRUE(record_batch); ASSERT_EQ(512, record_batch->num_rows()); ASSERT_EQ(1512, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_NOK_WITH_MSG(reader_wrapper->SeekToRow(1001), "should not be in the middle of readable range"); ASSERT_OK(reader_wrapper->SeekToRow(1000)); @@ -226,16 +226,16 @@ TEST_F(FileReaderWrapperTest, Simple) { ASSERT_TRUE(record_batch); ASSERT_EQ(512, record_batch->num_rows()); ASSERT_EQ(1512, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(1000, 
reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK(reader_wrapper->SeekToRow(5600)); ASSERT_EQ(5500, reader_wrapper->GetNextRowToRead()); ASSERT_EQ(6, reader_wrapper->current_row_group_idx_); - ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_FALSE(record_batch); ASSERT_EQ(5500, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(5500, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(5500, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); } TEST_F(FileReaderWrapperTest, GetRowGroupRanges) { @@ -294,17 +294,17 @@ TEST_F(FileReaderWrapperTest, PrepareForReading) { ASSERT_OK(reader_wrapper->SeekToRow(0)); ASSERT_EQ(1000, reader_wrapper->GetNextRowToRead()); ASSERT_EQ(std::numeric_limits::max(), - reader_wrapper->GetPreviousBatchFirstRowNumber()); + reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(auto record_batch, reader_wrapper->Next()); ASSERT_EQ(512, record_batch->num_rows()); ASSERT_EQ(1, record_batch->num_columns()); ASSERT_EQ(1512, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1000, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_TRUE(record_batch); ASSERT_EQ(488, record_batch->num_rows()); ASSERT_EQ(5500, reader_wrapper->GetNextRowToRead()); - ASSERT_EQ(1512, reader_wrapper->GetPreviousBatchFirstRowNumber()); + ASSERT_EQ(1512, reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_FALSE(record_batch); @@ -313,7 +313,7 @@ TEST_F(FileReaderWrapperTest, PrepareForReading) { /*column_indices=*/{})); ASSERT_EQ(0, reader_wrapper->GetNextRowToRead()); 
ASSERT_EQ(std::numeric_limits::max(), - reader_wrapper->GetPreviousBatchFirstRowNumber()); + reader_wrapper->GetPreviousBatchFirstRowNumber().value()); ASSERT_OK_AND_ASSIGN(record_batch, reader_wrapper->Next()); ASSERT_EQ(512, record_batch->num_rows()); ASSERT_EQ(0, record_batch->num_columns()); @@ -323,7 +323,7 @@ TEST_F(FileReaderWrapperTest, PrepareForReading) { /*column_indices=*/{0})); ASSERT_EQ(5500, reader_wrapper->GetNextRowToRead()); ASSERT_EQ(std::numeric_limits::max(), - reader_wrapper->GetPreviousBatchFirstRowNumber()); + reader_wrapper->GetPreviousBatchFirstRowNumber().value()); } } // namespace paimon::parquet::test diff --git a/src/paimon/format/parquet/parquet_file_batch_reader.h b/src/paimon/format/parquet/parquet_file_batch_reader.h index 6294eecd..81fb2b8d 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader.h +++ b/src/paimon/format/parquet/parquet_file_batch_reader.h @@ -84,7 +84,7 @@ class ParquetFileBatchReader : public PrefetchFileBatchReader { Result>> GenReadRanges( bool* need_prefetch) const override; - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { assert(reader_); return reader_->GetPreviousBatchFirstRowNumber(); } diff --git a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp index 9f4e0b2f..5ddeb50f 100644 --- a/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp +++ b/src/paimon/format/parquet/parquet_file_batch_reader_test.cpp @@ -239,11 +239,11 @@ TEST_F(ParquetFileBatchReaderTest, TestNextBatchSimple) { auto parquet_batch_reader = PrepareParquetFileBatchReader(file_name, schema_, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, batch_size); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto result_array, 
paimon::test::ReadResultCollector::CollectResult( parquet_batch_reader.get())); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); parquet_batch_reader->Close(); auto expected_array = std::make_shared(struct_array_); ASSERT_TRUE(result_array->Equals(expected_array)); @@ -527,19 +527,19 @@ TEST_F(ParquetFileBatchReaderTest, TestReadNoField) { PrepareParquetFileBatchReader(file_name, read_schema, /*predicate=*/nullptr, /*selection_bitmap=*/std::nullopt, /*batch_size=*/2); // read 2 rows - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), std::numeric_limits::max()); ASSERT_OK_AND_ASSIGN(auto batch1, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), 0); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 0); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch2, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), 2); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 2); // read 2 rows ASSERT_OK_AND_ASSIGN(auto batch3, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), 4); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 4); // read rows with eof ASSERT_OK_AND_ASSIGN(auto batch4, parquet_batch_reader->NextBatch()); - ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber(), 6); + ASSERT_EQ(parquet_batch_reader->GetPreviousBatchFirstRowNumber().value(), 6); ASSERT_TRUE(BatchReader::IsEofBatch(batch4)); parquet_batch_reader->Close(); diff --git a/src/paimon/testing/mock/mock_file_batch_reader.h b/src/paimon/testing/mock/mock_file_batch_reader.h index 19a89a26..eb2bc1b5 100644 --- a/src/paimon/testing/mock/mock_file_batch_reader.h +++ 
b/src/paimon/testing/mock/mock_file_batch_reader.h @@ -149,7 +149,7 @@ class MockFileBatchReader : public PrefetchFileBatchReader { return metrics; } - uint64_t GetPreviousBatchFirstRowNumber() const override { + Result GetPreviousBatchFirstRowNumber() const override { return previous_batch_first_row_num_; } diff --git a/src/paimon/testing/mock/mock_key_value_data_file_record_reader.h b/src/paimon/testing/mock/mock_key_value_data_file_record_reader.h index fa98e5af..b9ed75ef 100644 --- a/src/paimon/testing/mock/mock_key_value_data_file_record_reader.h +++ b/src/paimon/testing/mock/mock_key_value_data_file_record_reader.h @@ -25,7 +25,7 @@ namespace paimon::test { // mock reader hold data array class MockKeyValueDataFileRecordReader : public KeyValueDataFileRecordReader { public: - MockKeyValueDataFileRecordReader(std::unique_ptr&& reader, int32_t key_arity, + MockKeyValueDataFileRecordReader(std::unique_ptr&& reader, int32_t key_arity, const std::shared_ptr& value_schema, int32_t level, const std::shared_ptr& pool) : KeyValueDataFileRecordReader(std::move(reader), key_arity, value_schema, level, pool) {} From ee4820bfc9e6a09ef9e52dbc2640d5eb245e609e Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Fri, 13 Mar 2026 10:10:48 +0000 Subject: [PATCH 2/8] fix0610 --- src/paimon/common/utils/object_utils.h | 7 ++++--- src/paimon/core/mergetree/lookup_levels.cpp | 4 ++-- src/paimon/core/mergetree/lookup_levels.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/paimon/common/utils/object_utils.h b/src/paimon/common/utils/object_utils.h index 008b0d21..bc3c9a99 100644 --- a/src/paimon/common/utils/object_utils.h +++ b/src/paimon/common/utils/object_utils.h @@ -27,7 +27,6 @@ #include "paimon/result.h" #include "paimon/traits.h" - namespace paimon { /// Utils for objects. class ObjectUtils { @@ -133,10 +132,12 @@ class ObjectUtils { return index_map; } - /// Precondition: U can be moved to T. 
+ /// Precondition: U and T must be pointer and U::value can move to T::value template static std::vector MoveVector(std::vector&& input) { - static_assert(std::is_constructible_v, "U cannot be moved to T"); + static_assert(is_pointer::value && is_pointer::value && + std::is_convertible_v, value_type_traits_t>, + "U and T must be pointer and U::value can move to T::value"); std::vector result; result.reserve(input.size()); for (auto& item : input) { diff --git a/src/paimon/core/mergetree/lookup_levels.cpp b/src/paimon/core/mergetree/lookup_levels.cpp index c3f73a21..21fc1968 100644 --- a/src/paimon/core/mergetree/lookup_levels.cpp +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -87,7 +87,7 @@ Result> LookupLevels::Lookup(const std::shared_ptr -Result> LookupLevels::lookupLevel0( +Result> LookupLevels::LookupLevel0( const std::shared_ptr& key, const std::set, Levels::Level0Comparator>& level0) { auto lookup = [this](const std::shared_ptr& key, @@ -241,7 +241,7 @@ Status LookupLevels::CreateSstFileFromDataFile(const std::shared_ptrPut(std::move(key_bytes), std::move(value_bytes))); } } - kv_writer->Close(); + PAIMON_RETURN_NOT_OK(kv_writer->Close()); write_guard.Release(); return Status::OK(); } diff --git a/src/paimon/core/mergetree/lookup_levels.h b/src/paimon/core/mergetree/lookup_levels.h index cd5281ca..ed18432a 100644 --- a/src/paimon/core/mergetree/lookup_levels.h +++ b/src/paimon/core/mergetree/lookup_levels.h @@ -49,7 +49,7 @@ class LookupLevels { Result> Lookup(const std::shared_ptr& key, int32_t start_level); - Result> lookupLevel0( + Result> LookupLevel0( const std::shared_ptr& key, const std::set, Levels::Level0Comparator>& level0); From d2ed6793e7b394e4e494544b73da942fafe8d74e Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Fri, 13 Mar 2026 18:20:37 +0800 Subject: [PATCH 3/8] fix --- src/paimon/core/mergetree/lookup_levels.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/core/mergetree/lookup_levels.cpp 
b/src/paimon/core/mergetree/lookup_levels.cpp index 21fc1968..8f7929e5 100644 --- a/src/paimon/core/mergetree/lookup_levels.cpp +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -81,7 +81,7 @@ Result> LookupLevels::Lookup(const std::shared_ptr& key, const std::set, Levels::Level0Comparator>& level0) - -> Result> { return this->lookupLevel0(key, level0); }; + -> Result> { return this->LookupLevel0(key, level0); }; return LookupUtils::Lookup(*levels_, key, start_level, std::function(lookup), std::function(lookup_level0)); } From 0b7782f9f00816447185d944a90a646b8480693c Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Sat, 14 Mar 2026 10:41:36 +0800 Subject: [PATCH 4/8] fix --- src/paimon/core/mergetree/levels_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/core/mergetree/levels_test.cpp b/src/paimon/core/mergetree/levels_test.cpp index 1449b556..ac35dc94 100644 --- a/src/paimon/core/mergetree/levels_test.cpp +++ b/src/paimon/core/mergetree/levels_test.cpp @@ -117,7 +117,7 @@ TEST_F(LevelsTest, TestAddLevel0File) { ASSERT_EQ(levels->TotalFileSize(), 5); auto new_level0 = CreateDataFileMeta(0, 0, 100, 0); - levels->AddLevel0File(new_level0); + ASSERT_OK(levels->AddLevel0File(new_level0)); ASSERT_EQ(levels->TotalFileSize(), 6); std::vector expected_sorted_run = { LevelSortedRun(0, SortedRun::FromSingle(input_files[1])), From 28f8fde9ceb3e6393f5e844cf19584f56d566b4d Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Sat, 14 Mar 2026 12:21:53 +0800 Subject: [PATCH 5/8] fix lance GetPreviousBatchFirstRowNumber --- .../format/lance/lance_file_batch_reader.cpp | 11 +++- .../format/lance/lance_file_batch_reader.h | 16 ++++-- .../lance/lance_format_reader_writer_test.cpp | 55 +++++++++++++++++++ 3 files changed, 76 insertions(+), 6 deletions(-) diff --git a/src/paimon/format/lance/lance_file_batch_reader.cpp b/src/paimon/format/lance/lance_file_batch_reader.cpp index e7da4ed6..79afcacf 100644 --- 
a/src/paimon/format/lance/lance_file_batch_reader.cpp +++ b/src/paimon/format/lance/lance_file_batch_reader.cpp @@ -70,8 +70,8 @@ Status LanceFileBatchReader::SetReadSchema(::ArrowSchema* read_schema, arrow::ImportSchema(read_schema)); read_field_names_ = arrow_schema->field_names(); assert(!read_field_names_.empty()); + read_row_ids_.clear(); if (selection_bitmap) { - read_row_ids_.clear(); read_row_ids_.reserve(selection_bitmap.value().Cardinality()); for (auto iter = selection_bitmap.value().Begin(); iter != selection_bitmap.value().End(); ++iter) { @@ -84,6 +84,8 @@ Status LanceFileBatchReader::SetReadSchema(::ArrowSchema* read_schema, release_stream_reader(stream_reader_, error_message_.data(), error_message_.size()); PAIMON_RETURN_NOT_OK(LanceToPaimonStatus(err_code, error_message_)); stream_reader_ = nullptr; + previous_batch_first_row_num_ = std::numeric_limits::max(); + last_batch_row_num_ = 0; } return Status::OK(); } @@ -102,6 +104,12 @@ Result LanceFileBatchReader::NextBatch() { PAIMON_RETURN_NOT_OK(LanceToPaimonStatus(err_code, error_message_)); assert(stream_reader_); } + if (previous_batch_first_row_num_ == std::numeric_limits::max()) { + // first read + previous_batch_first_row_num_ = 0; + } else { + previous_batch_first_row_num_ += last_batch_row_num_; + } auto c_array = std::make_unique(); auto c_schema = std::make_unique(); bool is_eof = false; @@ -111,6 +119,7 @@ Result LanceFileBatchReader::NextBatch() { if (is_eof) { return BatchReader::MakeEofBatch(); } + last_batch_row_num_ = c_array->length; return std::make_pair(std::move(c_array), std::move(c_schema)); } diff --git a/src/paimon/format/lance/lance_file_batch_reader.h b/src/paimon/format/lance/lance_file_batch_reader.h index 1cb8fe41..fb262803 100644 --- a/src/paimon/format/lance/lance_file_batch_reader.h +++ b/src/paimon/format/lance/lance_file_batch_reader.h @@ -42,11 +42,14 @@ class LanceFileBatchReader : public FileBatchReader { Result NextBatch() override; Result 
GetPreviousBatchFirstRowNumber() const override { - // TODO(xinyu.lxy): support function - return Status::Invalid( - "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after " - "bitmap pushdown, rows in the array returned by NextBatch are no longer " - "contiguous."); + if (!read_row_ids_.empty() && read_row_ids_.size() != num_rows_) { + // TODO(xinyu.lxy): support function + return Status::Invalid( + "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after " + "bitmap pushdown, rows in the array returned by NextBatch are no longer " + "contiguous."); + } + return previous_batch_first_row_num_; } Result GetNumberOfRows() const override { @@ -76,6 +79,9 @@ class LanceFileBatchReader : public FileBatchReader { int32_t batch_size_ = -1; int32_t batch_readahead_ = -1; uint64_t num_rows_ = 0; + // only validate when there is no bitmap pushdown + uint64_t previous_batch_first_row_num_ = std::numeric_limits::max(); + uint64_t last_batch_row_num_ = 0; mutable std::string error_message_; LanceFileReader* file_reader_ = nullptr; LanceReaderAdapter* stream_reader_ = nullptr; diff --git a/src/paimon/format/lance/lance_format_reader_writer_test.cpp b/src/paimon/format/lance/lance_format_reader_writer_test.cpp index 06408ab0..d4bcc8b3 100644 --- a/src/paimon/format/lance/lance_format_reader_writer_test.cpp +++ b/src/paimon/format/lance/lance_format_reader_writer_test.cpp @@ -419,4 +419,59 @@ TEST_F(LanceFileReaderWriterTest, TestTimestampType) { CheckResult(src_chunk_array, schema, /*enable_tz=*/false); } +TEST_F(LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) { + arrow::FieldVector fields = {arrow::field("f1", arrow::int32()), + arrow::field("f2", arrow::utf8())}; + auto schema = arrow::schema(fields); + auto array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({fields}), R"([ + [1, "Hello"], + [2, "World"], + [3, "apple"], + [4, "Alice"], + [5, "Bob"], + [6, "Lucy"] + ])") 
+ .ValueOrDie()); + auto src_chunk_array = std::make_shared(arrow::ArrayVector({array})); + + auto dir = paimon::test::UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + std::string file_path = dir->Str() + "/test.lance"; + WriteFile(file_path, src_chunk_array, schema); + ASSERT_OK_AND_ASSIGN( + std::unique_ptr reader, + LanceFileBatchReader::Create(file_path, /*batch_size=*/4, /*batch_readahead=*/2)); + ASSERT_EQ(std::numeric_limits::max(), + reader->GetPreviousBatchFirstRowNumber().value()); + + // first batch row 0-3 + ASSERT_OK_AND_ASSIGN(auto read_batch, reader->NextBatch()); + ASSERT_OK_AND_ASSIGN(auto read_array, + paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); + ASSERT_TRUE(read_array->Equals(array->Slice(0, 4))); + ASSERT_EQ(0, reader->GetPreviousBatchFirstRowNumber().value()); + + // second batch 4-5 + ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); + ASSERT_OK_AND_ASSIGN(read_array, + paimon::test::ReadResultCollector::GetArray(std::move(read_batch))); + ASSERT_TRUE(read_array->Equals(array->Slice(4, 2))); + ASSERT_EQ(4, reader->GetPreviousBatchFirstRowNumber().value()); + + // eof + ASSERT_OK_AND_ASSIGN(read_batch, reader->NextBatch()); + ASSERT_TRUE(BatchReader::IsEofBatch(read_batch)); + ASSERT_EQ(6, reader->GetPreviousBatchFirstRowNumber().value()); + + // test with bitmap pushdown + ArrowSchema c_read_schema; + ASSERT_TRUE(arrow::ExportSchema(*schema, &c_read_schema).ok()); + ASSERT_OK(reader->SetReadSchema(&c_read_schema, /*predicate=*/nullptr, + /*selection_bitmap=*/RoaringBitmap32::From({0, 3}))); + ASSERT_NOK_WITH_MSG( + reader->GetPreviousBatchFirstRowNumber(), + "Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after bitmap " + "pushdown, rows in the array returned by NextBatch are no longer contiguous."); +} } // namespace paimon::lance::test From c1e6cd3e58fe15c1ea88be9f99615d3835be5033 Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Sun, 15 Mar 2026 08:40:36 +0800 Subject: [PATCH 
6/8] fix --- src/paimon/common/utils/object_utils_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/paimon/common/utils/object_utils_test.cpp b/src/paimon/common/utils/object_utils_test.cpp index 9d052399..095ac201 100644 --- a/src/paimon/common/utils/object_utils_test.cpp +++ b/src/paimon/common/utils/object_utils_test.cpp @@ -106,7 +106,6 @@ TEST(ObjectUtilsTest, TestMoveVector) { auto base_vec = paimon::ObjectUtils::MoveVector>(std::move(derived_vec)); - ASSERT_TRUE(derived_vec.empty()); ASSERT_EQ(base_vec[0]->Value(), 10); ASSERT_EQ(base_vec[1]->Value(), 20); ASSERT_EQ(base_vec[2]->Value(), 30); From 073bcc19bf39f44d1beba5437d2ae00546b46f90 Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Mon, 16 Mar 2026 03:07:49 +0000 Subject: [PATCH 7/8] fix1107 --- src/paimon/core/mergetree/levels.cpp | 2 ++ src/paimon/core/mergetree/lookup_file.h | 5 ++++- src/paimon/core/mergetree/lookup_file_test.cpp | 1 + src/paimon/core/mergetree/lookup_levels.cpp | 2 -- src/paimon/core/mergetree/sorted_run.h | 1 + 5 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/paimon/core/mergetree/levels.cpp b/src/paimon/core/mergetree/levels.cpp index ba3f5e94..3cf3175b 100644 --- a/src/paimon/core/mergetree/levels.cpp +++ b/src/paimon/core/mergetree/levels.cpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ #include "paimon/core/mergetree/levels.h" + +#include namespace paimon { bool Levels::Level0Comparator::operator()(const std::shared_ptr& a, const std::shared_ptr& b) const { diff --git a/src/paimon/core/mergetree/lookup_file.h b/src/paimon/core/mergetree/lookup_file.h index 469109bc..ec12c3f1 100644 --- a/src/paimon/core/mergetree/lookup_file.h +++ b/src/paimon/core/mergetree/lookup_file.h @@ -15,6 +15,7 @@ */ #pragma once +#include "fmt/format.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/lookup/lookup_store_factory.h" #include "paimon/common/utils/binary_row_partition_computer.h" @@ -35,7 +36,9 @@ class LookupFile { reader_(std::move(reader)) {} ~LookupFile() { - [[maybe_unused]] auto status = Close(); + if (!closed_) { + [[maybe_unused]] auto status = Close(); + } } const std::string& LocalFile() const { return local_file_; diff --git a/src/paimon/core/mergetree/lookup_file_test.cpp b/src/paimon/core/mergetree/lookup_file_test.cpp index 7aeaf1ff..b05b3789 100644 --- a/src/paimon/core/mergetree/lookup_file_test.cpp +++ b/src/paimon/core/mergetree/lookup_file_test.cpp @@ -23,6 +23,7 @@ namespace paimon::test { TEST(LookupFileTest, TestSimple) { class FakeLookupStoreReader : public LookupStoreReader { + public: FakeLookupStoreReader(const std::map& kvs, std::shared_ptr& pool) : pool_(pool), kvs_(kvs) {} diff --git a/src/paimon/core/mergetree/lookup_levels.cpp b/src/paimon/core/mergetree/lookup_levels.cpp index 8f7929e5..fce80818 100644 --- a/src/paimon/core/mergetree/lookup_levels.cpp +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -253,8 +253,6 @@ Result>> LookupLevels::GetOrCreateProcess if (iter != schema_id_and_ser_version_to_processors_.end()) { return iter->second; } - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr table_schema, - schema_manager_->ReadSchema(schema_id)); PAIMON_ASSIGN_OR_RAISE( std::shared_ptr> processor, processor_factory_->Create(ser_version, serializer_factory_, value_schema_, pool_)); diff --git 
a/src/paimon/core/mergetree/sorted_run.h b/src/paimon/core/mergetree/sorted_run.h index ca16155b..efa248da 100644 --- a/src/paimon/core/mergetree/sorted_run.h +++ b/src/paimon/core/mergetree/sorted_run.h @@ -16,6 +16,7 @@ #pragma once +#include #include #include #include From b9800aecc7d71b29db5d3dd21362a260fe1b0fce Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Mon, 16 Mar 2026 13:42:46 +0800 Subject: [PATCH 8/8] fix review --- src/paimon/core/mergetree/levels_test.cpp | 2 +- src/paimon/core/mergetree/lookup_levels.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/paimon/core/mergetree/levels_test.cpp b/src/paimon/core/mergetree/levels_test.cpp index ac35dc94..e7af5d1c 100644 --- a/src/paimon/core/mergetree/levels_test.cpp +++ b/src/paimon/core/mergetree/levels_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. + * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/paimon/core/mergetree/lookup_levels.cpp b/src/paimon/core/mergetree/lookup_levels.cpp index fce80818..5c12cce7 100644 --- a/src/paimon/core/mergetree/lookup_levels.cpp +++ b/src/paimon/core/mergetree/lookup_levels.cpp @@ -214,7 +214,9 @@ Status LookupLevels::CreateSstFileFromDataFile(const std::shared_ptrCreateWriter(fs_, kv_file_path, bloom_filter, pool_)); ScopeGuard write_guard([&]() -> void { - [[maybe_unused]] auto status = fs_->Delete(kv_file_path, /*recursive=*/false); + [[maybe_unused]] auto status = kv_writer->Close(); + reader->Close(); + [[maybe_unused]] auto delete_status = fs_->Delete(kv_file_path, /*recursive=*/false); }); // Read each KeyValue and write to lookup file with or without position.