Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/src/arrow/chunked_array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr<DataType> type)
}
}

int64_t ChunkedArray::ComputeLogicalNullCount() const {
int64_t count = 0;
for (const auto& chunk : chunks_) {
count += chunk->ComputeLogicalNullCount();
}
return count;
}

Result<std::shared_ptr<ChunkedArray>> ChunkedArray::Make(ArrayVector chunks,
std::shared_ptr<DataType> type) {
if (type == nullptr) {
Expand Down
10 changes: 10 additions & 0 deletions cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ class ARROW_EXPORT ChunkedArray {
/// \brief Return the null count stored for this chunked array
///
/// This only reads the null_count_ member; nothing is recomputed by the call.
/// For types that carry logical nulls without a validity bitmap (such as
/// union and run-end encoded arrays), use ComputeLogicalNullCount() instead.
///
/// \return the total number of nulls among all chunks
int64_t null_count() const { return null_count_; }

/// \brief Compute the logical null count across all chunks
///
/// This returns the sum of Array::ComputeLogicalNullCount() over the chunks.
/// Unlike null_count(), it accounts for types that carry logical nulls
/// without a validity bitmap, such as union and run-end encoded arrays; for
/// those types the count is recomputed on every call.
///
/// \return the total number of logical nulls among all chunks, or 0 when
/// there are no chunks
/// \see Array::ComputeLogicalNullCount
int64_t ComputeLogicalNullCount() const;

/// \brief Return how many chunks this chunked array holds
///
/// The size of the chunk vector is narrowed to int by the static_cast.
///
/// \return the total number of chunks in the chunked array
int num_chunks() const { return static_cast<int>(chunks_.size()); }

Expand Down
32 changes: 32 additions & 0 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <memory>
#include <vector>

#include "arrow/array/builder_run_end.h"
#include "arrow/chunk_resolver.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
Expand Down Expand Up @@ -76,6 +77,37 @@ TEST_F(TestChunkedArray, Make) {
ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0}, int16()));
}

TEST_F(TestChunkedArray, ComputeLogicalNullCount) {
  // Case 1: chunks backed by a validity bitmap. Both null_count() and the
  // logical null count report the per-chunk nulls added together (1 + 1).
  auto ints_first = ArrayFromJSON(int32(), "[1, null, 3]");
  auto ints_second = ArrayFromJSON(int32(), "[null, 5]");
  ChunkedArray bitmap_backed({ints_first, ints_second});
  ASSERT_EQ(bitmap_backed.null_count(), 2);
  ASSERT_EQ(bitmap_backed.ComputeLogicalNullCount(), 2);

  // Case 2: a chunked array created by MakeEmpty reports zero logical nulls.
  ASSERT_OK_AND_ASSIGN(auto no_values, ChunkedArray::MakeEmpty(int32()));
  ASSERT_EQ(no_values->ComputeLogicalNullCount(), 0);

  // Case 3: run-end encoded chunks. There is no top-level validity bitmap, so
  // null_count() stays at 0 even though the null runs below are logically
  // null; the logical count must pick them up.
  auto memory_pool = default_memory_pool();
  auto encoded_type = run_end_encoded(int32(), int32());
  auto run_end_builder = std::make_shared<Int32Builder>(memory_pool);
  auto value_builder = std::make_shared<Int32Builder>(memory_pool);
  RunEndEncodedBuilder builder(memory_pool, run_end_builder, value_builder,
                               encoded_type);

  // First chunk: two copies of the value 2, then a run of 3 nulls.
  ASSERT_OK(builder.AppendScalar(*MakeScalar<int32_t>(2), 2));
  ASSERT_OK(builder.AppendNulls(3));
  ASSERT_OK_AND_ASSIGN(auto encoded_first, builder.Finish());

  // Second chunk: a run of 4 nulls, then five copies of the value 8.
  ASSERT_OK(builder.AppendNulls(4));
  ASSERT_OK(builder.AppendScalar(*MakeScalar<int32_t>(8), 5));
  ASSERT_OK_AND_ASSIGN(auto encoded_second, builder.Finish());

  // 3 + 4 logical nulls across the two chunks.
  ChunkedArray encoded_chunks({encoded_first, encoded_second}, encoded_type);
  ASSERT_EQ(encoded_chunks.null_count(), 0);
  ASSERT_EQ(encoded_chunks.ComputeLogicalNullCount(), 7);
}

TEST_F(TestChunkedArray, MakeEmpty) {
ASSERT_OK_AND_ASSIGN(std::shared_ptr<ChunkedArray> empty,
ChunkedArray::MakeEmpty(int64()));
Expand Down