# Patch notes (reader commentary placed before the first "diff --git" header; not part of any hunk).
#
# be/src/exprs/function/array/function_array_element.h
#   - First hunk: when the first argument's type is TYPE_MAP the previous flow is kept
#     (convert_to_full_column_if_const, strip Nullable, call _execute_map). Otherwise the column
#     goes through unpack_if_const() and the resulting is_const_array flag is forwarded to
#     _execute_nullable().
#   - _execute_number / _execute_string / _execute_common: gain is_const_array and
#     input_rows_count parameters, iterate input_rows_count rows, and read offsets and
#     arr_null_map at index_check_const(row, is_const_array) instead of at row.
#   - _execute_nullable: gains is_const_array, no longer calls convert_to_full_column_if_const(),
#     and its DCHECK expects offsets.size() == 1 when is_const_array is set.
#   - The caller that builds the "value" array column (presumably _execute_map; its header is
#     outside the hunk) passes false for is_const_array.
#
# be/test/exprs/function/function_array_element_test.cpp
#   - Adds three static helpers and five TEST cases that pass a ColumnConst array together with
#     a per-row Nullable(Int32) index column.
#
# NOTE(review): this copy of the patch had its line breaks collapsed, and text inside angle
# brackets appears to have been stripped (for example "assert_cast(...)", "std::make_shared(...)",
# "template" with no parameter list, "#include" with no header). Code tokens below are reproduced
# as found. Blank context lines, indentation and wrapping are inferred, and the "@@" counts are
# copied verbatim, so this text should not be expected to apply as-is.
# NOTE(review): run_element_at() uses non-fatal EXPECT_* checks and then dereferences func, and
# the tests dereference the returned column; confirm what happens if lookup or execute fails.
diff --git a/be/src/exprs/function/array/function_array_element.h b/be/src/exprs/function/array/function_array_element.h
index 065f0f5fd18b5e..2a932479862a5f 100644
--- a/be/src/exprs/function/array/function_array_element.h
+++ b/be/src/exprs/function/array/function_array_element.h
@@ -105,25 +105,42 @@ class FunctionArrayElement : public IFunction {
         UInt8* dst_null_map = dst_null_column->get_data().data();
         const UInt8* src_null_map = nullptr;
         ColumnsWithTypeAndName args;
-        block.replace_by_position(
-                arguments[0],
-                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
-        auto col_left = block.get_by_position(arguments[0]);
-        if (col_left.column->is_nullable()) {
-            const auto* null_col = assert_cast(col_left.column.get());
-            src_null_map = null_col->get_null_map_column().get_data().data();
-            args = {{null_col->get_nested_column_ptr(), remove_nullable(col_left.type),
-                     col_left.name},
-                    block.get_by_position(arguments[1])};
-        } else {
-            args = {col_left, block.get_by_position(arguments[1])};
-        }
         ColumnPtr res_column = nullptr;
-        if (is_column(args[0].column.get()) ||
-            check_column_const(args[0].column.get())) {
+
+        auto col_left_raw = block.get_by_position(arguments[0]);
+        // Map element lookup requires row-aligned offsets; keep original path for map type.
+        if (remove_nullable(col_left_raw.type)->get_primitive_type() == TYPE_MAP) {
+            block.replace_by_position(arguments[0],
+                                      col_left_raw.column->convert_to_full_column_if_const());
+            auto col_left = block.get_by_position(arguments[0]);
+            if (col_left.column->is_nullable()) {
+                const auto* null_col = assert_cast(col_left.column.get());
+                src_null_map = null_col->get_null_map_column().get_data().data();
+                args = {{null_col->get_nested_column_ptr(), remove_nullable(col_left.type),
+                         col_left.name},
+                        block.get_by_position(arguments[1])};
+            } else {
+                args = {col_left, block.get_by_position(arguments[1])};
+            }
             res_column = _execute_map(args, input_rows_count, src_null_map, dst_null_map);
         } else {
-            res_column = _execute_nullable(args, input_rows_count, src_null_map, dst_null_map);
+            // Array element access: avoid materializing a constant array column.
+            // A literal like [[1],[2]] becomes ColumnConst with a single-row inner column;
+            // unpack_if_const() gives us that inner column plus a constancy flag so inner
+            // loops can use index_check_const() instead of expanding N copies.
+            auto [unpacked_col, is_const_array] = unpack_if_const(col_left_raw.column);
+            if (unpacked_col->is_nullable()) {
+                const auto* null_col = assert_cast(unpacked_col.get());
+                src_null_map = null_col->get_null_map_column().get_data().data();
+                args = {{null_col->get_nested_column_ptr(), remove_nullable(col_left_raw.type),
+                         col_left_raw.name},
+                        block.get_by_position(arguments[1])};
+            } else {
+                args = {{unpacked_col, col_left_raw.type, col_left_raw.name},
+                        block.get_by_position(arguments[1])};
+            }
+            res_column = _execute_nullable(args, input_rows_count, src_null_map, dst_null_map,
+                                           is_const_array);
         }
         if (!res_column) {
             return Status::RuntimeError("unsupported types for function {}({}, {})", get_name(),
@@ -173,21 +190,23 @@ class FunctionArrayElement : public IFunction {
     ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& indices,
                               const UInt8* nested_null_map, UInt8* dst_null_map,
-                              const UInt8* idx_null_map, bool is_const_index) const {
+                              const UInt8* idx_null_map, bool is_const_index, bool is_const_array,
+                              size_t input_rows_count) const {
         const auto& nested_data = reinterpret_cast(nested_column).get_data();
 
         const auto& index_data = assert_cast(indices).get_data();
         auto dst_column = nested_column.clone_empty();
         auto& dst_data = reinterpret_cast(*dst_column).get_data();
-        dst_data.resize(offsets.size());
+        dst_data.resize(input_rows_count);
 
-        for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = row == 0 ? 0 : offsets[row - 1];
-            size_t len = offsets[row] - off;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            size_t arr_row = index_check_const(row, is_const_array);
+            size_t off = arr_row == 0 ? 0 : offsets[arr_row - 1];
+            size_t len = offsets[arr_row] - off;
             size_t idx = index_check_const(row, is_const_index);
             auto index = (idx_null_map && idx_null_map[idx]) ? 0
                                                              : static_cast(index_data[idx]);
-            bool null_flag = bool(arr_null_map && arr_null_map[row]);
+            bool null_flag = bool(arr_null_map && arr_null_map[arr_row]);
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -208,7 +227,8 @@ class FunctionArrayElement : public IFunction {
     ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& indices,
                               const UInt8* nested_null_map, UInt8* dst_null_map,
-                              const UInt8* idx_null_map, bool is_const_index) const {
+                              const UInt8* idx_null_map, bool is_const_index, bool is_const_array,
+                              size_t input_rows_count) const {
         const auto& src_str_offs =
                 reinterpret_cast(nested_column).get_offsets();
         const auto& src_str_chars =
@@ -218,17 +238,18 @@ class FunctionArrayElement : public IFunction {
         // prepare return data
         auto dst_column = ColumnString::create();
         auto& dst_str_offs = dst_column->get_offsets();
-        dst_str_offs.resize(offsets.size());
+        dst_str_offs.resize(input_rows_count);
         auto& dst_str_chars = dst_column->get_chars();
         dst_str_chars.reserve(src_str_chars.size());
 
-        for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = row == 0 ? 0 : offsets[row - 1];
-            size_t len = offsets[row] - off;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            size_t arr_row = index_check_const(row, is_const_array);
+            size_t off = arr_row == 0 ? 0 : offsets[arr_row - 1];
+            size_t len = offsets[arr_row] - off;
             size_t idx = index_check_const(row, is_const_index);
             auto index = (idx_null_map && idx_null_map[idx]) ? 0
                                                              : static_cast(index_data[idx]);
-            bool null_flag = bool(arr_null_map && arr_null_map[row]);
+            bool null_flag = bool(arr_null_map && arr_null_map[arr_row]);
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -277,26 +298,28 @@ class FunctionArrayElement : public IFunction {
         ColumnWithTypeAndName data(std::move(val_arr), std::make_shared(val_type),
                                    "value");
         ColumnsWithTypeAndName args = {data, indices};
-        return _execute_nullable(args, input_rows_count, src_null_map, dst_null_map);
+        return _execute_nullable(args, input_rows_count, src_null_map, dst_null_map, false);
     }
 
     template
     ColumnPtr _execute_common(const ColumnArray::Offsets64& offsets, const IColumn& nested_column,
                               const UInt8* arr_null_map, const IColumn& indices,
                               const UInt8* nested_null_map, UInt8* dst_null_map,
-                              const UInt8* idx_null_map, bool is_const_index) const {
+                              const UInt8* idx_null_map, bool is_const_index, bool is_const_array,
+                              size_t input_rows_count) const {
         const auto& index_data = assert_cast(indices).get_data();
         auto dst_column = nested_column.clone_empty();
-        dst_column->reserve(offsets.size());
+        dst_column->reserve(input_rows_count);
 
-        for (size_t row = 0; row < offsets.size(); ++row) {
-            size_t off = row == 0 ? 0 : offsets[row - 1];
-            size_t len = offsets[row] - off;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            size_t arr_row = index_check_const(row, is_const_array);
+            size_t off = arr_row == 0 ? 0 : offsets[arr_row - 1];
+            size_t len = offsets[arr_row] - off;
             size_t idx = index_check_const(row, is_const_index);
             auto index = (idx_null_map && idx_null_map[idx]) ? 0
                                                              : static_cast(index_data[idx]);
-            bool null_flag = bool(arr_null_map && arr_null_map[row]);
+            bool null_flag = bool(arr_null_map && arr_null_map[arr_row]);
             if (!null_flag && index > 0 && index <= len) {
                 index += off - 1;
             } else if (!null_flag && index < 0 && -index <= len) {
@@ -319,12 +342,14 @@ class FunctionArrayElement : public IFunction {
     }
 
     ColumnPtr _execute_nullable(const ColumnsWithTypeAndName& arguments, size_t input_rows_count,
-                                const UInt8* src_null_map, UInt8* dst_null_map) const {
-        // check array nested column type and get data
-        auto left_column = arguments[0].column->convert_to_full_column_if_const();
-        const auto& array_column = assert_cast(*left_column);
+                                const UInt8* src_null_map, UInt8* dst_null_map,
+                                bool is_const_array) const {
+        // arguments[0].column is already the raw ColumnArray (possibly size-1 when is_const_array).
+        // Do NOT call convert_to_full_column_if_const() here; const-awareness is handled below
+        // via index_check_const(row, is_const_array).
+        const auto& array_column = assert_cast(*arguments[0].column);
         const auto& offsets = array_column.get_offsets();
-        DCHECK(offsets.size() == input_rows_count);
+        DCHECK(is_const_array ? offsets.size() == 1 : offsets.size() == input_rows_count);
         const UInt8* nested_null_map = nullptr;
         ColumnPtr nested_column = nullptr;
         if (is_column_nullable(array_column.get_data())) {
@@ -362,19 +387,22 @@ class FunctionArrayElement : public IFunction {
                 using DataDispatchType = std::decay_t;
                 res = _execute_number(
                         offsets, *nested_column, src_null_map, *idx_col_raw, nested_null_map,
-                        dst_null_map, idx_null_map, is_const_index);
+                        dst_null_map, idx_null_map, is_const_index, is_const_array,
+                        input_rows_count);
                 return true;
             };
 
             if (is_string_type(left_element_type->get_primitive_type())) {
                 res = _execute_string(offsets, *nested_column, src_null_map, *idx_col_raw,
                                       nested_null_map, dst_null_map,
-                                      idx_null_map, is_const_index);
+                                      idx_null_map, is_const_index, is_const_array,
+                                      input_rows_count);
             } else if (!dispatch_switch_scalar(left_element_type->get_primitive_type(),
                                                data_call)) {
                 res = _execute_common(offsets, *nested_column, src_null_map, *idx_col_raw,
                                       nested_null_map, dst_null_map,
-                                      idx_null_map, is_const_index);
+                                      idx_null_map, is_const_index, is_const_array,
+                                      input_rows_count);
             }
             return true;
         };
diff --git a/be/test/exprs/function/function_array_element_test.cpp b/be/test/exprs/function/function_array_element_test.cpp
index 784188f2e43878..7d3c9bceccf364 100644
--- a/be/test/exprs/function/function_array_element_test.cpp
+++ b/be/test/exprs/function/function_array_element_test.cpp
@@ -17,13 +17,21 @@
 
 #include
 
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
 #include "core/data_type/data_type_date.h"
 #include "core/data_type/data_type_date_time.h"
 #include "core/data_type/data_type_decimal.h"
+#include "core/data_type/data_type_nullable.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_string.h"
 #include "core/types.h"
 #include "exprs/function/function_test_util.h"
+#include "exprs/function/simple_function_factory.h"
 
 namespace doris {
 
@@ -131,4 +139,226 @@ TEST(function_array_element_test, element_at) {
     }
 }
 
+// Helper: build a ColumnConst wrapping a single Array(Int32) value [values...].
+// The apparent size of the returned ColumnConst is `apparent_size`.
+static ColumnPtr make_const_int32_array(std::vector values, size_t apparent_size) {
+    auto data_col = ColumnInt32::create();
+    for (auto v : values) {
+        data_col->insert_value(v);
+    }
+    auto offsets = ColumnArray::ColumnOffsets::create();
+    offsets->insert_value(static_cast(values.size()));
+    auto arr = ColumnArray::create(std::move(data_col), std::move(offsets));
+    // Wrap the array in a Nullable column whose single row is marked not-null.
+    auto null_map = ColumnUInt8::create(1, 0 /*not null*/);
+    auto nullable_arr = ColumnNullable::create(std::move(arr), std::move(null_map));
+    return ColumnConst::create(std::move(nullable_arr), apparent_size);
+}
+
+// Helper: build a ColumnConst wrapping a single Array(String) value [values...].
+static ColumnPtr make_const_string_array(std::vector values, size_t apparent_size) {
+    auto data_col = ColumnString::create();
+    for (const auto& v : values) {
+        data_col->insert_data(v.data(), v.size());
+    }
+    auto offsets = ColumnArray::ColumnOffsets::create();
+    offsets->insert_value(static_cast(values.size()));
+    auto arr = ColumnArray::create(std::move(data_col), std::move(offsets));
+    auto null_map = ColumnUInt8::create(1, 0);
+    auto nullable_arr = ColumnNullable::create(std::move(arr), std::move(null_map));
+    return ColumnConst::create(std::move(nullable_arr), apparent_size);
+}
+
+// Invoke element_at(arr_col, idx_col) and return the column stored at result position 2.
+// arr_type must match arr_col; idx_type must match idx_col.
+static ColumnPtr run_element_at(ColumnPtr arr_col, DataTypePtr arr_type, ColumnPtr idx_col,
+                                DataTypePtr idx_type, DataTypePtr result_type,
+                                size_t input_rows_count) {
+    ColumnsWithTypeAndName args = {{arr_col, arr_type, "arr"}, {idx_col, idx_type, "idx"}};
+    auto func = SimpleFunctionFactory::instance().get_function("element_at", args, result_type);
+    EXPECT_NE(func, nullptr);
+
+    Block block;
+    block.insert({arr_col, arr_type, "arr"});
+    block.insert({idx_col, idx_type, "idx"});
+    block.insert({nullptr, result_type, "result"});
+
+    EXPECT_TRUE(func->execute(nullptr, block, {0, 1}, 2, input_rows_count).ok());
+    return block.get_by_position(2).column;
+}
+
+// Tests for element_at with a constant (ColumnConst) array argument and a varying index.
+// Intent: the function should not expand the const array into row-count copies. These tests
+// only assert the per-row results; they do not measure whether a copy was made.
+TEST(function_array_element_test, element_at_const_int32_array_varying_index) {
+    // Const array: [10, 20, 30] (same for every row)
+    // Indices:     [ 1,  2,  3,  4, -1, -3, -4,  0]
+    // Expected:    [10, 20, 30, NG, 30, 10, NG, NG] (NG = NULL)
+    constexpr size_t N = 8;
+
+    auto arr_type =
+            make_nullable(std::make_shared(std::make_shared()));
+    auto const_arr = make_const_int32_array({10, 20, 30}, N);
+
+    auto idx_data = ColumnInt32::create();
+    for (Int32 v : {1, 2, 3, 4, -1, -3, -4, 0}) {
+        idx_data->insert_value(v);
+    }
+    auto idx_null_map = ColumnUInt8::create(N, 0);
+    auto idx_col = ColumnNullable::create(std::move(idx_data), std::move(idx_null_map));
+    auto idx_type = make_nullable(std::make_shared());
+
+    auto result_type = make_nullable(std::make_shared());
+    auto result = run_element_at(const_arr, arr_type, std::move(idx_col), idx_type, result_type, N);
+
+    ASSERT_EQ(result->size(), N);
+    const auto& nr = assert_cast(*result);
+    const auto& data = assert_cast(nr.get_nested_column());
+
+    struct Expected {
+        bool is_null;
+        Int32 val;
+    };
+    const Expected expected[N] = {{false, 10}, {false, 20}, {false, 30}, {true, 0},
+                                  {false, 30}, {false, 10}, {true, 0}, {true, 0}};
+    for (size_t i = 0; i < N; ++i) {
+        EXPECT_EQ(nr.is_null_at(i), expected[i].is_null) << "row " << i;
+        if (!expected[i].is_null) {
+            EXPECT_EQ(data.get_element(i), expected[i].val) << "row " << i;
+        }
+    }
+}
+
+// Const array where the array itself is NULL → every row must return NULL.
+TEST(function_array_element_test, element_at_const_null_array) {
+    constexpr size_t N = 4;
+
+    auto arr_type =
+            make_nullable(std::make_shared(std::make_shared()));
+
+    // Build a ColumnConst wrapping a size-1 Nullable(Array) that IS null.
+    auto inner_data = ColumnInt32::create();
+    auto inner_offsets = ColumnArray::ColumnOffsets::create();
+    inner_offsets->insert_value(0);
+    auto inner_arr = ColumnArray::create(std::move(inner_data), std::move(inner_offsets));
+    auto null_map = ColumnUInt8::create(1, 1 /*null*/);
+    auto nullable_arr = ColumnNullable::create(std::move(inner_arr), std::move(null_map));
+    ColumnPtr const_arr = ColumnConst::create(std::move(nullable_arr), N);
+
+    auto idx_data = ColumnInt32::create();
+    for (Int32 v : {1, 1, 1, 1}) {
+        idx_data->insert_value(v);
+    }
+    auto idx_null_map = ColumnUInt8::create(N, 0);
+    auto idx_col = ColumnNullable::create(std::move(idx_data), std::move(idx_null_map));
+    auto idx_type = make_nullable(std::make_shared());
+
+    auto result_type = make_nullable(std::make_shared());
+    auto result = run_element_at(const_arr, arr_type, std::move(idx_col), idx_type, result_type, N);
+
+    ASSERT_EQ(result->size(), N);
+    const auto& nr = assert_cast(*result);
+    for (size_t i = 0; i < N; ++i) {
+        EXPECT_TRUE(nr.is_null_at(i)) << "row " << i;
+    }
+}
+
+// Const array with nullable index: NULL index → NULL result.
+TEST(function_array_element_test, element_at_const_array_null_index) {
+    constexpr size_t N = 4;
+
+    auto arr_type =
+            make_nullable(std::make_shared(std::make_shared()));
+    auto const_arr = make_const_int32_array({10, 20, 30}, N);
+
+    auto idx_data = ColumnInt32::create();
+    for (Int32 v : {1, 1, 1, 1}) {
+        idx_data->insert_value(v);
+    }
+    // All indices are NULL
+    auto idx_null_map = ColumnUInt8::create(N, 1 /*null*/);
+    auto idx_col = ColumnNullable::create(std::move(idx_data), std::move(idx_null_map));
+    auto idx_type = make_nullable(std::make_shared());
+
+    auto result_type = make_nullable(std::make_shared());
+    auto result = run_element_at(const_arr, arr_type, std::move(idx_col), idx_type, result_type, N);
+
+    ASSERT_EQ(result->size(), N);
+    const auto& nr = assert_cast(*result);
+    for (size_t i = 0; i < N; ++i) {
+        EXPECT_TRUE(nr.is_null_at(i)) << "row " << i;
+    }
+}
+
+// Const Array(String) – exercises _execute_string code path.
+TEST(function_array_element_test, element_at_const_string_array_varying_index) {
+    constexpr size_t N = 5;
+
+    auto arr_type =
+            make_nullable(std::make_shared(std::make_shared()));
+    auto const_arr = make_const_string_array({"hello", "world", ""}, N);
+
+    auto idx_data = ColumnInt32::create();
+    for (Int32 v : {1, 2, 3, 4, -1}) {
+        idx_data->insert_value(v);
+    }
+    auto idx_null_map = ColumnUInt8::create(N, 0);
+    auto idx_col = ColumnNullable::create(std::move(idx_data), std::move(idx_null_map));
+    auto idx_type = make_nullable(std::make_shared());
+
+    auto result_type = make_nullable(std::make_shared());
+    auto result = run_element_at(const_arr, arr_type, std::move(idx_col), idx_type, result_type, N);
+
+    ASSERT_EQ(result->size(), N);
+    const auto& nr = assert_cast(*result);
+    const auto& str_col = assert_cast(nr.get_nested_column());
+
+    EXPECT_FALSE(nr.is_null_at(0));
+    EXPECT_EQ(str_col.get_data_at(0), std::string_view("hello"));
+    EXPECT_FALSE(nr.is_null_at(1));
+    EXPECT_EQ(str_col.get_data_at(1), std::string_view("world"));
+    EXPECT_FALSE(nr.is_null_at(2));
+    EXPECT_EQ(str_col.get_data_at(2), std::string_view("", 0));
+    EXPECT_TRUE(nr.is_null_at(3)); // index 4 is out of bounds
+    EXPECT_FALSE(nr.is_null_at(4));
+    EXPECT_EQ(str_col.get_data_at(4), std::string_view("", 0)); // -1 → last = ""
+}
+
+// Large batch: checks const-array results over N = 4096 rows with a cycling index.
+TEST(function_array_element_test, element_at_const_array_large_batch) {
+    constexpr size_t N = 4096;
+
+    auto arr_type =
+            make_nullable(std::make_shared(std::make_shared()));
+    // Const array: [100, 200, 300]
+    auto const_arr = make_const_int32_array({100, 200, 300}, N);
+
+    // Indices cycle through: 1→100, 2→200, 3→300, 4→NULL
+    auto idx_data = ColumnInt32::create();
+    idx_data->reserve(N);
+    for (size_t i = 0; i < N; ++i) {
+        idx_data->insert_value(static_cast(i % 4 + 1));
+    }
+    auto idx_null_map = ColumnUInt8::create(N, 0);
+    auto idx_col = ColumnNullable::create(std::move(idx_data), std::move(idx_null_map));
+    auto idx_type = make_nullable(std::make_shared());
+
+    auto result_type = make_nullable(std::make_shared());
+    auto result = run_element_at(const_arr, arr_type, std::move(idx_col), idx_type, result_type, N);
+
+    ASSERT_EQ(result->size(), N);
+    const auto& nr = assert_cast(*result);
+    const auto& data = assert_cast(nr.get_nested_column());
+
+    const Int32 expected_vals[4] = {100, 200, 300, 0 /*null*/};
+    const bool expected_null[4] = {false, false, false, true};
+    for (size_t i = 0; i < N; ++i) {
+        size_t slot = i % 4;
+        ASSERT_EQ(nr.is_null_at(i), expected_null[slot]) << "row " << i;
+        if (!expected_null[slot]) {
+            ASSERT_EQ(data.get_element(i), expected_vals[slot]) << "row " << i;
+        }
+    }
+}
+
 } // namespace doris