diff --git a/be/src/agent/be_exec_version_manager.cpp b/be/src/agent/be_exec_version_manager.cpp index bfd0745e3166b2..9ad9d9e9a5949a 100644 --- a/be/src/agent/be_exec_version_manager.cpp +++ b/be/src/agent/be_exec_version_manager.cpp @@ -110,10 +110,22 @@ void BeExecVersionManager::check_function_compatibility(int current_be_exec_vers * * 7: start from doris 3.0.2 * a. window funnel logic change -* b. support const column in serialize/deserialize function: PR #41175 + * b. support const column in serialize/deserialize function: PR #41175 */ -const int BeExecVersionManager::max_be_exec_version = 8; +// ///////////////////////////////////////////////////////////////////////////// +// ATTN: !!! BE EXEC VERSION IS A VERY SENSITIVE COMPATIBILITY FIELD !!! +// 1. We should avoid abusing be_exec_version, especially not using it to handle +// compatibility issues of functions (use function aliases for that instead). +// 2. Do not fork versions in past releases; all new be exec versions should +// first go into master before entering new release versions. +// !!! DO NOT CHANGE IT UNLESS YOU ARE 100% SURE WHAT YOU ARE DOING !!! +// ///////////////////////////////////////////////////////////////////////////// + +// 10: start from doris 4.0.3 +// a. use new fixed object serialization way. 
+ +const int BeExecVersionManager::max_be_exec_version = 10; const int BeExecVersionManager::min_be_exec_version = 0; std::map> BeExecVersionManager::_function_change_map {}; std::set BeExecVersionManager::_function_restrict_map; diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index ffde283f43364b..7ced5b9749ed5a 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -32,6 +32,7 @@ constexpr inline int AGGREGATION_2_1_VERSION = 6; // some aggregation changed the data format after this version constexpr inline int USE_CONST_SERDE = 8; // support const column in serialize/deserialize function: PR #41175 +constexpr inline int USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION = 10; class BeExecVersionManager { public: diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index aca5c500d19838..fee0f4c879c55f 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -302,9 +302,6 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b int col_id = AggSharedState::get_slot_column_id( Base::_shared_state->aggregate_evaluators[i]); auto column = block->get_by_position(col_id).column; - if (column->is_nullable()) { - column = ((vectorized::ColumnNullable*)column.get())->get_nested_column_ptr(); - } size_t buffer_size = Base::_shared_state->aggregate_evaluators[i]->function()->size_of_data() * @@ -354,10 +351,6 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b Base::_shared_state->aggregate_evaluators[i]); } auto column = block->get_by_position(col_id).column; - if (column->is_nullable()) { - column = ((vectorized::ColumnNullable*)column.get()) - ->get_nested_column_ptr(); - } size_t buffer_size = Base::_shared_state->aggregate_evaluators[i] ->function() @@ -412,9 +405,6 @@ Status 
AggSinkLocalState::_merge_without_key(vectorized::Block* block) { int col_id = AggSharedState::get_slot_column_id( Base::_shared_state->aggregate_evaluators[i]); auto column = block->get_by_position(col_id).column; - if (column->is_nullable()) { - column = ((vectorized::ColumnNullable*)column.get())->get_nested_column_ptr(); - } SCOPED_TIMER(_deserialize_data_timer); Base::_shared_state->aggregate_evaluators[i] @@ -731,8 +721,7 @@ AggSinkOperatorX::AggSinkOperatorX(ObjectPool* pool, int operator_id, int dest_i _is_first_phase(tnode.agg_node.__isset.is_first_phase && tnode.agg_node.is_first_phase), _pool(pool), _limit(tnode.limit), - _have_conjuncts((tnode.__isset.vconjunct && !tnode.vconjunct.nodes.empty()) || - (tnode.__isset.conjuncts && !tnode.conjuncts.empty())), + _have_conjuncts(tnode.__isset.conjuncts && !tnode.conjuncts.empty()), _is_colocate(tnode.agg_node.__isset.is_colocate && tnode.agg_node.is_colocate), _agg_fn_output_row_descriptor(descs, tnode.row_tuples, tnode.nullable_tuples) {} diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp b/be/src/pipeline/exec/aggregation_source_operator.cpp index a8062b3c2f5160..4dfc9be8b62176 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/aggregation_source_operator.cpp @@ -500,9 +500,6 @@ Status AggLocalState::merge_with_serialized_key_helper(vectorized::Block* block) for (int i = 0; i < Base::_shared_state->aggregate_evaluators.size(); ++i) { auto col_id = Base::_shared_state->probe_expr_ctxs.size() + i; auto column = block->get_by_position(col_id).column; - if (column->is_nullable()) { - column = ((vectorized::ColumnNullable*)column.get())->get_nested_column_ptr(); - } size_t buffer_size = Base::_shared_state->aggregate_evaluators[i]->function()->size_of_data() * rows; diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index ccbcb38c1ad151..ff874075602749 100644 --- a/be/src/pipeline/exec/operator.cpp +++ 
b/be/src/pipeline/exec/operator.cpp @@ -174,7 +174,7 @@ std::string OperatorXBase::debug_string(RuntimeState* state, int indentation_lev return state->get_local_state(operator_id())->debug_string(indentation_level); } -Status OperatorXBase::init(const TPlanNode& tnode, RuntimeState* /*state*/) { +Status OperatorXBase::init(const TPlanNode& tnode, RuntimeState* state) { std::string node_name = print_plan_node_type(tnode.node_type); _nereids_id = tnode.nereids_id; if (!tnode.intermediate_output_tuple_id_list.empty()) { @@ -194,11 +194,9 @@ Status OperatorXBase::init(const TPlanNode& tnode, RuntimeState* /*state*/) { _op_name = substr + "_OPERATOR"; if (tnode.__isset.vconjunct) { - vectorized::VExprContextSPtr context; - RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(tnode.vconjunct, context)); - _conjuncts.emplace_back(context); + return Status::InternalError("vconjunct is not supported yet"); } else if (tnode.__isset.conjuncts) { - for (auto& conjunct : tnode.conjuncts) { + for (const auto& conjunct : tnode.conjuncts) { vectorized::VExprContextSPtr context; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(conjunct, context)); _conjuncts.emplace_back(context); @@ -206,7 +204,6 @@ Status OperatorXBase::init(const TPlanNode& tnode, RuntimeState* /*state*/) { } // create the projections expr - if (tnode.__isset.projections) { DCHECK(tnode.__isset.output_tuple_id); RETURN_IF_ERROR(vectorized::VExpr::create_expr_trees(tnode.projections, _projections)); @@ -227,6 +224,13 @@ Status OperatorXBase::prepare(RuntimeState* state) { for (auto& conjunct : _conjuncts) { RETURN_IF_ERROR(conjunct->prepare(state, intermediate_row_desc())); } + // When the query option is enabled, order the prepared conjuncts by ascending execute_cost(). + if (state->enable_adjust_conjunct_order_by_cost()) { + std::ranges::sort(_conjuncts, [](const auto& a, const auto& b) { + return a->execute_cost() < b->execute_cost(); + }); + } + for (int i = 0; i < _intermediate_projections.size(); i++) { RETURN_IF_ERROR(vectorized::VExpr::prepare(_intermediate_projections[i], state, 
intermediate_row_desc(i))); diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index a1a7e1fb461cb9..fe42afba15aefe 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -77,8 +77,15 @@ Status ScanLocalStateBase::update_late_arrival_runtime_filter(RuntimeState* stat int& arrived_rf_num) { // Lock needed because _conjuncts can be accessed concurrently by multiple scanner threads std::unique_lock lock(_conjuncts_lock); - return _helper.try_append_late_arrival_runtime_filter(state, _parent->row_descriptor(), - arrived_rf_num, _conjuncts); + RETURN_IF_ERROR(_helper.try_append_late_arrival_runtime_filter(state, _parent->row_descriptor(), + arrived_rf_num, _conjuncts)); + // Re-sort after appending late runtime filters so _conjuncts stays ordered by ascending execute_cost(). + if (state->enable_adjust_conjunct_order_by_cost()) { + std::ranges::sort(_conjuncts, [](const auto& a, const auto& b) { + return a->execute_cost() < b->execute_cost(); + }); + } + return Status::OK(); } Status ScanLocalStateBase::clone_conjunct_ctxs(vectorized::VExprContextSPtrs& scanner_conjuncts) { @@ -323,7 +329,7 @@ Status ScanLocalState::_normalize_conjuncts(RuntimeState* state) { message += conjunct->root()->debug_string(); } } - custom_profile()->add_info_string("RemainedDownPredicates", message); + custom_profile()->add_info_string("RemainedPredicates", message); } for (auto& it : _slot_id_to_value_range) { diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_operator.cpp index 33ee9a785847e0..534de501fa83bf 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/streaming_aggregation_operator.cpp @@ -786,7 +786,6 @@ StreamingAggOperatorX::StreamingAggOperatorX(ObjectPool* pool, int operator_id, _output_tuple_id(tnode.agg_node.output_tuple_id), _needs_finalize(tnode.agg_node.need_finalize), _is_first_phase(tnode.agg_node.__isset.is_first_phase && tnode.agg_node.is_first_phase), - 
_have_conjuncts(tnode.__isset.vconjunct && !tnode.vconjunct.nodes.empty()), _agg_fn_output_row_descriptor(descs, tnode.row_tuples, tnode.nullable_tuples) {} void StreamingAggOperatorX::update_operator(const TPlanNode& tnode, diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.h b/be/src/pipeline/exec/streaming_aggregation_operator.h index 3632bea9430e25..3612adec47d431 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.h +++ b/be/src/pipeline/exec/streaming_aggregation_operator.h @@ -270,7 +270,6 @@ class StreamingAggOperatorX MOCK_REMOVE(final) : public StatefulOperatorX _aggregate_evaluators; bool _can_short_circuit = false; std::vector _make_nullable_keys; - bool _have_conjuncts; RowDescriptor _agg_fn_output_row_descriptor; // For sort limit bool _do_sort_limit = false; diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 24543646cff123..8d61ada801ef81 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -127,6 +127,11 @@ class RuntimeState { : _query_options.mem_limit / 20; } + bool enable_adjust_conjunct_order_by_cost() const { + return _query_options.__isset.enable_adjust_conjunct_order_by_cost && + _query_options.enable_adjust_conjunct_order_by_cost; + } + int32_t max_column_reader_num() const { return _query_options.__isset.max_column_reader_num ? 
_query_options.max_column_reader_num : 20000; @@ -575,6 +580,11 @@ class RuntimeState { return _query_options.__isset.enable_parallel_scan && _query_options.enable_parallel_scan; } + bool enable_aggregate_function_null_v2() const { + return _query_options.__isset.enable_aggregate_function_null_v2 && + _query_options.enable_aggregate_function_null_v2; + } + bool is_read_csv_empty_line_as_null() const { return _query_options.__isset.read_csv_empty_line_as_null && _query_options.read_csv_empty_line_as_null; diff --git a/be/src/util/hash_util.hpp b/be/src/util/hash_util.hpp index 9c5d4ef3aca539..9371f8867ca9d4 100644 --- a/be/src/util/hash_util.hpp +++ b/be/src/util/hash_util.hpp @@ -34,20 +34,93 @@ #include "util/hash/city.h" #include "util/murmur_hash3.h" #include "util/sse_util.hpp" +#include "vec/common/endian.h" namespace doris { #include "common/compile_check_begin.h" +namespace detail { +// Slicing-by-4 table: t[0] is the standard byte-at-a-time table, +// t[1..3] are extended tables for parallel 4-byte processing. +struct CRC32SliceBy4Table { + uint32_t t[4][256] {}; + constexpr CRC32SliceBy4Table() { + // t[0]: standard CRC32 lookup table + for (uint32_t i = 0; i < 256; i++) { + uint32_t c = i; + for (int j = 0; j < 8; j++) { + c = (c & 1) ? ((c >> 1) ^ 0xEDB88320U) : (c >> 1); + } + t[0][i] = c; + } + // t[1..3]: each entry is one additional CRC byte-step applied to t[k-1] + for (uint32_t i = 0; i < 256; i++) { + uint32_t c = t[0][i]; + for (int k = 1; k < 4; k++) { + c = t[0][c & 0xFF] ^ (c >> 8); + t[k][i] = c; + } + } + } +}; +} // namespace detail + // Utility class to compute hash values. class HashUtil { +private: + static inline constexpr detail::CRC32SliceBy4Table CRC32_TABLE {}; + public: static uint32_t zlib_crc_hash(const void* data, uint32_t bytes, uint32_t hash) { return (uint32_t)crc32(hash, (const unsigned char*)data, bytes); } + // Inline CRC32 (zlib-compatible, standard CRC32 polynomial) for fixed-size types. 
+ // Uses Slicing-by-4 technique for 4/8-byte types: processes 4 bytes at a time using + // 4 precomputed lookup tables, reducing serial table lookups from 4 to 1 per 4-byte chunk. + // Polynomial: 0xEDB88320 (reflected form of 0x04C11DB7). + // Endian note: CRC32 reflected algorithm processes bytes in address order (byte[0] first). + // Slicing-by-4 requires byte[0] at LSB of the loaded uint32_t, which is little-endian layout. + // LittleEndian::Load32 provides this on ALL platforms: noop on LE, bswap on BE. + template + static uint32_t zlib_crc32_fixed(const T& value, uint32_t hash) { + const auto* p = reinterpret_cast(&value); + // zlib convention: pre/post XOR with 0xFFFFFFFF + uint32_t crc = hash ^ 0xFFFFFFFFU; + + if constexpr (sizeof(T) == 1) { + // 1 byte: single table lookup + crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8); + } else if constexpr (sizeof(T) == 2) { + // 2 bytes: two sequential table lookups (slicing doesn't help below 4 bytes) + crc = CRC32_TABLE.t[0][(crc ^ p[0]) & 0xFF] ^ (crc >> 8); + crc = CRC32_TABLE.t[0][(crc ^ p[1]) & 0xFF] ^ (crc >> 8); + } else if constexpr (sizeof(T) == 4) { + // 4 bytes: one Slicing-by-4 step — 4 independent lookups in parallel + // LittleEndian::Load32 handles unaligned load + byte-swap on big-endian, + // ensuring byte[0] is always at LSB for correct CRC byte processing order. 
+ uint32_t word = LittleEndian::Load32(p) ^ crc; + crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^ + CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF]; + } else if constexpr (sizeof(T) == 8) { + // 8 bytes: two Slicing-by-4 steps + uint32_t word = LittleEndian::Load32(p) ^ crc; + crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^ + CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF]; + + word = LittleEndian::Load32(p + 4) ^ crc; + crc = CRC32_TABLE.t[3][(word)&0xFF] ^ CRC32_TABLE.t[2][(word >> 8) & 0xFF] ^ + CRC32_TABLE.t[1][(word >> 16) & 0xFF] ^ CRC32_TABLE.t[0][(word >> 24) & 0xFF]; + } else { + // Fallback to zlib for larger/unusual types + return (uint32_t)crc32(hash, (const unsigned char*)&value, sizeof(T)); + } + return crc ^ 0xFFFFFFFFU; + } + static uint32_t zlib_crc_hash_null(uint32_t hash) { // null is treat as 0 when hash static const int INT_VALUE = 0; - return (uint32_t)crc32(hash, (const unsigned char*)(&INT_VALUE), 4); + return zlib_crc32_fixed(INT_VALUE, hash); } template diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index cdcef2f248f593..a0181210ac287d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -46,6 +46,7 @@ class IDataType; struct AggregateFunctionAttr { bool is_window_function {false}; bool is_foreach {false}; + bool enable_aggregate_function_null_v2 {false}; std::vector column_names; }; @@ -131,12 +132,13 @@ class IAggregateFunction { virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena&) const = 0; - virtual void merge_vec(const AggregateDataPtr* places, size_t offset, ConstAggregateDataPtr rhs, - Arena&, const size_t num_rows) const = 0; + virtual void merge_vec(const AggregateDataPtr __restrict* __restrict places, size_t offset, + 
ConstAggregateDataPtr __restrict rhs, Arena&, + const size_t num_rows) const = 0; // same as merge_vec, but only call "merge" function when place is not nullptr - virtual void merge_vec_selected(const AggregateDataPtr* places, size_t offset, - ConstAggregateDataPtr rhs, Arena&, + virtual void merge_vec_selected(const AggregateDataPtr __restrict* __restrict places, + size_t offset, ConstAggregateDataPtr __restrict rhs, Arena&, const size_t num_rows) const = 0; /// Serializes state (to transmit it over the network, for example). @@ -166,9 +168,6 @@ class IAggregateFunction { AggregateDataPtr rhs, const IColumn* column, Arena&, const size_t num_rows) const = 0; - virtual void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const = 0; - /// Deserializes state and merge it with current aggregation function. virtual void deserialize_and_merge(AggregateDataPtr __restrict place, AggregateDataPtr __restrict rhs, BufferReadable& buf, @@ -178,8 +177,15 @@ class IAggregateFunction { const IColumn& column, size_t begin, size_t end, Arena&) const = 0; - virtual void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, - const IColumn& column, Arena&) const = 0; + /// Deserializes every row of `column` and merges it into `place`; a no-op for an empty column. + void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, + Arena& arena) const { + // Guard the empty case: `column.size() - 1` is unsigned and would wrap around to SIZE_MAX. + if (column.empty()) { + return; + } + deserialize_and_merge_from_column_range(place, column, 0, column.size() - 1, arena); + } /// Inserts results into a column. 
// todo: Consider whether this passes a ConstAggregateDataPtr @@ -218,9 +219,6 @@ class IAggregateFunction { Arena& arena, UInt8* use_null_result, UInt8* could_use_previous_result) const = 0; - virtual void streaming_agg_serialize(const IColumn** columns, BufferWritable& buf, - const size_t num_rows, Arena&) const = 0; - virtual void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, const size_t num_rows, Arena&) const = 0; @@ -309,6 +307,9 @@ class IAggregateFunctionHelper : public IAggregateFunction { void destroy_vec(AggregateDataPtr __restrict place, const size_t num_rows) const noexcept override { + if (is_trivial()) { + return; + } const size_t size_of_data_ = size_of_data(); const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { @@ -419,8 +420,9 @@ class IAggregateFunctionHelper : public IAggregateFunction { serialize_vec(places, offset, writer, num_rows); } - void streaming_agg_serialize(const IColumn** columns, BufferWritable& buf, - const size_t num_rows, Arena& arena) const override { + void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, + const size_t num_rows, Arena& arena) const override { + VectorBufferWriter buf(assert_cast(*dst)); std::vector place(size_of_data()); const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { @@ -432,12 +434,6 @@ class IAggregateFunctionHelper : public IAggregateFunction { } } - void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, - const size_t num_rows, Arena& arena) const override { - VectorBufferWriter writer(assert_cast(*dst)); - streaming_agg_serialize(columns, writer, num_rows, arena); - } - void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, IColumn& to) const override { VectorBufferWriter writter(assert_cast(to)); @@ -516,13 +512,9 @@ class IAggregateFunctionHelper : public IAggregateFunction { derived->destroy_vec(rhs, num_rows); 
} - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena& arena, - size_t num_rows) const override { - deserialize_vec(places, assert_cast(&column), arena, num_rows); - } - - void merge_vec(const AggregateDataPtr* places, size_t offset, ConstAggregateDataPtr rhs, - Arena& arena, const size_t num_rows) const override { + void merge_vec(const AggregateDataPtr __restrict* __restrict places, size_t offset, + ConstAggregateDataPtr __restrict rhs, Arena& arena, + const size_t num_rows) const override { const auto* derived = assert_cast(this); const auto size_of_data = derived->size_of_data(); for (size_t i = 0; i != num_rows; ++i) { @@ -530,8 +522,8 @@ class IAggregateFunctionHelper : public IAggregateFunction { } } - void merge_vec_selected(const AggregateDataPtr* places, size_t offset, - ConstAggregateDataPtr rhs, Arena& arena, + void merge_vec_selected(const AggregateDataPtr __restrict* __restrict places, size_t offset, + ConstAggregateDataPtr __restrict rhs, Arena& arena, const size_t num_rows) const override { const auto* derived = assert_cast(this); const auto size_of_data = derived->size_of_data(); @@ -561,14 +553,6 @@ class IAggregateFunctionHelper : public IAggregateFunction { } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena& arena) const override { - if (column.empty()) { - return; - } - deserialize_and_merge_from_column_range(place, column, 0, column.size() - 1, arena); - } - void deserialize_and_merge(AggregateDataPtr __restrict place, AggregateDataPtr __restrict rhs, BufferReadable& buf, Arena& arena) const override { assert_cast(this)->deserialize(rhs, buf, diff --git a/be/src/vec/aggregate_functions/aggregate_function_array_agg.h b/be/src/vec/aggregate_functions/aggregate_function_array_agg.h index 9831c140e51ca2..439c18d5ac6f1c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_array_agg.h +++ 
b/be/src/vec/aggregate_functions/aggregate_function_array_agg.h @@ -324,14 +324,6 @@ class AggregateFunctionArrayAgg this->data(place).insert_result_into(to); } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena& arena) const override { - const size_t num_rows = column.size(); - for (size_t i = 0; i != num_rows; ++i) { - this->data(place).deserialize_and_merge(column, i); - } - } - void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { @@ -340,13 +332,6 @@ class AggregateFunctionArrayAgg } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena& arena, - size_t num_rows) const override { - for (size_t i = 0; i != num_rows; ++i) { - this->data(places).deserialize_and_merge(column, i); - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena& arena) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h index a0f3d25051c5f8..98ee71b0c5db2e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h @@ -56,24 +56,13 @@ template struct AggregateFunctionAvgData { using ResultType = typename PrimitiveTypeTraits::CppType; static constexpr PrimitiveType ResultPType = T; - typename PrimitiveTypeTraits::CppType sum {}; + ResultType sum {}; UInt64 count = 0; - AggregateFunctionAvgData& operator=(const AggregateFunctionAvgData& src) { - sum = src.sum; - count = src.count; - return *this; - } + AggregateFunctionAvgData& operator=(const AggregateFunctionAvgData& src) = default; template - ResultT result() const { - if constexpr (std::is_floating_point_v) { - if constexpr (std::numeric_limits::is_iec559) { - return static_cast(sum) 
/ - static_cast(count); /// allow division by zero - } - } - + ResultT result(ResultType multiplier) const { if (!count) { // null is handled in AggregationNode::_get_without_key_result return static_cast(sum); @@ -81,18 +70,35 @@ struct AggregateFunctionAvgData { // to keep the same result with row vesion; see AggregateFunctions::decimalv2_avg_get_value if constexpr (T == TYPE_DECIMALV2 && IsDecimalV2) { DecimalV2Value decimal_val_count(count, 0); - DecimalV2Value decimal_val_sum(sum); + DecimalV2Value decimal_val_sum(sum * multiplier); DecimalV2Value cal_ret = decimal_val_sum / decimal_val_count; return cal_ret; } else { if constexpr (T == TYPE_DECIMAL256) { - return static_cast(sum / typename PrimitiveTypeTraits::CppType(count)); + return static_cast(sum * multiplier / + typename PrimitiveTypeTraits::CppType(count)); } else { - return static_cast(sum) / static_cast(count); + return static_cast(sum * multiplier) / static_cast(count); } } } + template + ResultT result() const { + if constexpr (std::is_floating_point_v) { + if constexpr (std::numeric_limits::is_iec559) { + return static_cast(sum) / + static_cast(count); /// allow division by zero + } + } + + if (!count) { + // null is handled in AggregationNode::_get_without_key_result + return static_cast(sum); + } + return static_cast(sum) / static_cast(count); + } + void write(BufferWritable& buf) const { buf.write_binary(sum); buf.write_binary(count); @@ -129,17 +135,29 @@ class AggregateFunctionAvg final // an implicit cast to float. 
using DataType = typename Data::ResultType; + + // consistent with fe/fe-common/src/main/java/org/apache/doris/catalog/ScalarType.java + static constexpr uint32_t DEFAULT_MIN_AVG_DECIMAL128_SCALE = 4; + /// ctor for native types AggregateFunctionAvg(const DataTypes& argument_types_) : IAggregateFunctionDataHelper>( argument_types_), - scale(get_decimal_scale(*argument_types_[0])) {} + output_scale(std::max(DEFAULT_MIN_AVG_DECIMAL128_SCALE, + get_decimal_scale(*argument_types_[0]))) { + if constexpr (is_decimal(T)) { + multiplier = ResultType(ResultDataType::get_scale_multiplier( + output_scale - get_decimal_scale(*argument_types_[0]))); + } + } String get_name() const override { return "avg"; } DataTypePtr get_return_type() const override { if constexpr (is_decimal(T)) { - return std::make_shared(ResultDataType::max_precision(), scale); + return std::make_shared( + ResultDataType::max_precision(), + std::max(DEFAULT_MIN_AVG_DECIMAL128_SCALE, output_scale)); } else { return std::make_shared(); } @@ -209,15 +227,11 @@ class AggregateFunctionAvg final void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { auto& column = assert_cast(to); - column.get_data().push_back(this->data(place).template result()); - } - - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - auto& col = assert_cast(column); - DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; - auto* data = col.get_data().data(); - memcpy(places, data, sizeof(Data) * num_rows); + if constexpr (is_decimal(T)) { + column.get_data().push_back(this->data(place).template result(multiplier)); + } else { + column.get_data().push_back(this->data(place).template result()); + } } void serialize_to_column(const std::vector& places, size_t offset, @@ -246,20 +260,6 @@ class AggregateFunctionAvg final } } - NO_SANITIZE_UNDEFINED void deserialize_and_merge_from_column(AggregateDataPtr 
__restrict place, - const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; - auto* data = reinterpret_cast(col.get_data().data()); - - for (size_t i = 0; i != num_rows; ++i) { - this->data(place).sum += data[i].sum; - this->data(place).count += data[i].count; - } - } - NO_SANITIZE_UNDEFINED void deserialize_and_merge_from_column_range( AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { @@ -276,15 +276,17 @@ class AggregateFunctionAvg final void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - this->merge_vec(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec(places, offset, AggregateDataPtr(data), arena, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec_selected(places, offset, AggregateDataPtr(data), arena, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, @@ -355,7 +357,8 @@ class AggregateFunctionAvg final } private: - UInt32 scale; + uint32_t output_scale; + ResultType multiplier {}; // assigned in the ctor only for decimal T; value-initialized otherwise } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index 
a17f1a42ffb7fe..18a1d0480c5e75 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -187,17 +187,6 @@ class AggregateFunctionBitmapSerializationHelper } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - const auto& col = assert_cast(column); - const size_t num_rows = column.size(); - const auto* data = col.get_data().data(); - - for (size_t i = 0; i != num_rows; ++i) { - this->data(place).merge(data[i]); - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index 473b557f70dd65..b0cf1c6aa8d577 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -146,17 +146,6 @@ class AggregateFunctionBitmapAgg final } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - auto& col = assert_cast(column); - DCHECK(col.size() >= num_rows) << "source column's size should greater than num_rows"; - auto* src = col.get_data().data(); - auto* data = &(this->data(places)); - for (size_t i = 0; i != num_rows; ++i) { - data[i].value = src[i]; - } - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { auto& col = assert_cast(*dst); @@ -167,17 +156,6 @@ class AggregateFunctionBitmapAgg final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - auto* data = col.get_data().data(); - - for 
(size_t i = 0; i != num_rows; ++i) { - this->data(place).value |= data[i]; - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h index 2fc31b0360b22e..08f753da91e093 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count.h @@ -61,6 +61,8 @@ class AggregateFunctionCount final DataTypePtr get_return_type() const override { return std::make_shared(); } + bool is_trivial() const override { return true; } + void add(AggregateDataPtr __restrict place, const IColumn**, ssize_t, Arena&) const override { ++data(place).count; } @@ -87,12 +89,6 @@ class AggregateFunctionCount final assert_cast(to).get_data().push_back(data(place).count); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - auto data = assert_cast(column).get_data().data(); - memcpy(places, data, sizeof(Data) * num_rows); - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { auto& col = assert_cast(*dst); @@ -119,16 +115,6 @@ class AggregateFunctionCount final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - auto* data = reinterpret_cast(col.get_data().data()); - for (size_t i = 0; i != num_rows; ++i) { - AggregateFunctionCount::data(place).count += data[i].count; - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { @@ -144,17 +130,17 @@ class AggregateFunctionCount final void 
deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec(places, offset, AggregateDataPtr(data), arena, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec_selected(places, offset, AggregateDataPtr(data), arena, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, @@ -207,6 +193,8 @@ class AggregateFunctionCountNotNullUnary final DataTypePtr get_return_type() const override { return std::make_shared(); } + bool is_trivial() const override { return true; } + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena&) const override { data(place).count += @@ -242,12 +230,6 @@ class AggregateFunctionCountNotNullUnary final } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - auto data = assert_cast(column).get_data().data(); - memcpy(places, data, sizeof(Data) * num_rows); - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { auto& col = assert_cast(*dst); @@ -275,16 +257,6 @@ class AggregateFunctionCountNotNullUnary final } } - void 
deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - auto* data = reinterpret_cast(col.get_data().data()); - for (size_t i = 0; i != num_rows; ++i) { - AggregateFunctionCountNotNullUnary::data(place).count += data[i].count; - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { @@ -301,17 +273,17 @@ class AggregateFunctionCountNotNullUnary final void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec(places, offset, AggregateDataPtr(data), arena, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec_selected(places, offset, AggregateDataPtr(data), arena, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index f30b65bbb41cd0..5e162ef7f35dd1 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ 
b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -261,16 +261,6 @@ class AggregateFunctionMapAgg final } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - const auto& col = assert_cast(column); - auto* data = &(this->data(places)); - for (size_t i = 0; i != num_rows; ++i) { - auto map = col[i].get(); - data->add(map[0], map[1]); - } - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { for (size_t i = 0; i != num_rows; ++i) { @@ -279,16 +269,6 @@ class AggregateFunctionMapAgg final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - for (size_t i = 0; i != num_rows; ++i) { - auto map = col[i].get(); - this->data(place).add(map[0], map[1]); - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_map_v2.h b/be/src/vec/aggregate_functions/aggregate_function_map_v2.h index 0c806219dc335c..a56c54b429186d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map_v2.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map_v2.h @@ -235,16 +235,6 @@ class AggregateFunctionMapAggV2 final } } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - const auto& col = assert_cast(column); - auto* data = &(this->data(places)); - for (size_t i = 0; i != num_rows; ++i) { - auto map = col[i].get(); - data->add(map[0], map[1]); - } - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { for (size_t i = 0; i != num_rows; ++i) { @@ 
-253,16 +243,6 @@ class AggregateFunctionMapAggV2 final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - const auto& col = assert_cast(column); - const size_t num_rows = column.size(); - for (size_t i = 0; i != num_rows; ++i) { - auto map = col[i].get(); - this->data(place).add(map[0], map[1]); - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.h b/be/src/vec/aggregate_functions/aggregate_function_min_max.h index a2a868ad64bcbe..b39b30760929c7 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.h @@ -704,6 +704,8 @@ class AggregateFunctionsSingleValue final DataTypePtr get_return_type() const override { return type; } + bool is_trivial() const override { return Data::IsFixedLength; } + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena& arena) const override { this->data(place).change_if_better(*columns[0], row_num, arena); @@ -739,20 +741,6 @@ class AggregateFunctionsSingleValue final this->data(place).insert_result_into(to); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena& arena, - size_t num_rows) const override { - if constexpr (Data::IsFixedLength) { - const auto& col = assert_cast(column); - auto* column_data = reinterpret_cast(col.get_data().data()); - Data* data = reinterpret_cast(places); - for (size_t i = 0; i != num_rows; ++i) { - data[i] = column_data[i]; - } - } else { - Base::deserialize_from_column(places, column, arena, num_rows); - } - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { if constexpr (Data::IsFixedLength) { @@ -781,20 +769,6 @@ 
class AggregateFunctionsSingleValue final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena& arena) const override { - if constexpr (Data::IsFixedLength) { - const auto& col = assert_cast(column); - auto* column_data = reinterpret_cast(col.get_data().data()); - const size_t num_rows = column.size(); - for (size_t i = 0; i != num_rows; ++i) { - this->data(place).change_if_better(column_data[i], arena); - } - } else { - Base::deserialize_and_merge_from_column(place, column, arena); - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena& arena) const override { @@ -814,17 +788,29 @@ class AggregateFunctionsSingleValue final void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec(places, offset, rhs, arena, num_rows); + if constexpr (Data::IsFixedLength) { + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec(places, offset, AggregateDataPtr(data), arena, num_rows); + } else { + this->deserialize_vec(rhs, assert_cast(column), arena, num_rows); + DEFER({ this->destroy_vec(rhs, num_rows); }); + this->merge_vec(places, offset, rhs, arena, num_rows); + } } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - DEFER({ this->destroy_vec(rhs, num_rows); }); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + if constexpr (Data::IsFixedLength) { + const auto& col = assert_cast(*column); + const auto* data = 
col.get_data().data(); + this->merge_vec_selected(places, offset, AggregateDataPtr(data), arena, num_rows); + } else { + this->deserialize_vec(rhs, assert_cast(column), arena, num_rows); + DEFER({ this->destroy_vec(rhs, num_rows); }); + this->merge_vec_selected(places, offset, rhs, arena, num_rows); + } } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_null_v2.h b/be/src/vec/aggregate_functions/aggregate_function_null_v2.h new file mode 100644 index 00000000000000..21c605b24d03c8 --- /dev/null +++ b/be/src/vec/aggregate_functions/aggregate_function_null_v2.h @@ -0,0 +1,600 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// This file is copied from +// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNull.h +// and modified by Doris + +#pragma once + +#include + +#include + +#include "common/logging.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/aggregate_functions/aggregate_function_distinct.h" +#include "vec/columns/column.h" +#include "vec/columns/column_nullable.h" +#include "vec/common/assert_cast.h" +#include "vec/common/string_buffer.hpp" +#include "vec/core/types.h" +#include "vec/data_types/data_type_nullable.h" + +namespace doris::vectorized { +#include "common/compile_check_begin.h" + +template +class AggregateFunctionNullBaseInlineV2 : public IAggregateFunctionHelper { +protected: + std::unique_ptr nested_function; + size_t prefix_size; + bool is_window_function = false; + + AggregateDataPtr nested_place(AggregateDataPtr __restrict place) const noexcept { + return place + prefix_size; + } + + ConstAggregateDataPtr nested_place(ConstAggregateDataPtr __restrict place) const noexcept { + return place + prefix_size; + } + + static void init(AggregateDataPtr __restrict place, bool is_window_function) noexcept { + init_flag(place); + init_null_count(place, is_window_function); + } + + static void init_flag(AggregateDataPtr __restrict place) noexcept { + if constexpr (result_is_nullable) { + place[0] = false; + } + } + + static void set_flag(AggregateDataPtr __restrict place) noexcept { + if constexpr (result_is_nullable) { + place[0] = true; + } + } + + static bool get_flag(ConstAggregateDataPtr __restrict place) noexcept { + return result_is_nullable ? 
place[0] : true; + } + + static void init_null_count(AggregateDataPtr __restrict place, + bool is_window_function) noexcept { + if (is_window_function && result_is_nullable) { + unaligned_store(place + 1, 0); + } + } + + static void update_null_count(AggregateDataPtr __restrict place, bool incremental, + bool is_window_function) noexcept { + if (is_window_function && result_is_nullable) { + auto null_count = unaligned_load(place + 1); + incremental ? null_count++ : null_count--; + unaligned_store(place + 1, null_count); + } + } + + static int32_t get_null_count(ConstAggregateDataPtr __restrict place, + bool is_window_function) noexcept { + int32_t num = 0; + if (is_window_function && result_is_nullable) { + num = unaligned_load(place + 1); + } + return num; + } + +public: + AggregateFunctionNullBaseInlineV2(IAggregateFunction* nested_function_, + const DataTypes& arguments, bool is_window_function_) + : IAggregateFunctionHelper(arguments), + nested_function {assert_cast(nested_function_)}, + is_window_function(is_window_function_) { + DCHECK(nested_function_ != nullptr); + if constexpr (result_is_nullable) { + if (this->is_window_function) { + // flag|---null_count----|-------padding-------|--nested_data----| + size_t nested_align = nested_function->align_of_data(); + prefix_size = 1 + sizeof(int32_t); + if (prefix_size % nested_align != 0) { + prefix_size += (nested_align - (prefix_size % nested_align)); + } + } else { + prefix_size = nested_function->align_of_data(); + } + } else { + prefix_size = 0; + } + } + + MutableColumnPtr create_serialize_column() const override { + if constexpr (result_is_nullable) { + return ColumnNullable::create(nested_function->create_serialize_column(), + ColumnUInt8::create()); + } + return nested_function->create_serialize_column(); + } + + DataTypePtr get_serialized_type() const override { + if constexpr (result_is_nullable) { + return make_nullable(nested_function->get_serialized_type()); + } + return 
nested_function->get_serialized_type(); + } + + void set_query_context(QueryContext* ctx) override { + return nested_function->set_query_context(ctx); + } + + bool is_blockable() const override { return nested_function->is_blockable(); } + + void set_version(const int version_) override { + IAggregateFunctionHelper::set_version(version_); + nested_function->set_version(version_); + } + + String get_name() const override { return "NullableV2(" + nested_function->get_name() + ")"; } + + DataTypePtr get_return_type() const override { + return result_is_nullable ? make_nullable(nested_function->get_return_type()) + : nested_function->get_return_type(); + } + + void create(AggregateDataPtr __restrict place) const override { + init(place, this->is_window_function); + nested_function->create(nested_place(place)); + } + + void destroy(AggregateDataPtr __restrict place) const noexcept override { + nested_function->destroy(nested_place(place)); + } + void reset(AggregateDataPtr place) const override { + init(place, this->is_window_function); + nested_function->reset(nested_place(place)); + } + + bool is_trivial() const override { return false; } + + size_t size_of_data() const override { return prefix_size + nested_function->size_of_data(); } + + size_t align_of_data() const override { + if (this->is_window_function && result_is_nullable) { + return std::max(nested_function->align_of_data(), alignof(int32_t)); + } else { + return nested_function->align_of_data(); + } + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, + Arena& arena) const override { + if (get_flag(rhs)) { + set_flag(place); + nested_function->merge(nested_place(place), nested_place(rhs), arena); + } + } + + void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { + bool flag = get_flag(place); + if constexpr (result_is_nullable) { + buf.write_binary(flag); + } + if (flag) { + nested_function->serialize(nested_place(place), buf); + } + } + + 
void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, + Arena& arena) const override { + bool flag = true; + if constexpr (result_is_nullable) { + buf.read_binary(flag); + } + if (flag) { + set_flag(place); + nested_function->deserialize(nested_place(place), buf, arena); + } + } + + void serialize_to_column(const std::vector& places, size_t offset, + MutableColumnPtr& dst, const size_t num_rows) const override { + if constexpr (result_is_nullable) { + auto& nullable_col = assert_cast(*dst); + auto& nested_col = nullable_col.get_nested_column(); + auto& null_map = nullable_col.get_null_map_data(); + MutableColumnPtr nested_col_ptr = nested_col.assume_mutable(); + + null_map.resize(num_rows); + uint8_t* __restrict null_map_data = null_map.data(); + for (size_t i = 0; i < num_rows; ++i) { + null_map_data[i] = !get_flag(places[i] + offset); + } + nested_function->serialize_to_column(places, offset + prefix_size, nested_col_ptr, + num_rows); + } else { + nested_function->serialize_to_column(places, offset, dst, num_rows); + } + } + + void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst, + const size_t num_rows, Arena& arena) const override { + const auto* src_nullable_col = assert_cast(columns[0]); + const auto* __restrict src_null_map_data = src_nullable_col->get_null_map_data().data(); + + size_t nested_size = nested_function->size_of_data(); + std::vector nested_places(num_rows); + std::vector places_data(num_rows * nested_size); + for (size_t i = 0; i < num_rows; ++i) { + nested_places[i] = places_data.data() + i * nested_size; + } + + if (!nested_function->is_trivial()) { + for (int i = 0; i < num_rows; ++i) { + try { + nested_function->create(nested_places[i]); + } catch (...) 
{ + for (int j = 0; j < i; ++j) { + nested_function->destroy(nested_places[j]); + } + throw; + } + } + } + Defer destroy_places = {[&]() { + if (!nested_function->is_trivial()) { + for (int i = 0; i < num_rows; ++i) { + nested_function->destroy(nested_places[i]); + } + } + }}; + const IColumn* src_nested_column = + src_nullable_col->get_nested_column().assume_mutable().get(); + if (src_nullable_col->has_null()) { + for (size_t i = 0; i < num_rows; ++i) { + if (!src_null_map_data[i]) { + nested_function->add(nested_places[i], &src_nested_column, i, arena); + } + } + } else { + nested_function->add_batch(num_rows, nested_places.data(), 0, &src_nested_column, arena, + false); + } + + if constexpr (result_is_nullable) { + auto& dst_nullable_col = assert_cast(*dst); + MutableColumnPtr nested_col_ptr = dst_nullable_col.get_nested_column().assume_mutable(); + dst_nullable_col.get_null_map_column().insert_range_from( + src_nullable_col->get_null_map_column(), 0, num_rows); + nested_function->serialize_to_column(nested_places, 0, nested_col_ptr, num_rows); + } else { + nested_function->serialize_to_column(nested_places, 0, dst, num_rows); + } + } + + void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, + IColumn& to) const override { + if constexpr (result_is_nullable) { + auto& nullable_col = assert_cast(to); + auto& nested_col = nullable_col.get_nested_column(); + auto& null_map = nullable_col.get_null_map_data(); + + bool flag = get_flag(place); + if (flag) { + nested_function->serialize_without_key_to_column(nested_place(place), nested_col); + null_map.push_back(0); + } else { + nested_col.insert_default(); + null_map.push_back(1); + } + } else { + nested_function->serialize_without_key_to_column(nested_place(place), to); + } + } + + void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, + AggregateDataPtr rhs, const IColumn* column, Arena& arena, + const size_t num_rows) const override { + if constexpr (result_is_nullable) 
{ + const auto& nullable_col = assert_cast(*column); + const auto& nested_col = nullable_col.get_nested_column(); + const auto* __restrict null_map_data = nullable_col.get_null_map_data().data(); + + for (size_t i = 0; i < num_rows; ++i) { + *(places[i] + offset) |= (!null_map_data[i]); + } + nested_function->deserialize_and_merge_vec(places, offset + prefix_size, rhs, + &nested_col, arena, num_rows); + } else { + this->nested_function->deserialize_and_merge_vec(places, offset, rhs, column, arena, + num_rows); + } + } + + void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, + AggregateDataPtr rhs, const IColumn* column, + Arena& arena, const size_t num_rows) const override { + if constexpr (result_is_nullable) { + const auto& nullable_col = assert_cast(*column); + const auto& nested_col = nullable_col.get_nested_column(); + const auto* __restrict null_map_data = nullable_col.get_null_map_data().data(); + + for (size_t i = 0; i < num_rows; ++i) { + if (places[i]) { + *(places[i] + offset) |= (!null_map_data[i]); + } + } + nested_function->deserialize_and_merge_vec_selected(places, offset + prefix_size, rhs, + &nested_col, arena, num_rows); + } else { + this->nested_function->deserialize_and_merge_vec_selected(places, offset, rhs, column, + arena, num_rows); + } + } + + void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, + const IColumn& column, size_t begin, size_t end, + Arena& arena) const override { + DCHECK(end <= column.size() && begin <= end) + << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); + + if constexpr (result_is_nullable) { + const auto& nullable_col = assert_cast(column); + const auto& nested_col = nullable_col.get_nested_column(); + const auto& null_map = nullable_col.get_null_map_data(); + + for (size_t i = begin; i <= end; ++i) { + if (!null_map[i]) { + set_flag(place); + nested_function->deserialize_and_merge_from_column_range( + nested_place(place), 
nested_col, i, i, arena); + } + } + } else { + nested_function->deserialize_and_merge_from_column_range(place, column, begin, end, + arena); + } + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + if constexpr (result_is_nullable) { + auto& to_concrete = assert_cast(to); + if (get_flag(place)) { + nested_function->insert_result_into(nested_place(place), + to_concrete.get_nested_column()); + to_concrete.get_null_map_data().push_back(0); + } else { + to_concrete.insert_default(); + } + } else { + nested_function->insert_result_into(nested_place(place), to); + } + } +}; + +template +class AggregateFunctionNullUnaryInlineV2 final + : public AggregateFunctionNullBaseInlineV2< + NestFuction, result_is_nullable, + AggregateFunctionNullUnaryInlineV2> { +public: + AggregateFunctionNullUnaryInlineV2(IAggregateFunction* nested_function_, + const DataTypes& arguments, bool is_window_function_) + : AggregateFunctionNullBaseInlineV2< + NestFuction, result_is_nullable, + AggregateFunctionNullUnaryInlineV2>( + nested_function_, arguments, is_window_function_) {} + + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, + Arena& arena) const override { + const auto* column = + assert_cast(columns[0]); + if (!column->is_null_at(row_num)) { + this->set_flag(place); + const IColumn* nested_column = &column->get_nested_column(); + this->nested_function->add(this->nested_place(place), &nested_column, row_num, arena); + } else { + this->update_null_count(place, true, this->is_window_function); + } + } + + IAggregateFunction* transmit_to_stable() override { + auto f = AggregateFunctionNullBaseInlineV2< + NestFuction, result_is_nullable, + AggregateFunctionNullUnaryInlineV2>:: + nested_function->transmit_to_stable(); + if (!f) { + return nullptr; + } + return new AggregateFunctionNullUnaryInlineV2< + typename FunctionStableTransfer::FunctionStable, result_is_nullable>( + f, 
IAggregateFunction::argument_types, this->is_window_function); + } + + void add_batch(size_t batch_size, AggregateDataPtr* __restrict places, size_t place_offset, + const IColumn** columns, Arena& arena, bool agg_many) const override { + const auto* column = assert_cast(columns[0]); + const IColumn* nested_column = &column->get_nested_column(); + if (column->has_null()) { + const auto* __restrict null_map_data = column->get_null_map_data().data(); + for (int i = 0; i < batch_size; ++i) { + if (!null_map_data[i]) { + AggregateDataPtr __restrict place = places[i] + place_offset; + this->set_flag(place); + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } + } else { + if constexpr (result_is_nullable) { + for (int i = 0; i < batch_size; ++i) { + AggregateDataPtr __restrict place = places[i] + place_offset; + place[0] |= 1; + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } else { + this->nested_function->add_batch(batch_size, places, place_offset, &nested_column, + arena, agg_many); + } + } + } + + void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, + Arena& arena) const override { + const auto* column = assert_cast(columns[0]); + bool has_null = column->has_null(); + + if (has_null) { + for (size_t i = 0; i < batch_size; ++i) { + this->add(place, columns, i, arena); + } + } else { + this->set_flag(place); + const IColumn* nested_column = &column->get_nested_column(); + this->nested_function->add_batch_single_place(batch_size, this->nested_place(place), + &nested_column, arena); + } + } + + void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place, + const IColumn** columns, Arena& arena, bool has_null) override { + const auto* column = assert_cast(columns[0]); + + if (has_null) { + for (size_t i = batch_begin; i <= batch_end; ++i) { + this->add(place, columns, i, arena); + } + } else { + this->set_flag(place); + const 
IColumn* nested_column = &column->get_nested_column(); + this->nested_function->add_batch_range(batch_begin, batch_end, + this->nested_place(place), &nested_column, arena, + false); + } + } + + void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, + int64_t frame_end, AggregateDataPtr place, const IColumn** columns, + Arena& arena, UInt8* use_null_result, + UInt8* could_use_previous_result) const override { + auto current_frame_start = std::max(frame_start, partition_start); + auto current_frame_end = std::min(frame_end, partition_end); + if (current_frame_start >= current_frame_end) { + if (!*could_use_previous_result) { + this->init_flag(place); + *use_null_result = true; + return; + } + } else { + *use_null_result = false; + *could_use_previous_result = true; + } + const auto* column = assert_cast(columns[0]); + bool has_null = column->has_null(); + if (has_null) { + for (size_t i = current_frame_start; i < current_frame_end; ++i) { + this->add(place, columns, i, arena); + } + } else { + const IColumn* nested_column = &(column->get_nested_column()); + this->set_flag(place); + this->nested_function->add_range_single_place( + partition_start, partition_end, frame_start, frame_end, + this->nested_place(place), &nested_column, arena, use_null_result, + could_use_previous_result); + } + } + + bool supported_incremental_mode() const override { + return this->nested_function->supported_incremental_mode(); + } + + void execute_function_with_incremental(int64_t partition_start, int64_t partition_end, + int64_t frame_start, int64_t frame_end, + AggregateDataPtr place, const IColumn** columns, + Arena& arena, bool previous_is_nul, bool end_is_nul, + bool has_null, UInt8* use_null_result, + UInt8* could_use_previous_result) const override { + int64_t current_frame_start = std::max(frame_start, partition_start); + int64_t current_frame_end = std::min(frame_end, partition_end); + if (current_frame_start >= current_frame_end) { + 
*use_null_result = true; + this->init_flag(place); + return; + } + + DCHECK(columns[0]->is_nullable()) << columns[0]->get_name(); + const auto* column = assert_cast(columns[0]); + const IColumn* nested_column = &column->get_nested_column(); + + if (!column->has_null()) { + if (*could_use_previous_result) { + this->nested_function->execute_function_with_incremental( + partition_start, partition_end, frame_start, frame_end, + this->nested_place(place), &nested_column, arena, previous_is_nul, + end_is_nul, false, use_null_result, could_use_previous_result); + } else { + this->nested_function->add_range_single_place( + partition_start, partition_end, frame_start, frame_end, + this->nested_place(place), &nested_column, arena, use_null_result, + could_use_previous_result); + } + this->set_flag(place); + return; + } + + const auto* __restrict null_map_data = column->get_null_map_data().data(); + if (*could_use_previous_result) { + auto outcoming_pos = frame_start - 1; + auto incoming_pos = frame_end - 1; + bool is_previous_frame_start_null = false; + if (outcoming_pos >= partition_start && outcoming_pos < partition_end && + null_map_data[outcoming_pos] == 1) { + is_previous_frame_start_null = true; + DCHECK_EQ(result_is_nullable, true); + DCHECK_EQ(this->is_window_function, true); + this->update_null_count(place, false, this->is_window_function); + } + bool is_current_frame_end_null = false; + if (incoming_pos >= partition_start && incoming_pos < partition_end && + null_map_data[incoming_pos] == 1) { + is_current_frame_end_null = true; + DCHECK_EQ(result_is_nullable, true); + DCHECK_EQ(this->is_window_function, true); + this->update_null_count(place, true, this->is_window_function); + } + const IColumn* columns_tmp[2] {nested_column, &(*column->get_null_map_column_ptr())}; + this->nested_function->execute_function_with_incremental( + partition_start, partition_end, frame_start, frame_end, + this->nested_place(place), columns_tmp, arena, is_previous_frame_start_null, + 
is_current_frame_end_null, true, use_null_result, could_use_previous_result); + DCHECK_EQ(result_is_nullable, true); + DCHECK_EQ(this->is_window_function, true); + if (current_frame_end - current_frame_start == + this->get_null_count(place, this->is_window_function)) { + this->init_flag(place); + } else { + this->set_flag(place); + } + } else { + this->add_range_single_place(partition_start, partition_end, frame_start, frame_end, + place, columns, arena, use_null_result, + could_use_previous_result); + } + } +}; + +} // namespace doris::vectorized + +#include "common/compile_check_end.h" diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h index 62822288860019..952bee8befd731 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h @@ -133,13 +133,6 @@ class AggregateFunctionSum final column.get_data().push_back(this->data(place).get()); } - void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { - auto& col = assert_cast(column); - auto* data = col.get_data().data(); - memcpy(places, data, sizeof(Data) * num_rows); - } - void serialize_to_column(const std::vector& places, size_t offset, MutableColumnPtr& dst, const size_t num_rows) const override { auto& col = assert_cast(*dst); @@ -168,16 +161,6 @@ class AggregateFunctionSum final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t num_rows = column.size(); - auto* data = reinterpret_cast(col.get_data().data()); - for (size_t i = 0; i != num_rows; ++i) { - this->data(place).sum += data[i].sum; - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { @@ -193,15 +176,17 @@ 
class AggregateFunctionSum final void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - this->merge_vec(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec(places, offset, AggregateDataPtr(data), arena, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena& arena, const size_t num_rows) const override { - this->deserialize_from_column(rhs, *column, arena, num_rows); - this->merge_vec_selected(places, offset, rhs, arena, num_rows); + const auto& col = assert_cast(*column); + const auto* data = col.get_data().data(); + this->merge_vec_selected(places, offset, AggregateDataPtr(data), arena, num_rows); } void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place, diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h index 536a64af6028cd..1c27c37b5ac628 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq_distribute_key.h @@ -165,7 +165,7 @@ class AggregateFunctionUniqDistributeKey final } void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena&, - size_t num_rows) const override { + size_t num_rows) const { auto data = reinterpret_cast( assert_cast(column).get_data().data()); for (size_t i = 0; i != num_rows; ++i) { @@ -199,16 +199,6 @@ class AggregateFunctionUniqDistributeKey final } } - void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column, - Arena&) const override { - auto& col = assert_cast(column); - const size_t 
num_rows = column.size(); - auto* data = reinterpret_cast(col.get_data().data()); - for (size_t i = 0; i != num_rows; ++i) { - AggregateFunctionUniqDistributeKey::data(place).count += data[i]; - } - } - void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place, const IColumn& column, size_t begin, size_t end, Arena&) const override { diff --git a/be/src/vec/aggregate_functions/helpers.h b/be/src/vec/aggregate_functions/helpers.h index 2317925de7be0e..5762aa7ad4d8c3 100644 --- a/be/src/vec/aggregate_functions/helpers.h +++ b/be/src/vec/aggregate_functions/helpers.h @@ -23,6 +23,7 @@ #include "runtime/define_primitive_type.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_null.h" +#include "vec/aggregate_functions/aggregate_function_null_v2.h" #include "vec/core/call_on_type_index.h" #include "vec/data_types/data_type.h" #include "vec/utils/template_helpers.hpp" @@ -68,12 +69,13 @@ decltype(&IAggregateFunctionHelper< \ FunctionTemplate>::serialize_without_key_to_column)>, \ "need to override serialize_without_key_to_column"); \ - static_assert(!std::is_same_v< \ - decltype(&FunctionTemplate::deserialize_and_merge_from_column), \ - decltype(&IAggregateFunctionHelper< \ - FunctionTemplate>::deserialize_and_merge_from_column)>, \ - "need to override " \ - "deserialize_and_merge_from_column"); \ + static_assert( \ + !std::is_same_v< \ + decltype(&FunctionTemplate::deserialize_and_merge_from_column_range), \ + decltype(&IAggregateFunctionHelper< \ + FunctionTemplate>::deserialize_and_merge_from_column)>, \ + "need to override " \ + "deserialize_and_merge_from_column"); \ } \ } while (false) @@ -85,6 +87,11 @@ struct creator_without_type { using NullableT = std::conditional_t, AggregateFunctionNullUnaryInline>; + template + using NullableV2T = + std::conditional_t, + AggregateFunctionNullUnaryInlineV2>; + template static AggregateFunctionPtr creator(const std::string& name, const DataTypes& 
argument_types, const DataTypePtr& result_type, @@ -142,9 +149,15 @@ struct creator_without_type { if (have_nullable(argument_types_)) { std::visit( [&](auto multi_arguments, auto result_is_nullable) { - result.reset(new NullableT( - result.release(), argument_types_, attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } }, make_bool_variant(argument_types_.size() > 1), make_bool_variant(result_is_nullable)); @@ -166,11 +179,21 @@ struct creator_without_type { std::forward(args)..., remove_nullable(argument_types_))); if (have_nullable(argument_types_)) { if (argument_types_.size() > 1) { - result.reset(new NullableT( - result.release(), argument_types_, attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } } else { - result.reset(new NullableT( - result.release(), argument_types_, attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } } } @@ -192,10 +215,15 @@ struct creator_without_type { if (have_nullable(argument_types_)) { std::visit( [&](auto result_is_nullable) { - result.reset( - new NullableT( - result.release(), argument_types_, - attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, 
attr.is_window_function)); + } }, make_bool_variant(result_is_nullable)); } @@ -220,8 +248,13 @@ struct creator_without_type { std::unique_ptr result(std::make_unique( std::forward(args)..., remove_nullable(argument_types_))); if (have_nullable(argument_types_)) { - result.reset(new NullableT( - result.release(), argument_types_, attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } } CHECK_AGG_FUNCTION_SERIALIZED_TYPE(AggregateFunctionTemplate); return AggregateFunctionPtr(result.release()); @@ -241,10 +274,15 @@ struct creator_without_type { if (have_nullable(argument_types_)) { std::visit( [&](auto result_is_nullable) { - result.reset( - new NullableT( - result.release(), argument_types_, - attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } }, make_bool_variant(result_is_nullable)); } @@ -268,8 +306,13 @@ struct creator_without_type { std::unique_ptr result(std::make_unique( std::forward(args)..., remove_nullable(argument_types_))); if (have_nullable(argument_types_)) { - result.reset(new NullableT( - result.release(), argument_types_, attr.is_window_function)); + if (attr.enable_aggregate_function_null_v2) { + result.reset(new NullableV2T( + result.release(), argument_types_, attr.is_window_function)); + } else { + result.reset(new NullableT( + result.release(), argument_types_, attr.is_window_function)); + } } CHECK_AGG_FUNCTION_SERIALIZED_TYPE(AggregateFunctionTemplate); return AggregateFunctionPtr(result.release()); diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 
a36044f45f57ee..95ddc29423d734 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -126,7 +126,7 @@ void ColumnDecimal::update_crc_with_value(size_t start, size_t end, uint32_t& if (null_data == nullptr) { for (size_t i = start; i < end; i++) { if constexpr (T != TYPE_DECIMALV2) { - hash = HashUtil::zlib_crc_hash(&data[i], sizeof(value_type), hash); + hash = HashUtil::zlib_crc32_fixed(data[i], hash); } else { decimalv2_do_crc(i, hash); } @@ -135,7 +135,7 @@ void ColumnDecimal::update_crc_with_value(size_t start, size_t end, uint32_t& for (size_t i = start; i < end; i++) { if (null_data[i] == 0) { if constexpr (T != TYPE_DECIMALV2) { - hash = HashUtil::zlib_crc_hash(&data[i], sizeof(value_type), hash); + hash = HashUtil::zlib_crc32_fixed(data[i], hash); } else { decimalv2_do_crc(i, hash); } @@ -154,12 +154,13 @@ void ColumnDecimal::update_crcs_with_value(uint32_t* __restrict hashes, Primi if constexpr (T != TYPE_DECIMALV2) { if (null_data == nullptr) { for (size_t i = 0; i < s; i++) { - hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(value_type), hashes[i]); + hashes[i] = HashUtil::zlib_crc32_fixed(data[i], hashes[i]); } } else { for (size_t i = 0; i < s; i++) { - if (null_data[i] == 0) - hashes[i] = HashUtil::zlib_crc_hash(&data[i], sizeof(value_type), hashes[i]); + if (null_data[i] == 0) { + hashes[i] = HashUtil::zlib_crc32_fixed(data[i], hashes[i]); + } } } } else { diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 2939d67f29f7db..8316aa4c7907da 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -157,41 +157,31 @@ void ColumnVector::update_crcs_with_value(uint32_t* __restrict hashes, Primit auto s = rows; DCHECK(s == size()); - if constexpr (is_date_or_datetime(T)) { - char buf[64]; - auto date_convert_do_crc = [&](size_t i) { - const auto& date_val = (const VecDateTimeValue&)data[i]; - auto len = date_val.to_buffer(buf); - 
hashes[i] = HashUtil::zlib_crc_hash(buf, len, hashes[i]); - }; - - if (null_data == nullptr) { - for (size_t i = 0; i < s; i++) { - date_convert_do_crc(i); - } - } else { - for (size_t i = 0; i < s; i++) { - if (null_data[i] == 0) { - date_convert_do_crc(i); - } - } + if (null_data == nullptr) { + for (size_t i = 0; i < s; i++) { + hashes[i] = _zlib_crc32_hash(hashes[i], i); } } else { - if (null_data == nullptr) { - for (size_t i = 0; i < s; i++) { - hashes[i] = HashUtil::zlib_crc_hash( - &data[i], sizeof(typename PrimitiveTypeTraits::CppType), hashes[i]); - } - } else { - for (size_t i = 0; i < s; i++) { - if (null_data[i] == 0) - hashes[i] = HashUtil::zlib_crc_hash( - &data[i], sizeof(typename PrimitiveTypeTraits::CppType), hashes[i]); + for (size_t i = 0; i < s; i++) { + if (null_data[i] == 0) { + hashes[i] = _zlib_crc32_hash(hashes[i], i); } } } } +template +uint32_t ColumnVector::_zlib_crc32_hash(uint32_t hash, size_t idx) const { + if constexpr (is_date_or_datetime(T)) { + char buf[64]; + const auto& date_val = (const VecDateTimeValue&)data[idx]; + auto len = date_val.to_buffer(buf); + return HashUtil::zlib_crc_hash(buf, len, hash); + } else { + return HashUtil::zlib_crc32_fixed(data[idx], hash); + } +} + template uint32_t ColumnVector::_crc32c_hash(uint32_t hash, size_t idx) const { if constexpr (is_date_or_datetime(T)) { diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 4d8806b32883dc..707c8327875d45 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -401,6 +401,7 @@ class ColumnVector final : public COWHelper> { } protected: + uint32_t _zlib_crc32_hash(uint32_t hash, size_t idx) const; uint32_t _crc32c_hash(uint32_t hash, size_t idx) const; Container data; }; diff --git a/be/src/vec/data_types/data_type_fixed_length_object.cpp b/be/src/vec/data_types/data_type_fixed_length_object.cpp index d6dca09250ea09..a238c6681b0d5f 100644 --- 
a/be/src/vec/data_types/data_type_fixed_length_object.cpp +++ b/be/src/vec/data_types/data_type_fixed_length_object.cpp @@ -18,6 +18,7 @@ #include "vec/data_types/data_type_fixed_length_object.h" #include +#include #include #include @@ -33,8 +34,38 @@ namespace doris::vectorized { char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf, int be_exec_version) const { - if (be_exec_version >= USE_CONST_SERDE) { - // const flag + if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) { + // New serialization with streamvbyte encoding for large data + const auto* data_column = &column; + size_t real_need_copy_num = 0; + buf = serialize_const_flag_and_row_num(&data_column, buf, &real_need_copy_num); + + const auto& src_col = assert_cast(*data_column); + DCHECK(src_col.item_size() > 0) + << "[serialize]item size of DataTypeFixedLengthObject should be greater than 0"; + + // item size + unaligned_store(buf, src_col.item_size()); + buf += sizeof(size_t); + + auto mem_size = real_need_copy_num * src_col.item_size(); + const auto* origin_data = src_col.get_data().data(); + + // column data + if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { + memcpy(buf, origin_data, mem_size); + return buf + mem_size; + } else { + // Throw exception if mem_size is larger than UINT32_MAX + auto encode_size = streamvbyte_encode(reinterpret_cast(origin_data), + cast_set(upper_int32(mem_size)), + (uint8_t*)(buf + sizeof(size_t))); + unaligned_store(buf, encode_size); + buf += sizeof(size_t); + return buf + encode_size; + } + } else if (be_exec_version >= USE_CONST_SERDE) { + // Old serialization: const flag | row num | item size | data (memcpy) bool is_const_column = is_column_const(column); unaligned_store(buf, is_const_column); buf += sizeof(bool); @@ -85,7 +116,30 @@ char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf, const char* DataTypeFixedLengthObject::deserialize(const char* buf, MutableColumnPtr* column, int be_exec_version) const { - 
if (be_exec_version >= USE_CONST_SERDE) { + if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) { + // New deserialization with streamvbyte decoding for large data + size_t real_have_saved_num = 0; + buf = deserialize_const_flag_and_row_num(buf, column, &real_have_saved_num); + + auto& dst_col = assert_cast(*(column->get())); + auto item_size = unaligned_load(buf); + buf += sizeof(size_t); + dst_col.set_item_size(item_size); + + auto mem_size = real_have_saved_num * item_size; + dst_col.resize(real_have_saved_num); + if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { + memcpy(dst_col.get_data().data(), buf, mem_size); + buf = buf + mem_size; + } else { + auto encode_size = unaligned_load(buf); + buf += sizeof(size_t); + streamvbyte_decode((const uint8_t*)buf, (uint32_t*)(dst_col.get_data().data()), + cast_set(upper_int32(mem_size))); + buf = buf + encode_size; + } + return buf; + } else if (be_exec_version >= USE_CONST_SERDE) { //const flag bool is_const_column = unaligned_load(buf); buf += sizeof(bool); @@ -136,7 +190,21 @@ const char* DataTypeFixedLengthObject::deserialize(const char* buf, MutableColum // data : item data1 | item data2... int64_t DataTypeFixedLengthObject::get_uncompressed_serialized_bytes(const IColumn& column, int be_exec_version) const { - if (be_exec_version >= USE_CONST_SERDE) { + if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) { + // New format size calculation with streamvbyte + auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t) + sizeof(size_t); + auto real_need_copy_num = is_column_const(column) ? 
1 : column.size(); + const auto& src_col = assert_cast(column); + auto mem_size = src_col.item_size() * real_need_copy_num; + if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) { + return size + mem_size; + } else { + // Throw exception if mem_size is larger than UINT32_MAX + return size + sizeof(size_t) + + std::max(mem_size, streamvbyte_max_compressedbytes( + cast_set(upper_int32(mem_size)))); + } + } else if (be_exec_version >= USE_CONST_SERDE) { auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t); const IColumn* data_column = &column; if (is_column_const(column)) { @@ -160,4 +228,4 @@ Status DataTypeFixedLengthObject::check_column(const IColumn& column) const { return check_column_non_nested_type(column); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 12ac55016cacc6..d64263392a22ce 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -189,6 +189,12 @@ Status RowGroupReader::init( _lazy_read_ctx.missing_columns_conjuncts.end()); RETURN_IF_ERROR(_rewrite_dict_predicates()); } + // _state is nullptr in some unit tests. 
+ if (_state && _state->enable_adjust_conjunct_order_by_cost()) { + std::ranges::sort(_filter_conjuncts, [](const auto& a, const auto& b) { + return a->execute_cost() < b->execute_cost(); + }); + } return Status::OK(); } diff --git a/be/src/vec/exprs/vcolumn_ref.h b/be/src/vec/exprs/vcolumn_ref.h index ce0f5aaedd4289..8d9eaa66a2f94b 100644 --- a/be/src/vec/exprs/vcolumn_ref.h +++ b/be/src/vec/exprs/vcolumn_ref.h @@ -89,6 +89,8 @@ class VColumnRef final : public VExpr { return out.str(); } + double execute_cost() const override { return 0.0; } + private: int _column_id; std::atomic _gap = 0; diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 2d39319cedc7ea..d64b5a772b5ac2 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -330,6 +330,14 @@ class VCompoundPred : public VectorizedFnCall { return Status::OK(); } + double execute_cost() const override { + double cost = 0.3; + for (const auto& child : _children) { + cost += child->execute_cost(); + } + return cost; + } + private: static inline constexpr uint8_t apply_and_null(UInt8 a, UInt8 l_null, UInt8 b, UInt8 r_null) { // (<> && false) is false, (true && NULL) is NULL diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp index 7da847d8d6361d..22ea6ccff4d68c 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.cpp +++ b/be/src/vec/exprs/vectorized_agg_fn.cpp @@ -217,6 +217,8 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, state->be_exec_version(), {.is_window_function = _is_window_function, .is_foreach = is_foreach, + .enable_aggregate_function_null_v2 = + state->enable_aggregate_function_null_v2(), .column_names = std::move(column_names)}); } else { _function = AggregateFunctionSimpleFactory::instance().get( @@ -224,6 +226,8 @@ Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, state->be_exec_version(), {.is_window_function = _is_window_function, 
.is_foreach = is_foreach, + .enable_aggregate_function_null_v2 = + state->enable_aggregate_function_null_v2(), .column_names = std::move(column_names)}); } } @@ -285,13 +289,6 @@ Status AggFnEvaluator::execute_batch_add_selected(Block* block, size_t offset, return Status::OK(); } -Status AggFnEvaluator::streaming_agg_serialize(Block* block, BufferWritable& buf, - const size_t num_rows, Arena& arena) { - RETURN_IF_ERROR(_calc_argument_columns(block)); - _function->streaming_agg_serialize(_agg_columns.data(), buf, num_rows, arena); - return Status::OK(); -} - Status AggFnEvaluator::streaming_agg_serialize_to_column(Block* block, MutableColumnPtr& dst, const size_t num_rows, Arena& arena) { RETURN_IF_ERROR(_calc_argument_columns(block)); diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h index 52555054d3563a..a5e75fda192da8 100644 --- a/be/src/vec/exprs/vectorized_agg_fn.h +++ b/be/src/vec/exprs/vectorized_agg_fn.h @@ -82,9 +82,6 @@ class AggFnEvaluator { Status execute_batch_add_selected(Block* block, size_t offset, AggregateDataPtr* places, Arena& arena); - Status streaming_agg_serialize(Block* block, BufferWritable& buf, const size_t num_rows, - Arena& arena); - Status streaming_agg_serialize_to_column(Block* block, MutableColumnPtr& dst, const size_t num_rows, Arena& arena); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 794ff5ae00339a..9072d40bca2516 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -26,6 +26,7 @@ #include #include "common/config.h" +#include "common/exception.h" #include "common/logging.h" #include "common/status.h" #include "common/utils.h" @@ -664,5 +665,17 @@ Status VectorizedFnCall::evaluate_ann_range_search( return Status::OK(); } +double VectorizedFnCall::execute_cost() const { + if (!_function) { + throw Exception( + Status::InternalError("Function is null in expression: {}", 
this->debug_string())); + } + double cost = _function->execute_cost(); + for (const auto& child : _children) { + cost += child->execute_cost(); + } + return cost; +} + #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index d432cbbe3e6829..4eb907638ca786 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -66,6 +66,7 @@ class VectorizedFnCall : public VExpr { const std::string& expr_name() const override; std::string function_name() const; std::string debug_string() const override; + double execute_cost() const override; bool is_blockable() const override { return _function->is_blockable() || std::any_of(_children.begin(), _children.end(), diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 38e96abe6ee030..b6256432bf0424 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -296,6 +296,14 @@ class VExpr { return expr; } + virtual double execute_cost() const { + double cost = 1.0; + for (const auto& child : _children) { + cost += child->execute_cost(); + } + return cost; + } + // If this expr is a RuntimeFilterWrapper, this method will return an underlying rf expression virtual VExprSPtr get_impl() const { return {}; } diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index c733f5f6748e16..6e6fb17b19433b 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -475,5 +475,14 @@ uint64_t VExprContext::get_digest(uint64_t seed) const { return _root->get_digest(seed); } +double VExprContext::execute_cost() const { + if (_root == nullptr) { + // When there is no expression root, treat the cost as a base value. + // This avoids null dereferences while keeping a deterministic cost. 
+ return 0.0; + } + return _root->execute_cost(); +} + #include "common/compile_check_end.h" } // namespace doris::vectorized diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 39943c121f6be7..d1e39a6fda6ee3 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -267,7 +267,9 @@ class VExprContext { [[nodiscard]] Status execute_const_expr(ColumnWithTypeAndName& result); - VExprSPtr root() const { return _root; } + double execute_cost() const; + + VExprSPtr root() { return _root; } void set_root(const VExprSPtr& expr) { _root = expr; } void set_index_context(std::shared_ptr index_context) { _index_context = std::move(index_context); diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h index 4f4009db3f797f..c01b3883b6eb0c 100644 --- a/be/src/vec/exprs/vliteral.h +++ b/be/src/vec/exprs/vliteral.h @@ -56,6 +56,8 @@ class VLiteral : public VExpr { const std::string& expr_name() const override { return _expr_name; } std::string debug_string() const override; + double execute_cost() const override { return 0.0; } + MOCK_FUNCTION std::string value(const DataTypeSerDe::FormatOptions& options) const; const ColumnPtr& get_column_ptr() const { return _column_ptr; } diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h index ea37dddef261ef..91569e61045e67 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.h +++ b/be/src/vec/exprs/vruntimefilter_wrapper.h @@ -64,6 +64,8 @@ class VRuntimeFilterWrapper final : public VExpr { const VExprSPtrs& children() const override { return _impl->children(); } TExprNodeType::type node_type() const override { return _impl->node_type(); } + double execute_cost() const override { return _impl->execute_cost(); } + Status execute_filter(VExprContext* context, const Block* block, uint8_t* __restrict result_filter_data, size_t rows, bool accept_null, bool* can_filter_all) const override; diff --git 
a/be/src/vec/exprs/vslot_ref.h b/be/src/vec/exprs/vslot_ref.h index 960b4f809a0e50..2dec6b55d645a4 100644 --- a/be/src/vec/exprs/vslot_ref.h +++ b/be/src/vec/exprs/vslot_ref.h @@ -72,6 +72,8 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr { uint64_t get_digest(uint64_t seed) const override; + double execute_cost() const override { return 0.0; } + private: int _slot_id; int _column_id; diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 9efe3fee9064b0..54e469c51eb8e4 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -171,6 +171,8 @@ class IFunctionBase { virtual const DataTypes& get_argument_types() const = 0; virtual const DataTypePtr& get_return_type() const = 0; + virtual double execute_cost() const { return 1.0; } + /// Do preparations and return executable. /// sample_block should contain data types of arguments and values of constants, if relevant. virtual PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block, @@ -462,6 +464,8 @@ class DefaultFunction final : public IFunctionBase { return function; } + double execute_cost() const override { return function->execute_cost(); } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { return function->open(context, scope); } diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h index d785ea6515bbe1..b81ccc927169df 100644 --- a/be/src/vec/functions/functions_comparison.h +++ b/be/src/vec/functions/functions_comparison.h @@ -272,6 +272,8 @@ class FunctionComparison : public IFunction { FunctionComparison() = default; + double execute_cost() const override { return 0.5; } + private: template Status execute_num_type(Block& block, uint32_t result, const ColumnPtr& col_left_ptr, diff --git a/be/test/util/crc32c_test.cpp b/be/test/util/crc32c_test.cpp index 5a6a7faa3a57ea..e9795882b36bd0 100644 --- a/be/test/util/crc32c_test.cpp +++ 
b/be/test/util/crc32c_test.cpp @@ -22,10 +22,14 @@ #include #include #include +#include +#include +#include #include #include "gtest/gtest_pred_impl.h" +#include "util/hash_util.hpp" #include "util/slice.h" namespace doris { @@ -75,3 +79,400 @@ TEST(CRC, Extend) { } } // namespace doris + +namespace doris { + +// Helper: compute crc32c via crc32c::Crc32c for a value of type T +template +uint32_t crc32c_reference(const T& value, uint32_t seed) { + return crc32c::Extend(seed, reinterpret_cast(&value), sizeof(T)); +} + +// Helper: compute zlib crc32 for a value of type T +template +uint32_t zlib_crc32_reference(const T& value, uint32_t seed) { + return HashUtil::zlib_crc_hash(&value, sizeof(T), seed); +} + +/* +todo: fix those cases when we have a new release version; do not consider the compatibility issue +use following code to replace the old crc32c_fixed function in hash_util.hpp +template +static uint32_t crc32c_fixed(const T& value, uint32_t hash) { + uint32_t crc = hash ^ 0xFFFFFFFFU; + if constexpr (sizeof(T) == 1) { + crc = _mm_crc32_u8(crc, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 2) { + crc = _mm_crc32_u16(crc, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 4) { + crc = _mm_crc32_u32(crc, *reinterpret_cast(&value)); + } else if constexpr (sizeof(T) == 8) { + crc = (uint32_t)_mm_crc32_u64(crc, *reinterpret_cast(&value)); + } else { + return crc32c_extend(hash, (const uint8_t*)&value, sizeof(T)); + } + return crc ^ 0xFFFFFFFFU; +} +// ==================== crc32c_fixed tests ==================== +TEST(CRC32CFixed, Uint8Values) { + uint8_t values[] = {0, 1, 127, 128, 255}; + for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "uint8_t v=" << (int)v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, Uint16Values) { + uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535}; + for (uint32_t seed : {0U, 1U, 
0xFFFFFFFFU, 0x12345678U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "uint16_t v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, Int32Values) { + int32_t values[] = {0, + 1, + -1, + 42, + -42, + 1000000, + -1000000, + std::numeric_limits::min(), + std::numeric_limits::max()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xCAFEBABEU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "int32_t v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, Uint32Values) { + uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 0x12345678}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "uint32_t v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, Int64Values) { + int64_t values[] = {0, + 1, + -1, + 1000000000LL, + -1000000000LL, + std::numeric_limits::min(), + std::numeric_limits::max(), + 0x0102030405060708LL, + -0x0102030405060708LL}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "int64_t v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, Uint64Values) { + uint64_t values[] = {0, + 1, + 0xFFFFFFFFFFFFFFFFULL, + 0xDEADBEEFCAFEBABEULL, + 0x0123456789ABCDEFULL, + 0xFF00FF00FF00FF00ULL}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "uint64_t v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, FloatValues) { + float values[] = {0.0f, + -0.0f, + 1.0f, + -1.0f, + 3.14f, + std::numeric_limits::min(), + std::numeric_limits::max(), + std::numeric_limits::infinity()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, 
seed), crc32c_reference(v, seed)) + << "float v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, DoubleValues) { + double values[] = {0.0, + -0.0, + 1.0, + -1.0, + 3.141592653589793, + 1e100, + -1e100, + std::numeric_limits::infinity()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) + << "double v=" << v << " seed=" << seed; + } + } +} + +TEST(CRC32CFixed, NullHash) { + // crc32c_null should match crc32c_fixed with int(0) + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) { + int zero = 0; + EXPECT_EQ(HashUtil::crc32c_null(seed), HashUtil::crc32c_fixed(zero, seed)); + EXPECT_EQ(HashUtil::crc32c_null(seed), crc32c_reference(zero, seed)); + } +} +*/ +// ==================== zlib_crc32_fixed tests ==================== + +TEST(ZlibCRC32Fixed, Uint8Values) { + uint8_t values[] = {0, 1, 42, 127, 128, 255}; + for (uint32_t seed : {0U, 1U, 0xFFFFFFFFU, 0xDEADBEEFU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "uint8_t v=" << (int)v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Int16Values) { + int16_t values[] = {0, 1, -1, 256, -256, 32767, -32768}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x12345678U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "int16_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Uint16Values) { + uint16_t values[] = {0, 1, 255, 256, 1000, 32767, 65535}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCDEF00U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "uint16_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Int32Values) { + int32_t values[] = {0, + 1, + -1, + 42, + -42, + 1000000, + -1000000, + std::numeric_limits::min(), + std::numeric_limits::max()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 
0xCAFEBABEU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "int32_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Uint32Values) { + uint32_t values[] = {0, 1, 0xFF, 0xFFFF, 0xFFFFFFFF, 0xDEADBEEF, 0x12345678}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xABCD1234U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "uint32_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Int64Values) { + int64_t values[] = {0, + 1, + -1, + 1000000000LL, + -1000000000LL, + std::numeric_limits::min(), + std::numeric_limits::max(), + 0x0102030405060708LL, + -0x0102030405060708LL}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x87654321U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "int64_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, Uint64Values) { + uint64_t values[] = {0, + 1, + 0xFFFFFFFFFFFFFFFFULL, + 0xDEADBEEFCAFEBABEULL, + 0x0123456789ABCDEFULL, + 0xFF00FF00FF00FF00ULL}; + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0x11111111U}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "uint64_t v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, FloatValues) { + float values[] = {0.0f, + -0.0f, + 1.0f, + -1.0f, + 3.14f, + 1e10f, + -1e10f, + std::numeric_limits::min(), + std::numeric_limits::max(), + std::numeric_limits::infinity()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU}) { + for (auto v : values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "float v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, DoubleValues) { + double values[] = {0.0, + -0.0, + 1.0, + -1.0, + 3.141592653589793, + 1e100, + -1e100, + 1e-300, + std::numeric_limits::infinity()}; + for (uint32_t seed : {0U, 0xFFFFFFFFU}) { + for (auto v : 
values) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "double v=" << v << " seed=" << seed; + } + } +} + +TEST(ZlibCRC32Fixed, NullHash) { + // zlib_crc_hash_null should match zlib_crc32_fixed with int(0) + for (uint32_t seed : {0U, 0xFFFFFFFFU, 0xDEADBEEFU}) { + int zero = 0; + EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), HashUtil::zlib_crc32_fixed(zero, seed)); + EXPECT_EQ(HashUtil::zlib_crc_hash_null(seed), zlib_crc32_reference(zero, seed)); + } +} + +// ==================== Cross-validation: fixed vs non-fixed should differ ==================== + +TEST(CRC32Fixed, CRC32CVsZlibDiffer) { + // CRC32C and standard CRC32 use different polynomials, so results should differ + // (except possibly by coincidence on some values, but not systematically) + int32_t v = 12345678; + uint32_t seed = 0; + uint32_t crc32c_result = HashUtil::crc32c_fixed(v, seed); + uint32_t zlib_result = HashUtil::zlib_crc32_fixed(v, seed); + EXPECT_NE(crc32c_result, zlib_result) + << "CRC32C and zlib CRC32 should produce different results for non-trivial input"; +} + +// ==================== Chaining: verify incremental hashing ==================== +/* +TEST(CRC32CFixed, IncrementalChaining) { + // Hash two int32 values incrementally and compare with hashing 8 bytes at once + int32_t a = 0x11223344; + int32_t b = 0x55667788; + uint32_t seed = 0; + + uint32_t chained = HashUtil::crc32c_fixed(a, seed); + chained = HashUtil::crc32c_fixed(b, chained); + + // Reference: hash the 8 bytes sequentially via crc32c::Extend + uint8_t buf[8]; + memcpy(buf, &a, 4); + memcpy(buf + 4, &b, 4); + uint32_t reference = crc32c::Extend(seed, buf, 8); + + EXPECT_EQ(chained, reference); +} +*/ +TEST(ZlibCRC32Fixed, IncrementalChaining) { + // Hash two int32 values incrementally and compare with hashing 8 bytes at once + int32_t a = 0x11223344; + int32_t b = 0x55667788; + uint32_t seed = 0; + + uint32_t chained = HashUtil::zlib_crc32_fixed(a, seed); + chained = 
HashUtil::zlib_crc32_fixed(b, chained); + + // Reference: hash the 8 bytes sequentially via zlib crc32 + uint8_t buf[8]; + memcpy(buf, &a, 4); + memcpy(buf + 4, &b, 4); + uint32_t reference = (uint32_t)crc32(seed, buf, 8); + + EXPECT_EQ(chained, reference); +} +/* +// ==================== Exhaustive 1-byte test ==================== + +TEST(CRC32CFixed, AllByteValues) { + for (int i = 0; i <= 255; i++) { + uint8_t v = static_cast(i); + uint32_t seed = 0x12345678U; + EXPECT_EQ(HashUtil::crc32c_fixed(v, seed), crc32c_reference(v, seed)) << "byte=" << i; + } +} + +TEST(ZlibCRC32Fixed, AllByteValues) { + for (int i = 0; i <= 255; i++) { + uint8_t v = static_cast(i); + uint32_t seed = 0x12345678U; + EXPECT_EQ(HashUtil::zlib_crc32_fixed(v, seed), zlib_crc32_reference(v, seed)) + << "byte=" << i; + } +} + +// ==================== Sequential pattern ==================== + +TEST(CRC32CFixed, SequentialInt32) { + // Hash a sequence of increasing int32 values, verify each against reference + uint32_t seed = 0; + for (int32_t i = -500; i <= 500; i++) { + EXPECT_EQ(HashUtil::crc32c_fixed(i, seed), crc32c_reference(i, seed)) << "i=" << i; + } +} + +TEST(ZlibCRC32Fixed, SequentialInt32) { + uint32_t seed = 0; + for (int32_t i = -500; i <= 500; i++) { + EXPECT_EQ(HashUtil::zlib_crc32_fixed(i, seed), zlib_crc32_reference(i, seed)) << "i=" << i; + } +} +*/ +// ==================== Large 16-byte type fallback test ==================== + +TEST(ZlibCRC32Fixed, LargeTypeFallback) { + // __int128 is 16 bytes, should hit the fallback path to zlib crc32() + __int128 value = static_cast<__int128>(0x0102030405060708ULL) << 64 | 0x090A0B0C0D0E0F10ULL; + uint32_t seed = 0; + uint32_t fixed_result = HashUtil::zlib_crc32_fixed(value, seed); + uint32_t ref_result = HashUtil::zlib_crc_hash(&value, sizeof(value), seed); + EXPECT_EQ(fixed_result, ref_result); +} + +} // namespace doris diff --git a/be/test/vec/aggregate_functions/agg_function_test.h 
b/be/test/vec/aggregate_functions/agg_function_test.h index ea3f67d940aafe..47577d58a45d1b 100644 --- a/be/test/vec/aggregate_functions/agg_function_test.h +++ b/be/test/vec/aggregate_functions/agg_function_test.h @@ -163,18 +163,6 @@ struct AggregateFunctiontest : public testing::Test { std::vector places {place}; agg_fn->function()->serialize_to_column(places, 0, serialize_column, 1); } - - { - Arena arena; - auto* place = reinterpret_cast( - arena.alloc(agg_fn->function()->size_of_data())); - - agg_fn->create(place); - Defer defer([&]() { agg_fn->destroy(place); }); - agg_fn->function()->deserialize_from_column(place, *serialize_column, arena, 1); - - check_result(place); - } } { // streaming_agg_serialize_to_column deserialize_and_merge_from_column_range diff --git a/be/test/vec/aggregate_functions/agg_min_max_test.cpp b/be/test/vec/aggregate_functions/agg_min_max_test.cpp index 54ac10cd705ca2..79467129f9c047 100644 --- a/be/test/vec/aggregate_functions/agg_min_max_test.cpp +++ b/be/test/vec/aggregate_functions/agg_min_max_test.cpp @@ -112,12 +112,19 @@ TEST_P(AggMinMaxTest, min_max_decimal_test) { agg_function->streaming_agg_serialize_to_column(column, dst, agg_test_batch_size, arena); std::unique_ptr memory2(new char[agg_function->size_of_data() * agg_test_batch_size]); - AggregateDataPtr places = memory2.get(); - agg_function->deserialize_from_column(places, *dst, arena, agg_test_batch_size); + std::unique_ptr memory2_tmp( + new char[agg_function->size_of_data() * agg_test_batch_size]); + std::vector places(agg_test_batch_size); + for (size_t i = 0; i != agg_test_batch_size; ++i) { + places[i] = memory2.get() + agg_function->size_of_data() * i; + agg_function->create(places[i]); + } + agg_function->deserialize_and_merge_vec(places.data(), 0, memory2_tmp.get(), dst.get(), arena, + agg_test_batch_size); ColumnDecimal128V2 result(0, 9); for (size_t i = 0; i != agg_test_batch_size; ++i) { - agg_function->insert_result_into(places + agg_function->size_of_data() * 
i, result); + agg_function->insert_result_into(places[i], result); } for (size_t i = 0; i != agg_test_batch_size; ++i) { diff --git a/be/test/vec/data_types/data_type_agg_state_test.cpp b/be/test/vec/data_types/data_type_agg_state_test.cpp index 866ddf79842a63..4a775d21f2f845 100644 --- a/be/test/vec/data_types/data_type_agg_state_test.cpp +++ b/be/test/vec/data_types/data_type_agg_state_test.cpp @@ -111,7 +111,7 @@ TEST_P(DataTypeAggStateTest, CreateColumnTest) { // get_uncompressed_serialized_bytes ASSERT_EQ(datatype_agg_state_count->get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()), - 25); + 33); } void insert_data_agg_state(MutableColumns* agg_state_cols, DataTypePtr datatype_agg_state, diff --git a/be/test/vec/data_types/data_type_fixed_length_object_test.cpp b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp index ad0df6b5bd4acf..ed8b1670b443e8 100644 --- a/be/test/vec/data_types/data_type_fixed_length_object_test.cpp +++ b/be/test/vec/data_types/data_type_fixed_length_object_test.cpp @@ -103,7 +103,7 @@ TEST_P(DataTypeFixedLengthObjectTest, CreateColumnTest) { // get_uncompressed_serialized_bytes ASSERT_EQ(datatype_fixed_length->get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()), - 17); + 25); } void insert_data_fixed_length_data(MutableColumns* fixed_length_cols, @@ -132,9 +132,8 @@ TEST_P(DataTypeFixedLengthObjectTest, SerializeDeserializeTest) { auto size = datatype_fixed_length->get_uncompressed_serialized_bytes( *column, BeExecVersionManager::get_newest_version()); std::unique_ptr buf = std::make_unique(size); - auto* result = datatype_fixed_length->serialize(*column, buf.get(), - BeExecVersionManager::get_newest_version()); - ASSERT_EQ(result, buf.get() + size); + datatype_fixed_length->serialize(*column, buf.get(), + BeExecVersionManager::get_newest_version()); auto column2 = datatype_fixed_length->create_column(); datatype_fixed_length->deserialize(buf.get(), 
&column2, diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index d30c1355b37921..0c1aa6d882eb81 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2130,13 +2130,13 @@ public class Config extends ConfigBase { * Max data version of backends serialize block. */ @ConfField(mutable = false) - public static int max_be_exec_version = 8; + public static int max_be_exec_version = 10; /** * Min data version of backends serialize block. */ @ConfField(mutable = false) - public static int min_be_exec_version = 0; + public static int min_be_exec_version = 8; /** * Data version of backends serialize block. diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java index 1c6b53fd4e4e8a..978b90043d70f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Avg.java @@ -105,28 +105,16 @@ public FunctionSignature computePrecision(FunctionSignature signature) { } DecimalV3Type decimalV3Type = DecimalV3Type.forType(argumentType); // DecimalV3 scale lower than DEFAULT_MIN_AVG_DECIMAL128_SCALE should do cast - int precision = decimalV3Type.getPrecision(); int scale = decimalV3Type.getScale(); if (decimalV3Type.getScale() < ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE) { scale = ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE; - precision = precision - decimalV3Type.getScale() + scale; - if (enableDecimal256) { - if (precision > DecimalV3Type.MAX_DECIMAL256_PRECISION) { - precision = DecimalV3Type.MAX_DECIMAL256_PRECISION; - } - } else { - if (precision > DecimalV3Type.MAX_DECIMAL128_PRECISION) { - precision = 
DecimalV3Type.MAX_DECIMAL128_PRECISION; - } - } } - decimalV3Type = DecimalV3Type.createDecimalV3Type(precision, scale); return signature.withArgumentType(0, decimalV3Type) .withReturnType(DecimalV3Type.createDecimalV3Type( enableDecimal256 ? DecimalV3Type.MAX_DECIMAL256_PRECISION : DecimalV3Type.MAX_DECIMAL128_PRECISION, - decimalV3Type.getScale() - )); + scale) + ); } else { return signature; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java index d4cac4731127c0..04d1568c043faa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.catalog.ScalarType; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NeedSessionVarGuard; @@ -28,6 +29,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DecimalV3Type; import org.apache.doris.nereids.types.DoubleType; import org.apache.doris.nereids.types.FloatType; @@ -35,6 +37,7 @@ import org.apache.doris.nereids.types.LargeIntType; import org.apache.doris.nereids.types.SmallIntType; import org.apache.doris.nereids.types.TinyIntType; +import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -70,35 +73,6 @@ private 
ArrayAvg(ScalarFunctionParams functionParams) { super(functionParams); } - // TODO use this computePrecision if be support dynamic scale - // @Override - // public FunctionSignature computePrecision(FunctionSignature signature) { - // DataType argumentType = getArgumentType(0); - // if (argumentType instanceof ArrayType) { - // DataType argType = ((ArrayType) argumentType).getItemType(); - // DataType sigType = ((ArrayType) signature.getArgType(0)).getItemType(); - // if (sigType instanceof DecimalV3Type) { - // DecimalV3Type decimalV3Type = DecimalV3Type.forType(argType); - // // DecimalV3 scale lower than DEFAULT_MIN_AVG_DECIMAL128_SCALE should do cast - // int precision = decimalV3Type.getPrecision(); - // int scale = decimalV3Type.getScale(); - // if (decimalV3Type.getScale() < ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE) { - // scale = ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE; - // precision = precision - decimalV3Type.getScale() + scale; - // if (precision > DecimalV3Type.MAX_DECIMAL128_PRECISION) { - // precision = DecimalV3Type.MAX_DECIMAL128_PRECISION; - // } - // } - // decimalV3Type = DecimalV3Type.createDecimalV3Type(precision, scale); - // return signature.withArgumentType(0, ArrayType.of(decimalV3Type)) - // .withReturnType(ArrayType.of(DecimalV3Type.createDecimalV3Type( - // DecimalV3Type.MAX_DECIMAL128_PRECISION, decimalV3Type.getScale() - // ))); - // } - // } - // return signature; - // } - /** * array_avg needs to calculate the average of the elements in the array. * so the element type must be numeric, boolean or string. 
@@ -112,6 +86,33 @@ public void checkLegalityBeforeTypeCoercion() { } } + @Override + public FunctionSignature computePrecision(FunctionSignature signature) { + if (!(getArgumentType(0) instanceof ArrayType)) { + return signature; + } + DataType argumentType = ((ArrayType) getArgumentType(0)).getItemType(); + if (!(argumentType instanceof DecimalV3Type)) { + return signature; + } + boolean enableDecimal256 = false; + ConnectContext connectContext = ConnectContext.get(); + if (connectContext != null) { + enableDecimal256 = connectContext.getSessionVariable().isEnableDecimal256(); + } + DecimalV3Type decimalV3Type = DecimalV3Type.forType(argumentType); + // DecimalV3 scale lower than DEFAULT_MIN_AVG_DECIMAL128_SCALE should do cast + int scale = decimalV3Type.getScale(); + if (decimalV3Type.getScale() < ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE) { + scale = ScalarType.DEFAULT_MIN_AVG_DECIMAL128_SCALE; + } + return signature.withReturnType(DecimalV3Type.createDecimalV3Type( + enableDecimal256 ? DecimalV3Type.MAX_DECIMAL256_PRECISION + : DecimalV3Type.MAX_DECIMAL128_PRECISION, + scale) + ); + } + /** * withChildren. 
*/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index fac23dea4f19da..064f7f06ee6bf4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -315,6 +315,7 @@ public class SessionVariable implements Serializable, Writable { public static final String USE_SERIAL_EXCHANGE = "use_serial_exchange"; public static final String ENABLE_PARALLEL_SCAN = "enable_parallel_scan"; + public static final String ENABLE_AGGREGATE_FUNCTION_NULL_V2 = "enable_aggregate_function_null_v2"; public static final String ENABLE_NEW_SHUFFLE_HASH_METHOD = "enable_new_shuffle_hash_method"; @@ -1498,6 +1499,9 @@ public enum IgnoreSplitType { needForward = true) private boolean enableParallelScan = true; + @VariableMgr.VarAttr(name = ENABLE_AGGREGATE_FUNCTION_NULL_V2, fuzzy = true, needForward = true) + private boolean enableAggregateFunctionNullV2 = true; + @VariableMgr.VarAttr(name = OPTIMIZE_INDEX_SCAN_PARALLELISM, needForward = true, description = {"优化索引扫描时的 Scan 并行度,该优化目前只对 ann topn 查询生效", @@ -3174,6 +3178,9 @@ public void setDetailShapePlanNodes(String detailShapePlanNodes) { }) public boolean enablePhraseQuerySequentialOpt = true; + @VariableMgr.VarAttr(name = "enable_adjust_conjunct_order_by_cost", needForward = true) + public boolean enableAdjustConjunctOrderByCost = true; + @VariableMgr.VarAttr(name = REQUIRE_SEQUENCE_IN_INSERT, needForward = true, description = { "该变量用于控制,使用了 sequence 列的 unique key 表,insert into 操作是否要求必须提供每一行的 sequence 列的值", "This variable controls whether the INSERT INTO operation on unique key tables with a sequence" @@ -5176,6 +5183,7 @@ public TQueryOptions toThrift() { tResult.setEnableParallelScan(enableParallelScan); tResult.setEnableLeftSemiDirectReturnOpt(enableLeftSemiDirectReturnOpt); + tResult.setEnableAggregateFunctionNullV2(enableAggregateFunctionNullV2); 
tResult.setParallelScanMaxScannersCount(parallelScanMaxScannersCount); tResult.setParallelScanMinRowsPerScanner(parallelScanMinRowsPerScanner); tResult.setOptimizeIndexScanParallelism(optimizeIndexScanParallelism); @@ -5252,6 +5260,7 @@ public TQueryOptions toThrift() { } else { tResult.setFileCacheQueryLimitPercent(Config.file_cache_query_limit_max_percent); } + tResult.setEnableAdjustConjunctOrderByCost(enableAdjustConjunctOrderByCost); // Set Iceberg write target file size tResult.setIcebergWriteTargetFileSizeBytes(icebergWriteTargetFileSizeBytes); @@ -5575,6 +5584,10 @@ public boolean getEnableParallelScan() { return enableParallelScan; } + public boolean getEnableAggregateFunctionNullV2() { + return enableAggregateFunctionNullV2; + } + public boolean enableParallelResultSink() { return enableParallelResultSink; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index d0616dae8010fe..56e6724e900068 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -423,6 +423,8 @@ struct TQueryOptions { 179: optional bool enable_parquet_filter_by_bloom_filter = true; + 183: optional bool enable_aggregate_function_null_v2 = false; + 186: optional bool enable_streaming_agg_hash_join_force_passthrough; 187: optional bool enable_distinct_streaming_agg_force_passthrough; diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out index 6da3414e64894f..5ebef1618f9855 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_by_literal.out @@ -21,7 +21,7 @@ 2.0 -- !sql_5 -- -1.6 +1.6666 -- !sql_6 -- 5.0 @@ -294,7 +294,7 @@ true 2023-02-05 -- !sql -- -166.666 +166.6665 -- !sql -- 333.333 @@ -1005,7 +1005,7 
@@ _ 2.0 -- !sql_5 -- -1.6 +1.6666 -- !sql_6 -- 5.0 @@ -1278,7 +1278,7 @@ true 2023-02-05 -- !sql -- -166.666 +166.6665 -- !sql -- 333.333 @@ -1989,7 +1989,7 @@ _ 2.0 -- !sql_5 -- -1.6 +1.6666 -- !sql_6 -- 5.0 @@ -2262,7 +2262,7 @@ true 2023-02-05 -- !sql -- -166.666 +166.6665 -- !sql -- 333.333 diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query1.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query1.out index d5077dadd43722..26b10a7eaf94f8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query1.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query1.out @@ -18,7 +18,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecGather] --------PhysicalTopN[LOCAL_SORT] ----------PhysicalProject -------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) --------------PhysicalProject ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ctr_store_sk] ------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out index da2d98e86ac2d3..652a72965220ac 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject 
-----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query32.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query32.out index 7992f57d1c87b3..5626809a46f304 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query32.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query47.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query47.out index c04dc536e6bbef..168e0da1abed36 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query47.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query47.out @@ -7,25 +7,24 @@ 
PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) -----------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = 
date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF1 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query53.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query53.out index 6cc3c447d13449..0d45c2e12e1a05 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query53.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query53.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject 
---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -------------------------------------PhysicalProject ---------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -----------------------------------------PhysicalOlapScan[item] ---------------------------------PhysicalProject -----------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) -------------------------------------PhysicalOlapScan[date_dim] 
-----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------------------PhysicalProject +------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg 
#9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +--------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalProject +--------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query57.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query57.out index 12bdb0c331739c..262064d3d75b0f 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query57.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query57.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() -----------------------------------PhysicalProject 
-------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF1 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) -----------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[call_center] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF1 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[call_center] 
--PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query63.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query63.out index 94e35cb7458980..6ea696b8e541bf 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query63.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query63.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 -------------------------------------PhysicalProject ---------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 
'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -----------------------------------------PhysicalOlapScan[item] ---------------------------------PhysicalProject -----------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) -------------------------------------PhysicalOlapScan[date_dim] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject 
+--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +----------------------------------PhysicalProject +------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +--------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalProject +--------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out index 39926be2872da5..dfd8b723c757c8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query81.out @@ -26,7 
+26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query89.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query89.out index 8b0e89ca99b508..f2b5a51e354132 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query89.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() 
-------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 1999)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 
d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 1999)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query92.out b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query92.out index b31b235e379e59..ff2e7b944234b5 100644 --- a/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query92.out +++ b/regression-test/data/shape_check/tpcds_sf100/noStatsRfPrune/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY 
i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query1.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query1.out index 9e34c347fc0a1a..d85e1717c3de60 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query1.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query1.out @@ -18,7 +18,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecGather] --------PhysicalTopN[LOCAL_SORT] ----------PhysicalProject -------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF3 ctr_store_sk->[ctr_store_sk,s_store_sk] +------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF3 ctr_store_sk->[ctr_store_sk,s_store_sk] --------------PhysicalProject ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ctr_store_sk] ------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out index 0b4b3a8b2d41ab..a606e6b0e46eac 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN 
shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query32.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query32.out index 7992f57d1c87b3..5626809a46f304 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query32.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query47.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query47.out index 2f76e9211ef4df..850d15e639038d 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query47.out +++ 
b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query47.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) -----------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = 
store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query53.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query53.out index 00c45d333ec013..d0e208d507ea7e 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query53.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query53.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow 
---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -------------------------------------PhysicalProject ---------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) 
-----------------------------------------PhysicalOlapScan[item] ---------------------------------PhysicalProject -----------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) -------------------------------------PhysicalOlapScan[date_dim] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 
'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +--------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalProject +--------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query57.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query57.out index 697bd284f5701e..2193c248142367 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query57.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query57.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] -------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) -----------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[call_center] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 
+--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[item] +----------------------------PhysicalProject +------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +--------------------------------PhysicalOlapScan[date_dim] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[call_center] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query63.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query63.out index 210a97832febf1..0ddc13281ee7c7 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query63.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query63.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] 
---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -------------------------------------PhysicalProject ---------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -----------------------------------------PhysicalOlapScan[item] ---------------------------------PhysicalProject -----------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) -------------------------------------PhysicalOlapScan[date_dim] -----------------------------PhysicalProject -------------------------------PhysicalOlapScan[store] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject 
+----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +--------------------------------------PhysicalOlapScan[item] +------------------------------PhysicalProject 
+--------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out index 59ecfa850d8855..523ef0f216b677 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query81.out @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------hashJoin[INNER_JOIN shuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 ca_address_sk->[c_current_addr_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF3 ctr_state->[ctr_state] +--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF3 ctr_state->[ctr_state] ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query89.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query89.out index 552fbbd3aaf5ae..f1140e16425f92 100644 --- 
a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query89.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) 
-------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 1999)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 1999)) 
+------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query92.out b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query92.out index b31b235e379e59..ff2e7b944234b5 100644 --- a/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query92.out +++ b/regression-test/data/shape_check/tpcds_sf100/no_stats_shape/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query1.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query1.out index 777b75ff910eef..d113312e3a90e1 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query1.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query1.out @@ -22,7 +22,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------PhysicalOlapScan[customer] apply RFs: RF3 --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 
1.2))) ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ctr_store_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out index 7749fbe25590fa..0c9dabe6acd889 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query32.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query32.out index 7992f57d1c87b3..5626809a46f304 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query32.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as 
DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query47.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query47.out index 0a70cbcf51c3a2..308ba0651c0641 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query47.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query47.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and 
d_year IN (2000, 2001, 2002)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query53.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query53.out index 89dc632eb527c4..1dadda9a7cebc3 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query53.out +++ 
b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query53.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto 
#1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 
i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query57.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query57.out index ca1f63bfb07616..29cf69ca2fb7aa 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query57.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query57.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] 
---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[call_center] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() +----------------------------PhysicalProject 
+------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[call_center] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query63.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query63.out index 9653f6c52199aa..d3dc8b76ab1fe2 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query63.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query63.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject 
---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 
'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out index 76543856508ac0..6f5b2017f23a61 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query81.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query89.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query89.out index 
8b0e89ca99b508..f2b5a51e354132 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query89.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) 
-------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 1999)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 1999)) +------------------------------------PhysicalOlapScan[date_dim] 
+----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query92.out b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query92.out index b31b235e379e59..ff2e7b944234b5 100644 --- a/regression-test/data/shape_check/tpcds_sf100/rf_prune/query92.out +++ b/regression-test/data/shape_check/tpcds_sf100/rf_prune/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query1.out b/regression-test/data/shape_check/tpcds_sf100/shape/query1.out index 05ecb4a6ff3820..d82ab3b7b3a71a 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query1.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query1.out @@ -22,7 +22,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------PhysicalOlapScan[customer] apply RFs: RF3 --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF2 
ctr_store_sk->[ctr_store_sk,s_store_sk] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ctr_store_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 RF2 diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query30.out b/regression-test/data/shape_check/tpcds_sf100/shape/query30.out index 002bd9732c26b7..501252be2e9ae0 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query32.out b/regression-test/data/shape_check/tpcds_sf100/shape/query32.out index 7992f57d1c87b3..5626809a46f304 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query32.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] 
------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query47.out b/regression-test/data/shape_check/tpcds_sf100/shape/query47.out index fba9743e0f2353..23284283b00ccb 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query47.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query47.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject 
-------------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ss_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[store] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git 
a/regression-test/data/shape_check/tpcds_sf100/shape/query53.out b/regression-test/data/shape_check/tpcds_sf100/shape/query53.out index 04920e65ac6894..8123493edfe790 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query53.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query53.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN 
('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query57.out b/regression-test/data/shape_check/tpcds_sf100/shape/query57.out index 
152c2c884779f7..249c6244f53d00 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query57.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query57.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[cs_item_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[item] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[call_center] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] 
+------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[cs_item_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[item] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[call_center] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query63.out b/regression-test/data/shape_check/tpcds_sf100/shape/query63.out index d4fb4990da98b8..bc33a3efc70028 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query63.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query63.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 
0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 
'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject 
+--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query81.out b/regression-test/data/shape_check/tpcds_sf100/shape/query81.out index 5baf0f85350f03..769d5e23bc3db8 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query81.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build 
RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query89.out b/regression-test/data/shape_check/tpcds_sf100/shape/query89.out index 552fbbd3aaf5ae..f1140e16425f92 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query89.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build 
RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 1999)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 
+------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Electronics', 'Jewelry', 'Shoes'),i_class IN ('athletic', 'portable', 'semi-precious')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'maternity', 'rock')]] and i_category IN ('Electronics', 'Jewelry', 'Men', 'Music', 'Shoes', 'Women') and i_class IN ('accessories', 'athletic', 'maternity', 'portable', 'rock', 'semi-precious')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 1999)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf100/shape/query92.out b/regression-test/data/shape_check/tpcds_sf100/shape/query92.out index b31b235e379e59..ff2e7b944234b5 100644 --- a/regression-test/data/shape_check/tpcds_sf100/shape/query92.out +++ b/regression-test/data/shape_check/tpcds_sf100/shape/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query1.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query1.out index d6cf263a9ea7e9..a61989e92f9cf8 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query1.out +++ 
b/regression-test/data/shape_check/tpcds_sf1000/hint/query1.out @@ -22,7 +22,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------PhysicalOlapScan[customer] apply RFs: RF3 --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ctr_store_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 RF2 diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out index 64abd569c96874..e113f39e684164 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 
ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ctr_customer_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF3 RF4 diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query32.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query32.out index c5af5d4511fa19..b9554f006b4ba4 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query32.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query47.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query47.out index 9a2258d852cebf..d35771e6aef29e 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query47.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query47.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = 
item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 2000),AND[(date_dim.d_year = 1999),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2001),(date_dim.d_moy = 1)]] and d_year IN (1999, 2000, 2001)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[item] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] 
+--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2000),AND[(date_dim.d_year = 1999),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2001),(date_dim.d_moy = 1)]] and d_year IN (1999, 2000, 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query57.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query57.out index a2a31216a3d458..769ccf67b17b76 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query57.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query57.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF1 cc_call_center_sk->[cs_call_center_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = 
date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[call_center] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[item] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF1 cc_call_center_sk->[cs_call_center_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) 
+------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[call_center] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out index ed0d2cb8c75887..dc6759a4716103 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query81.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ctr_customer_sk->[c_customer_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query89.out b/regression-test/data/shape_check/tpcds_sf1000/hint/query89.out index a1e8e771fe13f5..ebda092b0512f0 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query89.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query89.out @@ -9,26 +9,25 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), 
(cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('audio', 'history', 'school-uniforms')],AND[i_category IN ('Men', 'Shoes', 'Sports'),i_class IN ('pants', 'tennis', 'womens')]] and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Shoes', 'Sports') and i_class IN ('audio', 'history', 'pants', 'school-uniforms', 'tennis', 'womens')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 2001)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject 
---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('audio', 'history', 'school-uniforms')],AND[i_category IN ('Men', 'Shoes', 'Sports'),i_class IN ('pants', 'tennis', 'womens')]] and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Shoes', 'Sports') and i_class IN ('audio', 'history', 'pants', 'school-uniforms', 'tennis', 'womens')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] Hint log: Used: leading(store_sales item date_dim store ) diff --git a/regression-test/data/shape_check/tpcds_sf1000/hint/query92.out 
b/regression-test/data/shape_check/tpcds_sf1000/hint/query92.out index fc4882788d648f..1471354c4e951d 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/hint/query92.out +++ b/regression-test/data/shape_check/tpcds_sf1000/hint/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query1.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query1.out index 7a66b56b70ee27..469e6bf7aa10ff 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query1.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query1.out @@ -22,7 +22,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------PhysicalOlapScan[customer] apply RFs: RF3 --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF2 ctr_store_sk->[ctr_store_sk,s_store_sk] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF1 
s_store_sk->[ctr_store_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF1 RF2 diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out index ffdbebdce5d08e..60a8899c09777a 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query30.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query32.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query32.out index c413940debc6e7..cb7cc0e46364ed 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query32.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) 
+--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query47.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query47.out index 0428d0e8670918..3e2807c6a6a175 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query47.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query47.out @@ -7,25 +7,24 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 2000),AND[(date_dim.d_year = 1999),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2001),(date_dim.d_moy = 1)]] and d_year IN (1999, 2000, 2001)) 
---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[item] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2000),AND[(date_dim.d_year = 1999),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2001),(date_dim.d_moy = 1)]] and d_year IN (1999, 2000, 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query53.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query53.out index d2467a65e93e09..731528245cb0a8 100644 --- 
a/regression-test/data/shape_check/tpcds_sf1000/shape/query53.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query53.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN 
('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject 
+----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query57.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query57.out index 00c01451579574..99a5ab814d3589 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query57.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query57.out @@ -7,25 +7,24 @@ 
PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalQuickSort[LOCAL_SORT] ----------PhysicalWindow ------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] -----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[cs_item_sk] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF1 cc_call_center_sk->[cs_call_center_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[call_center] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[item] +--------------hashAgg[GLOBAL] +----------------PhysicalDistribute[DistributionSpecHash] +------------------hashAgg[LOCAL] +--------------------PhysicalProject +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 
i_item_sk->[cs_item_sk] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF1 cc_call_center_sk->[cs_call_center_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[cs_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 +--------------------------------PhysicalProject +----------------------------------filter(OR[(date_dim.d_year = 2001),AND[(date_dim.d_year = 2000),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2002),(date_dim.d_moy = 1)]] and d_year IN (2000, 2001, 2002)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[call_center] +------------------------PhysicalProject +--------------------------PhysicalOlapScan[item] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query63.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query63.out index bbbb80bc4b68e0..e5ef3755356965 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query63.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query63.out @@ -4,29 +4,28 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] 
-----------------PhysicalDistribute[DistributionSpecHash] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 
'self-help')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter(d_month_seq IN (1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalDistribute[DistributionSpecHash] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 
'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter(d_month_seq IN (1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out index 88c217db90e759..51c8fb41cb9360 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query81.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalDistribute[DistributionSpecGather] ------------PhysicalTopN[LOCAL_SORT] --------------PhysicalProject -----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN broadcast] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > 
(avg(ctr_total_return) * 1.2))) build RFs:RF4 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 ca_address_sk->[c_current_addr_sk] ----------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query89.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query89.out index e18adf036aa80e..948e8f265a00e6 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query89.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 
---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('audio', 'history', 'school-uniforms')],AND[i_category IN ('Men', 'Shoes', 'Sports'),i_class IN ('pants', 'tennis', 'womens')]] and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Shoes', 'Sports') and i_class IN ('audio', 'history', 'pants', 'school-uniforms', 'tennis', 'womens')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 2001)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('audio', 'history', 
'school-uniforms')],AND[i_category IN ('Men', 'Shoes', 'Sports'),i_class IN ('pants', 'tennis', 'womens')]] and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Shoes', 'Sports') and i_class IN ('audio', 'history', 'pants', 'school-uniforms', 'tennis', 'womens')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 2001)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf1000/shape/query92.out b/regression-test/data/shape_check/tpcds_sf1000/shape/query92.out index 39a6db24528f22..f44949b6b8cece 100644 --- a/regression-test/data/shape_check/tpcds_sf1000/shape/query92.out +++ b/regression-test/data/shape_check/tpcds_sf1000/shape/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query1.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query1.out index 25c947e5fbd5c3..a289ae9b2c55d1 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query1.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query1.out @@ -18,7 +18,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ------PhysicalDistribute[DistributionSpecGather] --------PhysicalTopN[LOCAL_SORT] 
----------PhysicalProject -------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF3 ctr_store_sk->[ctr_store_sk,s_store_sk] +------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_store_sk = ctr2.ctr_store_sk)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF3 ctr_store_sk->[ctr_store_sk,s_store_sk] --------------PhysicalProject ----------------hashJoin[INNER_JOIN broadcast] hashCondition=((store.s_store_sk = ctr1.ctr_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ctr_store_sk] ------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query30.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query30.out index 001572c66e55b3..3d91ea14800141 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query30.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query30.out @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------------PhysicalProject ----------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 c_customer_sk->[ctr_customer_sk] ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF2 ctr_state->[ctr_state] +--------------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF2 ctr_state->[ctr_state] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 
----------------------hashAgg[GLOBAL] ------------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query32.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query32.out index 0a251a341c24df..b08ae2d4c58926 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query32.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query32.out @@ -7,7 +7,7 @@ PhysicalResultSink --------PhysicalDistribute[DistributionSpecGather] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(cs_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +--------------filter((cast(cs_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cs_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) ----------------PhysicalWindow ------------------PhysicalQuickSort[LOCAL_SORT] --------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query47.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query47.out index d1c72cdf6f5766..1993741b92a3fe 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query47.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query47.out @@ -2,30 +2,28 @@ -- !ds_shape_47 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------PhysicalWindow ---------PhysicalQuickSort[LOCAL_SORT] -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] +----PhysicalWindow +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalWindow +----------PhysicalQuickSort[LOCAL_SORT] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] 
+----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalOlapScan[item] 
--------------------------PhysicalProject -----------------------------PhysicalOlapScan[store] +----------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[store] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query53.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query53.out index 08f3ba6e090871..4d739298eab9cf 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query53.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query53.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN 
broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -------------------------------------PhysicalProject ---------------------------------------filter(d_month_seq IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)) -----------------------------------------PhysicalOlapScan[date_dim] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) ---------------------------------PhysicalOlapScan[item] +--------filter((if((avg_quarterly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_quarterly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_quarterly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] 
+--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter(d_month_seq IN (1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store] +--------------------------PhysicalProject +----------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN 
('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query57.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query57.out index 697bd284f5701e..70c0a6b711ddb4 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query57.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query57.out @@ -2,30 +2,28 @@ -- !ds_shape_57 -- PhysicalCteAnchor ( cteId=CTEId#0 ) --PhysicalCteProducer ( cteId=CTEId#0 ) -----PhysicalProject -------PhysicalWindow ---------PhysicalQuickSort[LOCAL_SORT] -----------PhysicalWindow -------------PhysicalQuickSort[LOCAL_SORT] ---------------PhysicalProject -----------------hashAgg[GLOBAL] -------------------PhysicalDistribute[DistributionSpecHash] ---------------------hashAgg[LOCAL] +----PhysicalWindow +------PhysicalQuickSort[LOCAL_SORT] +--------PhysicalWindow +----------PhysicalQuickSort[LOCAL_SORT] +------------hashAgg[GLOBAL] +--------------PhysicalDistribute[DistributionSpecHash] +----------------hashAgg[LOCAL] +------------------PhysicalProject +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((call_center.cc_call_center_sk = catalog_sales.cs_call_center_sk)) otherCondition=() build RFs:RF2 cc_call_center_sk->[cs_call_center_sk] +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_sold_date_sk = 
date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[cs_sold_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] ------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[cs_item_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF0 RF1 RF2 ------------------------------PhysicalProject ---------------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) -----------------------------------PhysicalOlapScan[date_dim] +--------------------------------PhysicalOlapScan[item] --------------------------PhysicalProject -----------------------------PhysicalOlapScan[call_center] +----------------------------filter(OR[(date_dim.d_year = 1999),AND[(date_dim.d_year = 1998),(date_dim.d_moy = 12)],AND[(date_dim.d_year = 2000),(date_dim.d_moy = 1)]] and d_year IN (1998, 1999, 2000)) +------------------------------PhysicalOlapScan[date_dim] +----------------------PhysicalProject +------------------------PhysicalOlapScan[call_center] --PhysicalResultSink ----PhysicalProject ------PhysicalTopN[MERGE_SORT] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query63.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query63.out index 1ad6a72b1612f8..cc1e6cd144ce99 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query63.out +++ 
b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query63.out @@ -4,28 +4,27 @@ PhysicalResultSink --PhysicalTopN[MERGE_SORT] ----PhysicalDistribute[DistributionSpecGather] ------PhysicalTopN[LOCAL_SORT] ---------PhysicalProject -----------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) -------------PhysicalWindow ---------------PhysicalQuickSort[LOCAL_SORT] -----------------PhysicalProject -------------------hashAgg[GLOBAL] ---------------------PhysicalDistribute[DistributionSpecHash] -----------------------hashAgg[LOCAL] -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] ---------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------PhysicalProject ---------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 -------------------------------------PhysicalProject ---------------------------------------filter(d_month_seq IN (1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222)) -----------------------------------------PhysicalOlapScan[date_dim] ---------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store] -----------------------------PhysicalProject -------------------------------filter(OR[AND[i_category IN ('Books', 'Children', 
'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) ---------------------------------PhysicalOlapScan[item] +--------filter((if((avg_monthly_sales > 0.0000), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) +----------PhysicalWindow +------------PhysicalQuickSort[LOCAL_SORT] +--------------PhysicalProject +----------------hashAgg[GLOBAL] +------------------PhysicalDistribute[DistributionSpecHash] +--------------------hashAgg[LOCAL] +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF1 s_store_sk->[ss_store_sk] +------------------------------PhysicalProject +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +----------------------------------PhysicalProject 
+------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +----------------------------------PhysicalProject +------------------------------------filter(d_month_seq IN (1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[store] +--------------------------PhysicalProject +----------------------------filter(OR[AND[i_category IN ('Books', 'Children', 'Electronics'),i_class IN ('personal', 'portable', 'reference', 'self-help'),i_brand IN ('exportiunivamalg #9', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9')],AND[i_category IN ('Men', 'Music', 'Women'),i_class IN ('accessories', 'classical', 'fragrances', 'pants'),i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')]] and i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'exportiunivamalg #9', 'importoamalg #1', 'scholaramalgamalg #14', 'scholaramalgamalg #7', 'scholaramalgamalg #9') and i_category IN ('Books', 'Children', 'Electronics', 'Men', 'Music', 'Women') and i_class IN ('accessories', 'classical', 'fragrances', 'pants', 'personal', 'portable', 'reference', 'self-help')) +------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query81.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query81.out index 887315b443ab47..6a20ddc45af954 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query81.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query81.out @@ -24,7 +24,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------PhysicalProject ------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 
ca_address_sk->[c_current_addr_sk] --------------PhysicalProject -----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(cast(ctr_total_return as DECIMALV3(38, 4))) * 1.2))) build RFs:RF3 ctr_state->[ctr_state] +----------------hashJoin[INNER_JOIN shuffleBucket] hashCondition=((ctr1.ctr_state = ctr2.ctr_state)) otherCondition=((cast(ctr_total_return as DECIMALV3(38, 5)) > (avg(ctr_total_return) * 1.2))) build RFs:RF3 ctr_state->[ctr_state] ------------------PhysicalProject --------------------hashJoin[INNER_JOIN shuffle] hashCondition=((ctr1.ctr_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 c_customer_sk->[ctr_customer_sk] ----------------------PhysicalCteConsumer ( cteId=CTEId#0 ) apply RFs: RF2 RF3 diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query89.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query89.out index db4f8eeac80499..2d764abc53ef09 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query89.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query89.out @@ -9,24 +9,23 @@ PhysicalResultSink ------------filter((if(( not (avg_monthly_sales = 0.0000)), (cast(abs((sum_sales - cast(avg_monthly_sales as DECIMALV3(38, 2)))) as DECIMALV3(38, 10)) / avg_monthly_sales), NULL) > 0.100000)) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] ---------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ---------------------------------------PhysicalProject -----------------------------------------filter(OR[AND[i_category IN ('Books', 'Home', 'Music'),i_class IN ('classical', 'fiction', 'glassware')],AND[i_category IN ('Jewelry', 'Sports', 'Women'),i_class IN ('baseball', 'dresses', 'semi-precious')]] and i_category IN ('Books', 'Home', 'Jewelry', 'Music', 'Sports', 'Women') and i_class IN ('baseball', 'classical', 'dresses', 'fiction', 'glassware', 'semi-precious')) -------------------------------------------PhysicalOlapScan[item] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_year = 2000)) ---------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[store] +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF2 s_store_sk->[ss_store_sk] +----------------------------PhysicalProject +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] +--------------------------------PhysicalProject 
+----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF0 i_item_sk->[ss_item_sk] +------------------------------------PhysicalProject +--------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 +------------------------------------PhysicalProject +--------------------------------------filter(OR[AND[i_category IN ('Books', 'Home', 'Music'),i_class IN ('classical', 'fiction', 'glassware')],AND[i_category IN ('Jewelry', 'Sports', 'Women'),i_class IN ('baseball', 'dresses', 'semi-precious')]] and i_category IN ('Books', 'Home', 'Jewelry', 'Music', 'Sports', 'Women') and i_class IN ('baseball', 'classical', 'dresses', 'fiction', 'glassware', 'semi-precious')) +----------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------filter((date_dim.d_year = 2000)) +------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[store] diff --git a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query92.out b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query92.out index 138971f0b42575..bea3077e4fef9d 100644 --- a/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query92.out +++ b/regression-test/data/shape_check/tpcds_sf10t_orc/shape/query92.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(cast(ws_ext_discount_amt as DECIMALV3(9, 4))) OVER(PARTITION BY i_item_sk)))) +------------filter((cast(ws_ext_discount_amt as DECIMALV3(38, 5)) > (1.3 * avg(ws_ext_discount_amt) OVER(PARTITION BY i_item_sk)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] 
------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpch_sf1000/hint/q17.out b/regression-test/data/shape_check/tpch_sf1000/hint/q17.out index fdfed1403a5833..7846d2f3b5e569 100644 --- a/regression-test/data/shape_check/tpch_sf1000/hint/q17.out +++ b/regression-test/data/shape_check/tpch_sf1000/hint/q17.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(cast(l_quantity as DECIMALV3(17, 4))) OVER(PARTITION BY p_partkey)))) +------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(l_quantity) OVER(PARTITION BY p_partkey)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q17.out b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q17.out index 5574a3599beb7c..6c1bc1d0fe8fe1 100644 --- a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q17.out +++ b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q17.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(cast(l_quantity as DECIMALV3(17, 4))) OVER(PARTITION BY p_partkey)))) +------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(l_quantity) OVER(PARTITION BY p_partkey)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalProject diff --git a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q17.out b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q17.out index 850c567ab4aa39..1fe52b08aa8035 100644 --- a/regression-test/data/shape_check/tpch_sf1000/rf_prune/q17.out +++ 
b/regression-test/data/shape_check/tpch_sf1000/rf_prune/q17.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(cast(l_quantity as DECIMALV3(17, 4))) OVER(PARTITION BY p_partkey)))) +------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(l_quantity) OVER(PARTITION BY p_partkey)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpch_sf1000/shape/q17.out b/regression-test/data/shape_check/tpch_sf1000/shape/q17.out index 850c567ab4aa39..1fe52b08aa8035 100644 --- a/regression-test/data/shape_check/tpch_sf1000/shape/q17.out +++ b/regression-test/data/shape_check/tpch_sf1000/shape/q17.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(cast(l_quantity as DECIMALV3(17, 4))) OVER(PARTITION BY p_partkey)))) +------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(l_quantity) OVER(PARTITION BY p_partkey)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q17.out b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q17.out index 5574a3599beb7c..6c1bc1d0fe8fe1 100644 --- a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q17.out +++ b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q17.out @@ -6,7 +6,7 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------hashAgg[LOCAL] ----------PhysicalProject -------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(cast(l_quantity as DECIMALV3(17, 
4))) OVER(PARTITION BY p_partkey)))) +------------filter((cast(l_quantity as DECIMALV3(38, 5)) < (0.2 * avg(l_quantity) OVER(PARTITION BY p_partkey)))) --------------PhysicalWindow ----------------PhysicalQuickSort[LOCAL_SORT] ------------------PhysicalProject