be/src/exprs/vdirect_in_predicate.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <algorithm> |
21 | | #include <vector> |
22 | | |
23 | | #include "common/logging.h" |
24 | | #include "common/status.h" |
25 | | #include "core/field.h" |
26 | | #include "core/string_ref.h" |
27 | | #include "core/types.h" |
28 | | #include "exprs/expr_zonemap_filter.h" |
29 | | #include "exprs/hybrid_set.h" |
30 | | #include "exprs/vexpr.h" |
31 | | #include "exprs/vin_predicate.h" |
32 | | #include "exprs/vliteral.h" |
33 | | #include "exprs/vslot_ref.h" |
34 | | |
35 | | namespace doris { |
36 | | |
37 | | class VDirectInPredicate final : public VExpr { |
38 | | ENABLE_FACTORY_CREATOR(VDirectInPredicate); |
39 | | |
40 | | public: |
41 | | VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter, |
42 | | bool hybrid_set_values_match_child_type = true) |
43 | 1.24k | : VExpr(node), |
44 | 1.24k | _filter(filter), |
45 | 1.24k | _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type), |
46 | 1.24k | _expr_name("direct_in_predicate") {} |
47 | 1.24k | ~VDirectInPredicate() override = default; |
48 | | |
49 | | #ifdef BE_TEST |
50 | | VDirectInPredicate() = default; |
51 | | #endif |
52 | | |
53 | | Status prepare(RuntimeState* state, const RowDescriptor& row_desc, |
54 | 1.19k | VExprContext* context) override { |
55 | 1.19k | RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); |
56 | 1.19k | RETURN_IF_ERROR(_materialize_for_zonemap_filter()); |
57 | 1.19k | _prepare_finished = true; |
58 | 1.19k | return Status::OK(); |
59 | 1.19k | } |
60 | | |
61 | | Status open(RuntimeState* state, VExprContext* context, |
62 | 1.59k | FunctionContext::FunctionStateScope scope) override { |
63 | 1.59k | DCHECK(_prepare_finished); |
64 | 1.59k | RETURN_IF_ERROR(VExpr::open(state, context, scope)); |
65 | 1.59k | _open_finished = true; |
66 | 1.59k | return Status::OK(); |
67 | 1.59k | } |
68 | | |
69 | | Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, |
70 | 0 | size_t count, ColumnPtr& result_column) const override { |
71 | 0 | return _do_execute(context, block, nullptr, selector, count, result_column, nullptr); |
72 | 0 | } |
73 | | |
74 | | Status execute_runtime_filter(VExprContext* context, const Block* block, |
75 | | const uint8_t* __restrict filter, size_t count, |
76 | 517 | ColumnPtr& result_column, ColumnPtr* arg_column) const override { |
77 | 517 | return _do_execute(context, block, filter, nullptr, count, result_column, arg_column); |
78 | 517 | } |
79 | | |
80 | 1.19k | const std::string& expr_name() const override { return _expr_name; } |
81 | | |
82 | 1.33k | std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; } |
83 | | |
84 | 0 | ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override { |
85 | 0 | return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values, |
86 | 0 | _seg_filter_min, _seg_filter_max); |
87 | 0 | } |
88 | | |
89 | 275 | bool can_evaluate_zonemap_filter() const override { |
90 | 275 | return _zonemap_materialized && |
91 | 275 | expr_zonemap::can_eval_in_zonemap(get_child(0), _seg_filter_values, _seg_filter_min, |
92 | 274 | _seg_filter_max); |
93 | 275 | } |
94 | | |
95 | 6 | bool get_slot_in_expr(VExprSPtr& new_root) const { |
96 | 6 | if (!_hybrid_set_values_match_child_type) { |
97 | 1 | return false; |
98 | 1 | } |
99 | 5 | if (!get_child(0)->is_slot_ref()) { |
100 | 0 | return false; |
101 | 0 | } |
102 | | |
103 | 5 | auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get()); |
104 | 5 | auto slot_data_type = remove_nullable(slot_ref->data_type()); |
105 | 5 | { |
106 | 5 | TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN); |
107 | 5 | TExprNode node; |
108 | 5 | node.__set_type(type_desc); |
109 | 5 | node.__set_node_type(TExprNodeType::IN_PRED); |
110 | 5 | node.in_predicate.__set_is_not_in(false); |
111 | 5 | node.__set_opcode(TExprOpcode::FILTER_IN); |
112 | | // VdirectInPredicate assume is_nullable = false. |
113 | 5 | node.__set_is_nullable(false); |
114 | 5 | new_root = VInPredicate::create_shared(node); |
115 | 5 | } |
116 | 5 | { |
117 | | // add slot |
118 | 5 | new_root->add_child(children().at(0)); |
119 | 5 | } |
120 | 5 | { |
121 | 5 | auto iter = get_set_func()->begin(); |
122 | 17 | while (iter->has_next()) { |
123 | 12 | DCHECK(iter->get_value() != nullptr); |
124 | 12 | const void* value = iter->get_value(); |
125 | | |
126 | 12 | TExprNode node = _create_texpr_node_from_hybrid_set_value( |
127 | 12 | value, slot_data_type->get_primitive_type(), |
128 | 12 | slot_data_type->get_precision(), slot_data_type->get_scale()); |
129 | 12 | new_root->add_child(VLiteral::create_shared(node)); |
130 | 12 | iter->next(); |
131 | 12 | } |
132 | 5 | } |
133 | 5 | return true; |
134 | 5 | } |
135 | | |
136 | 1.18k | uint64_t get_digest(uint64_t seed) const override { |
137 | 1.18k | seed = _children[0]->get_digest(seed); |
138 | 1.18k | if (seed) { |
139 | 1.18k | return _filter->get_digest(seed); |
140 | 1.18k | } |
141 | 0 | return seed; |
142 | 1.18k | } |
143 | | |
144 | | private: |
145 | | Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter, |
146 | | const Selector* selector, size_t count, ColumnPtr& result_column, |
147 | 517 | ColumnPtr* arg_column) const { |
148 | 517 | DCHECK(_open_finished || block == nullptr); |
149 | 517 | DCHECK(!(filter != nullptr && selector != nullptr)) |
150 | 0 | << "filter and selector can not be both set"; |
151 | 517 | ColumnPtr argument_column; |
152 | 517 | RETURN_IF_ERROR( |
153 | 517 | _children[0]->execute_column(context, block, selector, count, argument_column)); |
154 | 517 | argument_column = argument_column->convert_to_full_column_if_const(); |
155 | | |
156 | 517 | if (arg_column != nullptr) { |
157 | 517 | *arg_column = argument_column; |
158 | 517 | } |
159 | | |
160 | 517 | size_t sz = argument_column->size(); |
161 | 517 | auto res_data_column = ColumnUInt8::create(sz); |
162 | 517 | res_data_column->resize(sz); |
163 | | |
164 | 517 | if (argument_column->is_nullable()) { |
165 | 201 | auto column_nested = static_cast<const ColumnNullable*>(argument_column.get()) |
166 | 201 | ->get_nested_column_ptr(); |
167 | 201 | const auto& null_map = |
168 | 201 | static_cast<const ColumnNullable*>(argument_column.get())->get_null_map_data(); |
169 | 201 | _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(), |
170 | 201 | filter); |
171 | 316 | } else { |
172 | 316 | _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter); |
173 | 316 | } |
174 | | |
175 | 517 | DCHECK(!_data_type->is_nullable()); |
176 | 517 | result_column = std::move(res_data_column); |
177 | 517 | return Status::OK(); |
178 | 517 | } |
179 | | |
180 | 1.20k | Status _materialize_for_zonemap_filter() { |
181 | 1.20k | if (!_hybrid_set_values_match_child_type) { |
182 | 1 | _zonemap_materialized = false; |
183 | 1 | return Status::OK(); |
184 | 1 | } |
185 | 1.20k | DORIS_CHECK(_filter != nullptr); |
186 | 1.20k | auto& filter = *_filter; |
187 | 1.20k | const auto& data_type = remove_nullable(get_child(0)->data_type()); |
188 | 1.20k | _seg_filter_values.clear(); |
189 | 1.20k | auto* iterator = filter.begin(); |
190 | 422k | while (iterator->has_next()) { |
191 | 421k | const void* value = iterator->get_value(); |
192 | 421k | if (value != nullptr) { |
193 | 421k | TExprNode literal_node = _create_texpr_node_from_hybrid_set_value( |
194 | 421k | value, remove_nullable(data_type)->get_primitive_type(), |
195 | 421k | remove_nullable(data_type)->get_precision(), |
196 | 421k | remove_nullable(data_type)->get_scale()); |
197 | 421k | auto literal = VLiteral::create_shared(literal_node); |
198 | 421k | Field field; |
199 | 421k | literal->get_column_ptr()->get(0, field); |
200 | 421k | _seg_filter_values.emplace_back(std::move(field)); |
201 | 421k | } |
202 | 421k | iterator->next(); |
203 | 421k | } |
204 | 1.20k | if (_seg_filter_values.empty()) { |
205 | 69 | _zonemap_materialized = true; |
206 | 69 | return Status::OK(); |
207 | 69 | } |
208 | 1.13k | auto minmax = std::ranges::minmax_element(_seg_filter_values, expr_zonemap::field_less); |
209 | 1.13k | _seg_filter_min = *minmax.min; |
210 | 1.13k | _seg_filter_max = *minmax.max; |
211 | 1.13k | _zonemap_materialized = true; |
212 | 1.13k | return Status::OK(); |
213 | 1.20k | } |
214 | | |
215 | | static TExprNode _create_texpr_node_from_hybrid_set_value(const void* data, |
216 | | const PrimitiveType& type, |
217 | 420k | int precision, int scale) { |
218 | 420k | if (is_string_type(type)) { |
219 | 121 | const auto* value = reinterpret_cast<const StringRef*>(data); |
220 | 121 | auto field = Field::create_field<TYPE_STRING>(String(value->data, value->size)); |
221 | 121 | return create_texpr_node_from(field, type, precision, scale); |
222 | 121 | } |
223 | 420k | return create_texpr_node_from(data, type, precision, scale); |
224 | 420k | } |
225 | | |
226 | | std::shared_ptr<HybridSetBase> _filter; |
227 | | // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the |
228 | | // child slot keeps the original logical type. Such values must not be materialized as child-type |
229 | | // literals for zonemap pruning or slot-IN rewrite. |
230 | | bool _hybrid_set_values_match_child_type = true; |
231 | | std::string _expr_name; |
232 | | bool _zonemap_materialized = false; |
233 | | std::vector<Field> _seg_filter_values; |
234 | | Field _seg_filter_min; |
235 | | Field _seg_filter_max; |
236 | | }; |
237 | | |
238 | | } // namespace doris |