be/src/exprs/vdirect_in_predicate.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <utility> |
21 | | #include <vector> |
22 | | |
23 | | #include "common/logging.h" |
24 | | #include "common/status.h" |
25 | | #include "core/field.h" |
26 | | #include "core/types.h" |
27 | | #include "exprs/expr_zonemap_filter.h" |
28 | | #include "exprs/hybrid_set.h" |
29 | | #include "exprs/vexpr.h" |
30 | | #include "exprs/vin_predicate.h" |
31 | | #include "exprs/vliteral.h" |
32 | | #include "exprs/vslot_ref.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | class VDirectInPredicate final : public VExpr { |
37 | | ENABLE_FACTORY_CREATOR(VDirectInPredicate); |
38 | | |
39 | | public: |
40 | | // `hybrid_set_values_match_child_type` tells whether values in `filter` can be interpreted with |
41 | | // the child expression type. Parquet/ORC dictionary-filter rewrites evaluate the original |
42 | | // logical predicate against dictionary entries and then rewrite it to matched physical |
43 | | // dictionary codes, for example `col IN ('a', 'b')` becomes `dict_code IN (0, 1)`. In that |
44 | | // shape the HybridSet stores TYPE_INT dictionary codes while the child slot still has the |
45 | | // original logical type such as STRING. Callers must pass false to disable zonemap |
46 | | // materialization and slot-IN rewrite that would otherwise rebuild child-typed literals from |
47 | | // dictionary codes. |
48 | | VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter, |
49 | | bool hybrid_set_values_match_child_type = true) |
50 | 9.40k | : VExpr(node), |
51 | 9.40k | _filter(filter), |
52 | 9.40k | _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type), |
53 | 9.40k | _expr_name("direct_in_predicate") {} |
54 | 9.41k | ~VDirectInPredicate() override = default; |
55 | | |
56 | | #ifdef BE_TEST |
57 | | VDirectInPredicate() = default; |
58 | | #endif |
59 | | |
60 | | Status prepare(RuntimeState* state, const RowDescriptor& row_desc, |
61 | 4.14k | VExprContext* context) override { |
62 | 4.14k | RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); |
63 | 4.14k | RETURN_IF_ERROR(_materialize_for_zonemap_filter()); |
64 | 4.14k | _prepare_finished = true; |
65 | 4.14k | return Status::OK(); |
66 | 4.14k | } |
67 | | |
68 | | Status open(RuntimeState* state, VExprContext* context, |
69 | 9.98k | FunctionContext::FunctionStateScope scope) override { |
70 | 9.98k | DCHECK(_prepare_finished); |
71 | 9.98k | RETURN_IF_ERROR(VExpr::open(state, context, scope)); |
72 | 9.98k | _open_finished = true; |
73 | 9.98k | return Status::OK(); |
74 | 9.98k | } |
75 | | |
76 | | Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, |
77 | 1.07k | size_t count, ColumnPtr& result_column) const override { |
78 | 1.07k | return _do_execute(context, block, nullptr, selector, count, result_column, nullptr); |
79 | 1.07k | } |
80 | | |
81 | | Status execute_runtime_filter(VExprContext* context, const Block* block, |
82 | | const uint8_t* __restrict filter, size_t count, |
83 | 16.5k | ColumnPtr& result_column, ColumnPtr* arg_column) const override { |
84 | 16.5k | return _do_execute(context, block, filter, nullptr, count, result_column, arg_column); |
85 | 16.5k | } |
86 | | |
87 | 4.00k | const std::string& expr_name() const override { return _expr_name; } |
88 | | |
89 | 3.35k | std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; } |
90 | | |
91 | 5 | ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override { |
92 | 5 | return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values, |
93 | 5 | _seg_filter_min, _seg_filter_max); |
94 | 5 | } |
95 | | |
96 | 819 | bool can_evaluate_zonemap_filter() const override { |
97 | 819 | return _zonemap_materialized && |
98 | 819 | std::dynamic_pointer_cast<VSlotRef>(get_child(0)) != nullptr; |
99 | 819 | } |
100 | | |
101 | 6.27k | Status clone_node(VExprSPtr* cloned_expr) const override { |
102 | 6.27k | DORIS_CHECK(cloned_expr != nullptr); |
103 | 6.27k | *cloned_expr = VDirectInPredicate::create_shared(clone_texpr_node(), _filter, |
104 | 6.27k | _hybrid_set_values_match_child_type); |
105 | 6.27k | return Status::OK(); |
106 | 6.27k | } |
107 | | |
108 | 240 | bool get_slot_in_expr(VExprSPtr& new_root) const { |
109 | 240 | if (!_hybrid_set_values_match_child_type) { |
110 | 1 | return false; |
111 | 1 | } |
112 | 239 | if (!get_child(0)->is_slot_ref()) { |
113 | 0 | return false; |
114 | 0 | } |
115 | | |
116 | 239 | auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get()); |
117 | 239 | auto slot_data_type = remove_nullable(slot_ref->data_type()); |
118 | 239 | { |
119 | 239 | TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN); |
120 | 239 | TExprNode node; |
121 | 239 | node.__set_type(type_desc); |
122 | 239 | node.__set_node_type(TExprNodeType::IN_PRED); |
123 | 239 | node.in_predicate.__set_is_not_in(false); |
124 | 239 | node.__set_opcode(TExprOpcode::FILTER_IN); |
125 | | // VdirectInPredicate assume is_nullable = false. |
126 | 239 | node.__set_is_nullable(false); |
127 | 239 | new_root = VInPredicate::create_shared(node); |
128 | 239 | } |
129 | 239 | { |
130 | | // add slot |
131 | 239 | new_root->add_child(children().at(0)); |
132 | 239 | } |
133 | 239 | { |
134 | 239 | auto iter = get_set_func()->begin(); |
135 | 1.42k | while (iter->has_next()) { |
136 | 1.18k | DCHECK(iter->get_value() != nullptr); |
137 | 1.18k | const void* value = iter->get_value(); |
138 | | |
139 | 1.18k | TExprNode node = expr_zonemap::create_texpr_node_from_hybrid_set_value( |
140 | 1.18k | value, slot_data_type->get_primitive_type(), |
141 | 1.18k | slot_data_type->get_precision(), slot_data_type->get_scale()); |
142 | 1.18k | new_root->add_child(VLiteral::create_shared(node)); |
143 | 1.18k | iter->next(); |
144 | 1.18k | } |
145 | 239 | } |
146 | 239 | return true; |
147 | 239 | } |
148 | | |
149 | 2.48k | uint64_t get_digest(uint64_t seed) const override { |
150 | 2.48k | seed = _children[0]->get_digest(seed); |
151 | 2.48k | if (seed) { |
152 | 2.48k | return _filter->get_digest(seed); |
153 | 2.48k | } |
154 | 8 | return seed; |
155 | 2.48k | } |
156 | | |
157 | | private: |
158 | | Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter, |
159 | | const Selector* selector, size_t count, ColumnPtr& result_column, |
160 | 17.6k | ColumnPtr* arg_column) const { |
161 | 17.6k | DCHECK(_open_finished || block == nullptr); |
162 | 17.6k | DCHECK(!(filter != nullptr && selector != nullptr)) |
163 | 0 | << "filter and selector can not be both set"; |
164 | 17.6k | ColumnPtr argument_column; |
165 | 17.6k | RETURN_IF_ERROR( |
166 | 17.6k | _children[0]->execute_column(context, block, selector, count, argument_column)); |
167 | 17.6k | argument_column = argument_column->convert_to_full_column_if_const(); |
168 | | |
169 | 17.6k | if (arg_column != nullptr) { |
170 | 16.5k | *arg_column = argument_column; |
171 | 16.5k | } |
172 | | |
173 | 17.6k | size_t sz = argument_column->size(); |
174 | 17.6k | auto res_data_column = ColumnUInt8::create(sz); |
175 | 17.6k | res_data_column->resize(sz); |
176 | | |
177 | 17.6k | if (const auto* nullable = check_and_get_column<ColumnNullable>(argument_column.get())) { |
178 | 17.6k | auto column_nested = nullable->get_nested_column_ptr(); |
179 | 17.6k | const auto& null_map = nullable->get_null_map_data(); |
180 | 17.6k | _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(), |
181 | 17.6k | filter); |
182 | 17.6k | } else { |
183 | 22 | _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter); |
184 | 22 | } |
185 | | |
186 | 17.6k | DCHECK(!_data_type->is_nullable()); |
187 | 17.6k | result_column = std::move(res_data_column); |
188 | 17.6k | return Status::OK(); |
189 | 17.6k | } |
190 | | |
191 | 4.14k | Status _materialize_for_zonemap_filter() { |
192 | 4.14k | if (!_hybrid_set_values_match_child_type) { |
193 | 137 | _zonemap_materialized = false; |
194 | 137 | return Status::OK(); |
195 | 137 | } |
196 | 4.00k | DORIS_CHECK(_filter != nullptr); |
197 | 4.00k | auto& filter = *_filter; |
198 | 4.00k | const auto& data_type = remove_nullable(get_child(0)->data_type()); |
199 | 4.00k | expr_zonemap::InZonemapMaterializedSet materialized; |
200 | 4.00k | RETURN_IF_ERROR(expr_zonemap::materialize_hybrid_set_for_zonemap_filter(filter, data_type, |
201 | 4.00k | &materialized)); |
202 | 4.00k | _seg_filter_values = std::move(materialized.values); |
203 | 4.00k | _seg_filter_min = std::move(materialized.min_value); |
204 | 4.00k | _seg_filter_max = std::move(materialized.max_value); |
205 | 4.00k | _zonemap_materialized = true; |
206 | 4.00k | return Status::OK(); |
207 | 4.00k | } |
208 | | |
209 | | std::shared_ptr<HybridSetBase> _filter; |
210 | | // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the |
211 | | // child slot keeps the original logical type. Such values must not be materialized as child-type |
212 | | // literals for zonemap pruning or slot-IN rewrite. |
213 | | bool _hybrid_set_values_match_child_type = true; |
214 | | std::string _expr_name; |
215 | | bool _zonemap_materialized = false; |
216 | | std::vector<Field> _seg_filter_values; |
217 | | Field _seg_filter_min; |
218 | | Field _seg_filter_max; |
219 | | }; |
220 | | |
221 | | } // namespace doris |