Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <utility> |
21 | | #include <vector> |
22 | | |
23 | | #include "common/logging.h" |
24 | | #include "common/status.h" |
25 | | #include "core/field.h" |
26 | | #include "core/types.h" |
27 | | #include "exprs/expr_zonemap_filter.h" |
28 | | #include "exprs/hybrid_set.h" |
29 | | #include "exprs/vexpr.h" |
30 | | #include "exprs/vin_predicate.h" |
31 | | #include "exprs/vliteral.h" |
32 | | #include "exprs/vslot_ref.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | class VDirectInPredicate final : public VExpr { |
37 | | ENABLE_FACTORY_CREATOR(VDirectInPredicate); |
38 | | |
39 | | public: |
40 | | // `hybrid_set_values_match_child_type` tells whether values in `filter` can be interpreted with |
41 | | // the child expression type. Parquet/ORC dictionary-filter rewrites evaluate the original |
42 | | // logical predicate against dictionary entries and then rewrite it to matched physical |
43 | | // dictionary codes, for example `col IN ('a', 'b')` becomes `dict_code IN (0, 1)`. In that |
44 | | // shape the HybridSet stores TYPE_INT dictionary codes while the child slot still has the |
45 | | // original logical type such as STRING. Callers must pass false to disable zonemap |
46 | | // materialization and slot-IN rewrite that would otherwise rebuild child-typed literals from |
47 | | // dictionary codes. |
48 | | VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter, |
49 | | bool hybrid_set_values_match_child_type) |
50 | 3.38k | : VExpr(node), |
51 | 3.38k | _filter(filter), |
52 | 3.38k | _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type), |
53 | 3.38k | _expr_name("direct_in_predicate") {} |
54 | 3.39k | ~VDirectInPredicate() override = default; |
55 | | |
56 | | #ifdef BE_TEST |
57 | | VDirectInPredicate() = default; |
58 | | #endif |
59 | | |
60 | | Status prepare(RuntimeState* state, const RowDescriptor& row_desc, |
61 | 3.32k | VExprContext* context) override { |
62 | 3.32k | RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); |
63 | 3.32k | RETURN_IF_ERROR(_materialize_for_zonemap_filter()); |
64 | 3.32k | _prepare_finished = true; |
65 | 3.32k | return Status::OK(); |
66 | 3.32k | } |
67 | | |
68 | | Status open(RuntimeState* state, VExprContext* context, |
69 | 9.45k | FunctionContext::FunctionStateScope scope) override { |
70 | 9.45k | DCHECK(_prepare_finished); |
71 | 9.45k | RETURN_IF_ERROR(VExpr::open(state, context, scope)); |
72 | 9.45k | _open_finished = true; |
73 | 9.45k | return Status::OK(); |
74 | 9.45k | } |
75 | | |
76 | | Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, |
77 | 986 | size_t count, ColumnPtr& result_column) const override { |
78 | 986 | return _do_execute(context, block, nullptr, selector, count, result_column, nullptr); |
79 | 986 | } |
80 | | |
81 | | Status execute_runtime_filter(VExprContext* context, const Block* block, |
82 | | const uint8_t* __restrict filter, size_t count, |
83 | 25.3k | ColumnPtr& result_column, ColumnPtr* arg_column) const override { |
84 | 25.3k | return _do_execute(context, block, filter, nullptr, count, result_column, arg_column); |
85 | 25.3k | } |
86 | | |
87 | 3.18k | const std::string& expr_name() const override { return _expr_name; } |
88 | | |
89 | 3.70k | std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; } |
90 | | |
91 | 5.96k | ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override { |
92 | 5.96k | return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values, |
93 | 5.96k | _seg_filter_min, _seg_filter_max); |
94 | 5.96k | } |
95 | | |
96 | 8.73k | bool can_evaluate_zonemap_filter() const override { |
97 | 8.73k | return _zonemap_materialized && |
98 | 8.73k | std::dynamic_pointer_cast<VSlotRef>(get_child(0)) != nullptr; |
99 | 8.73k | } |
100 | | |
101 | 242 | bool get_slot_in_expr(VExprSPtr& new_root) const { |
102 | 242 | if (!_hybrid_set_values_match_child_type) { |
103 | 1 | return false; |
104 | 1 | } |
105 | 241 | if (!get_child(0)->is_slot_ref()) { |
106 | 0 | return false; |
107 | 0 | } |
108 | | |
109 | 241 | auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get()); |
110 | 241 | auto slot_data_type = remove_nullable(slot_ref->data_type()); |
111 | 241 | { |
112 | 241 | TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN); |
113 | 241 | TExprNode node; |
114 | 241 | node.__set_type(type_desc); |
115 | 241 | node.__set_node_type(TExprNodeType::IN_PRED); |
116 | 241 | node.in_predicate.__set_is_not_in(false); |
117 | 241 | node.__set_opcode(TExprOpcode::FILTER_IN); |
118 | | // VdirectInPredicate assume is_nullable = false. |
119 | 241 | node.__set_is_nullable(false); |
120 | 241 | new_root = VInPredicate::create_shared(node); |
121 | 241 | } |
122 | 241 | { |
123 | | // add slot |
124 | 241 | new_root->add_child(children().at(0)); |
125 | 241 | } |
126 | 241 | { |
127 | 241 | auto iter = get_set_func()->begin(); |
128 | 1.42k | while (iter->has_next()) { |
129 | 1.18k | DCHECK(iter->get_value() != nullptr); |
130 | 1.18k | const void* value = iter->get_value(); |
131 | | |
132 | 1.18k | TExprNode node = expr_zonemap::create_texpr_node_from_hybrid_set_value( |
133 | 1.18k | value, slot_data_type->get_primitive_type(), |
134 | 1.18k | slot_data_type->get_precision(), slot_data_type->get_scale()); |
135 | 1.18k | new_root->add_child(VLiteral::create_shared(node)); |
136 | 1.18k | iter->next(); |
137 | 1.18k | } |
138 | 241 | } |
139 | 241 | return true; |
140 | 241 | } |
141 | | |
142 | 2.85k | uint64_t get_digest(uint64_t seed) const override { |
143 | 2.85k | seed = _children[0]->get_digest(seed); |
144 | 2.85k | if (seed) { |
145 | 2.85k | return _filter->get_digest(seed); |
146 | 2.85k | } |
147 | 2 | return seed; |
148 | 2.85k | } |
149 | | |
150 | | private: |
151 | | Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter, |
152 | | const Selector* selector, size_t count, ColumnPtr& result_column, |
153 | 26.3k | ColumnPtr* arg_column) const { |
154 | 26.3k | DCHECK(_open_finished || block == nullptr); |
155 | 26.3k | DCHECK(!(filter != nullptr && selector != nullptr)) |
156 | 0 | << "filter and selector can not be both set"; |
157 | 26.3k | ColumnPtr argument_column; |
158 | 26.3k | RETURN_IF_ERROR( |
159 | 26.3k | _children[0]->execute_column(context, block, selector, count, argument_column)); |
160 | 26.3k | argument_column = argument_column->convert_to_full_column_if_const(); |
161 | | |
162 | 26.3k | if (arg_column != nullptr) { |
163 | 25.3k | *arg_column = argument_column; |
164 | 25.3k | } |
165 | | |
166 | 26.3k | size_t sz = argument_column->size(); |
167 | 26.3k | auto res_data_column = ColumnUInt8::create(sz); |
168 | 26.3k | res_data_column->resize(sz); |
169 | | |
170 | 26.3k | if (const auto* nullable = check_and_get_column<ColumnNullable>(argument_column.get())) { |
171 | 26.0k | auto column_nested = nullable->get_nested_column_ptr(); |
172 | 26.0k | const auto& null_map = nullable->get_null_map_data(); |
173 | 26.0k | _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(), |
174 | 26.0k | filter); |
175 | 26.0k | } else { |
176 | 334 | _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter); |
177 | 334 | } |
178 | | |
179 | 26.3k | DCHECK(!_data_type->is_nullable()); |
180 | 26.3k | result_column = std::move(res_data_column); |
181 | 26.3k | return Status::OK(); |
182 | 26.3k | } |
183 | | |
184 | 3.32k | Status _materialize_for_zonemap_filter() { |
185 | 3.32k | if (!_hybrid_set_values_match_child_type) { |
186 | 145 | _zonemap_materialized = false; |
187 | 145 | return Status::OK(); |
188 | 145 | } |
189 | 3.18k | DORIS_CHECK(_filter != nullptr); |
190 | 3.18k | auto& filter = *_filter; |
191 | 3.18k | const auto& data_type = remove_nullable(get_child(0)->data_type()); |
192 | 3.18k | expr_zonemap::InZonemapMaterializedSet materialized; |
193 | 3.18k | RETURN_IF_ERROR(expr_zonemap::materialize_hybrid_set_for_zonemap_filter(filter, data_type, |
194 | 3.18k | &materialized)); |
195 | 3.18k | _seg_filter_values = std::move(materialized.values); |
196 | 3.18k | _seg_filter_min = std::move(materialized.min_value); |
197 | 3.18k | _seg_filter_max = std::move(materialized.max_value); |
198 | 3.18k | _zonemap_materialized = true; |
199 | 3.18k | return Status::OK(); |
200 | 3.18k | } |
201 | | |
202 | | std::shared_ptr<HybridSetBase> _filter; |
203 | | // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the |
204 | | // child slot keeps the original logical type. Such values must not be materialized as child-type |
205 | | // literals for zonemap pruning or slot-IN rewrite. |
206 | | bool _hybrid_set_values_match_child_type = true; |
207 | | std::string _expr_name; |
208 | | bool _zonemap_materialized = false; |
209 | | std::vector<Field> _seg_filter_values; |
210 | | Field _seg_filter_min; |
211 | | Field _seg_filter_max; |
212 | | }; |
213 | | |
214 | | } // namespace doris |