be/src/exprs/vdirect_in_predicate.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <utility> |
21 | | #include <vector> |
22 | | |
23 | | #include "common/logging.h" |
24 | | #include "common/status.h" |
25 | | #include "core/field.h" |
26 | | #include "core/types.h" |
27 | | #include "exprs/expr_zonemap_filter.h" |
28 | | #include "exprs/hybrid_set.h" |
29 | | #include "exprs/vexpr.h" |
30 | | #include "exprs/vin_predicate.h" |
31 | | #include "exprs/vliteral.h" |
32 | | #include "exprs/vslot_ref.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | class VDirectInPredicate final : public VExpr { |
37 | | ENABLE_FACTORY_CREATOR(VDirectInPredicate); |
38 | | |
39 | | public: |
40 | | VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter, |
41 | | bool hybrid_set_values_match_child_type = true) |
42 | 1.11k | : VExpr(node), |
43 | 1.11k | _filter(filter), |
44 | 1.11k | _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type), |
45 | 1.11k | _expr_name("direct_in_predicate") {} |
46 | 1.11k | ~VDirectInPredicate() override = default; |
47 | | |
48 | | #ifdef BE_TEST |
49 | | VDirectInPredicate() = default; |
50 | | #endif |
51 | | |
52 | | Status prepare(RuntimeState* state, const RowDescriptor& row_desc, |
53 | 1.06k | VExprContext* context) override { |
54 | 1.06k | RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context)); |
55 | 1.06k | _prepare_finished = true; |
56 | 1.06k | return Status::OK(); |
57 | 1.06k | } |
58 | | |
59 | | Status open(RuntimeState* state, VExprContext* context, |
60 | 6.58k | FunctionContext::FunctionStateScope scope) override { |
61 | 6.58k | DCHECK(_prepare_finished); |
62 | 6.58k | RETURN_IF_ERROR(VExpr::open(state, context, scope)); |
63 | 6.58k | RETURN_IF_ERROR(_materialize_for_zonemap_filter()); |
64 | 6.58k | _open_finished = true; |
65 | 6.58k | return Status::OK(); |
66 | 6.58k | } |
67 | | |
68 | | Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector, |
69 | 986 | size_t count, ColumnPtr& result_column) const override { |
70 | 986 | return _do_execute(context, block, nullptr, selector, count, result_column, nullptr); |
71 | 986 | } |
72 | | |
73 | | Status execute_runtime_filter(VExprContext* context, const Block* block, |
74 | | const uint8_t* __restrict filter, size_t count, |
75 | 12.2k | ColumnPtr& result_column, ColumnPtr* arg_column) const override { |
76 | 12.2k | return _do_execute(context, block, filter, nullptr, count, result_column, arg_column); |
77 | 12.2k | } |
78 | | |
79 | 922 | const std::string& expr_name() const override { return _expr_name; } |
80 | | |
81 | 1.65k | std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; } |
82 | | |
83 | 3.56k | ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override { |
84 | 3.56k | return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values, |
85 | 3.56k | _seg_filter_min, _seg_filter_max); |
86 | 3.56k | } |
87 | | |
88 | 5.77k | bool can_evaluate_zonemap_filter() const override { |
89 | 5.77k | return _zonemap_materialized && |
90 | 5.77k | std::dynamic_pointer_cast<VSlotRef>(get_child(0)) != nullptr; |
91 | 5.77k | } |
92 | | |
93 | 242 | bool get_slot_in_expr(VExprSPtr& new_root) const { |
94 | 242 | if (!_hybrid_set_values_match_child_type) { |
95 | 1 | return false; |
96 | 1 | } |
97 | 241 | if (!get_child(0)->is_slot_ref()) { |
98 | 0 | return false; |
99 | 0 | } |
100 | | |
101 | 241 | auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get()); |
102 | 241 | auto slot_data_type = remove_nullable(slot_ref->data_type()); |
103 | 241 | { |
104 | 241 | TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN); |
105 | 241 | TExprNode node; |
106 | 241 | node.__set_type(type_desc); |
107 | 241 | node.__set_node_type(TExprNodeType::IN_PRED); |
108 | 241 | node.in_predicate.__set_is_not_in(false); |
109 | 241 | node.__set_opcode(TExprOpcode::FILTER_IN); |
110 | | // VdirectInPredicate assume is_nullable = false. |
111 | 241 | node.__set_is_nullable(false); |
112 | 241 | new_root = VInPredicate::create_shared(node); |
113 | 241 | } |
114 | 241 | { |
115 | | // add slot |
116 | 241 | new_root->add_child(children().at(0)); |
117 | 241 | } |
118 | 241 | { |
119 | 241 | auto iter = get_set_func()->begin(); |
120 | 1.42k | while (iter->has_next()) { |
121 | 1.18k | DCHECK(iter->get_value() != nullptr); |
122 | 1.18k | const void* value = iter->get_value(); |
123 | | |
124 | 1.18k | TExprNode node = expr_zonemap::create_texpr_node_from_hybrid_set_value( |
125 | 1.18k | value, slot_data_type->get_primitive_type(), |
126 | 1.18k | slot_data_type->get_precision(), slot_data_type->get_scale()); |
127 | 1.18k | new_root->add_child(VLiteral::create_shared(node)); |
128 | 1.18k | iter->next(); |
129 | 1.18k | } |
130 | 241 | } |
131 | 241 | return true; |
132 | 241 | } |
133 | | |
134 | 748 | uint64_t get_digest(uint64_t seed) const override { |
135 | 748 | seed = _children[0]->get_digest(seed); |
136 | 748 | if (seed) { |
137 | 748 | return _filter->get_digest(seed); |
138 | 748 | } |
139 | 0 | return seed; |
140 | 748 | } |
141 | | |
142 | | private: |
143 | | Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter, |
144 | | const Selector* selector, size_t count, ColumnPtr& result_column, |
145 | 13.1k | ColumnPtr* arg_column) const { |
146 | 13.1k | DCHECK(_open_finished || block == nullptr); |
147 | 13.1k | DCHECK(!(filter != nullptr && selector != nullptr)) |
148 | 0 | << "filter and selector can not be both set"; |
149 | 13.1k | ColumnPtr argument_column; |
150 | 13.1k | RETURN_IF_ERROR( |
151 | 13.1k | _children[0]->execute_column(context, block, selector, count, argument_column)); |
152 | 13.1k | argument_column = argument_column->convert_to_full_column_if_const(); |
153 | | |
154 | 13.1k | if (arg_column != nullptr) { |
155 | 12.2k | *arg_column = argument_column; |
156 | 12.2k | } |
157 | | |
158 | 13.1k | size_t sz = argument_column->size(); |
159 | 13.1k | auto res_data_column = ColumnUInt8::create(sz); |
160 | 13.1k | res_data_column->resize(sz); |
161 | | |
162 | 13.1k | if (const auto* nullable = check_and_get_column<ColumnNullable>(argument_column.get())) { |
163 | 13.1k | auto column_nested = nullable->get_nested_column_ptr(); |
164 | 13.1k | const auto& null_map = nullable->get_null_map_data(); |
165 | 13.1k | _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(), |
166 | 13.1k | filter); |
167 | 13.1k | } else { |
168 | 0 | _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter); |
169 | 0 | } |
170 | | |
171 | 13.1k | DCHECK(!_data_type->is_nullable()); |
172 | 13.1k | result_column = std::move(res_data_column); |
173 | 13.1k | return Status::OK(); |
174 | 13.1k | } |
175 | | |
176 | 6.58k | Status _materialize_for_zonemap_filter() { |
177 | 6.58k | if (!_hybrid_set_values_match_child_type) { |
178 | 145 | _zonemap_materialized = false; |
179 | 145 | return Status::OK(); |
180 | 145 | } |
181 | 6.44k | DORIS_CHECK(_filter != nullptr); |
182 | 6.44k | auto& filter = *_filter; |
183 | 6.44k | const auto& data_type = remove_nullable(get_child(0)->data_type()); |
184 | 6.44k | expr_zonemap::InZonemapMaterializedSet materialized; |
185 | 6.44k | RETURN_IF_ERROR(expr_zonemap::materialize_hybrid_set_for_zonemap_filter(filter, data_type, |
186 | 6.44k | &materialized)); |
187 | 6.44k | _seg_filter_values = std::move(materialized.values); |
188 | 6.44k | _seg_filter_min = std::move(materialized.min_value); |
189 | 6.44k | _seg_filter_max = std::move(materialized.max_value); |
190 | 6.44k | _zonemap_materialized = true; |
191 | 6.44k | return Status::OK(); |
192 | 6.44k | } |
193 | | |
194 | | std::shared_ptr<HybridSetBase> _filter; |
195 | | // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the |
196 | | // child slot keeps the original logical type. Such values must not be materialized as child-type |
197 | | // literals for zonemap pruning or slot-IN rewrite. |
198 | | bool _hybrid_set_values_match_child_type = true; |
199 | | std::string _expr_name; |
200 | | bool _zonemap_materialized = false; |
201 | | std::vector<Field> _seg_filter_values; |
202 | | Field _seg_filter_min; |
203 | | Field _seg_filter_max; |
204 | | }; |
205 | | |
206 | | } // namespace doris |