Coverage Report

Created: 2026-06-12 03:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vdirect_in_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <vector>
22
23
#include "common/logging.h"
24
#include "common/status.h"
25
#include "core/field.h"
26
#include "core/string_ref.h"
27
#include "core/types.h"
28
#include "exprs/expr_zonemap_filter.h"
29
#include "exprs/hybrid_set.h"
30
#include "exprs/vexpr.h"
31
#include "exprs/vin_predicate.h"
32
#include "exprs/vliteral.h"
33
#include "exprs/vslot_ref.h"
34
35
namespace doris {
36
37
class VDirectInPredicate final : public VExpr {
38
    ENABLE_FACTORY_CREATOR(VDirectInPredicate);
39
40
public:
41
    VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter,
42
                       bool hybrid_set_values_match_child_type = true)
43
1.24k
            : VExpr(node),
44
1.24k
              _filter(filter),
45
1.24k
              _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type),
46
1.24k
              _expr_name("direct_in_predicate") {}
47
1.24k
    ~VDirectInPredicate() override = default;
48
49
#ifdef BE_TEST
50
    VDirectInPredicate() = default;
51
#endif
52
53
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
54
1.19k
                   VExprContext* context) override {
55
1.19k
        RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context));
56
1.19k
        RETURN_IF_ERROR(_materialize_for_zonemap_filter());
57
1.19k
        _prepare_finished = true;
58
1.19k
        return Status::OK();
59
1.19k
    }
60
61
    Status open(RuntimeState* state, VExprContext* context,
62
1.59k
                FunctionContext::FunctionStateScope scope) override {
63
1.59k
        DCHECK(_prepare_finished);
64
1.59k
        RETURN_IF_ERROR(VExpr::open(state, context, scope));
65
1.59k
        _open_finished = true;
66
1.59k
        return Status::OK();
67
1.59k
    }
68
69
    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
70
0
                               size_t count, ColumnPtr& result_column) const override {
71
0
        return _do_execute(context, block, nullptr, selector, count, result_column, nullptr);
72
0
    }
73
74
    Status execute_runtime_filter(VExprContext* context, const Block* block,
75
                                  const uint8_t* __restrict filter, size_t count,
76
517
                                  ColumnPtr& result_column, ColumnPtr* arg_column) const override {
77
517
        return _do_execute(context, block, filter, nullptr, count, result_column, arg_column);
78
517
    }
79
80
1.19k
    const std::string& expr_name() const override { return _expr_name; }
81
82
1.33k
    std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; }
83
84
0
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override {
85
0
        return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values,
86
0
                                             _seg_filter_min, _seg_filter_max);
87
0
    }
88
89
275
    bool can_evaluate_zonemap_filter() const override {
90
275
        return _zonemap_materialized &&
91
275
               expr_zonemap::can_eval_in_zonemap(get_child(0), _seg_filter_values, _seg_filter_min,
92
274
                                                 _seg_filter_max);
93
275
    }
94
95
6
    bool get_slot_in_expr(VExprSPtr& new_root) const {
96
6
        if (!_hybrid_set_values_match_child_type) {
97
1
            return false;
98
1
        }
99
5
        if (!get_child(0)->is_slot_ref()) {
100
0
            return false;
101
0
        }
102
103
5
        auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
104
5
        auto slot_data_type = remove_nullable(slot_ref->data_type());
105
5
        {
106
5
            TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN);
107
5
            TExprNode node;
108
5
            node.__set_type(type_desc);
109
5
            node.__set_node_type(TExprNodeType::IN_PRED);
110
5
            node.in_predicate.__set_is_not_in(false);
111
5
            node.__set_opcode(TExprOpcode::FILTER_IN);
112
            // VdirectInPredicate assume is_nullable = false.
113
5
            node.__set_is_nullable(false);
114
5
            new_root = VInPredicate::create_shared(node);
115
5
        }
116
5
        {
117
            // add slot
118
5
            new_root->add_child(children().at(0));
119
5
        }
120
5
        {
121
5
            auto iter = get_set_func()->begin();
122
17
            while (iter->has_next()) {
123
12
                DCHECK(iter->get_value() != nullptr);
124
12
                const void* value = iter->get_value();
125
126
12
                TExprNode node = _create_texpr_node_from_hybrid_set_value(
127
12
                        value, slot_data_type->get_primitive_type(),
128
12
                        slot_data_type->get_precision(), slot_data_type->get_scale());
129
12
                new_root->add_child(VLiteral::create_shared(node));
130
12
                iter->next();
131
12
            }
132
5
        }
133
5
        return true;
134
5
    }
135
136
1.18k
    uint64_t get_digest(uint64_t seed) const override {
137
1.18k
        seed = _children[0]->get_digest(seed);
138
1.18k
        if (seed) {
139
1.18k
            return _filter->get_digest(seed);
140
1.18k
        }
141
0
        return seed;
142
1.18k
    }
143
144
private:
145
    Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter,
146
                       const Selector* selector, size_t count, ColumnPtr& result_column,
147
517
                       ColumnPtr* arg_column) const {
148
517
        DCHECK(_open_finished || block == nullptr);
149
517
        DCHECK(!(filter != nullptr && selector != nullptr))
150
0
                << "filter and selector can not be both set";
151
517
        ColumnPtr argument_column;
152
517
        RETURN_IF_ERROR(
153
517
                _children[0]->execute_column(context, block, selector, count, argument_column));
154
517
        argument_column = argument_column->convert_to_full_column_if_const();
155
156
517
        if (arg_column != nullptr) {
157
517
            *arg_column = argument_column;
158
517
        }
159
160
517
        size_t sz = argument_column->size();
161
517
        auto res_data_column = ColumnUInt8::create(sz);
162
517
        res_data_column->resize(sz);
163
164
517
        if (argument_column->is_nullable()) {
165
201
            auto column_nested = static_cast<const ColumnNullable*>(argument_column.get())
166
201
                                         ->get_nested_column_ptr();
167
201
            const auto& null_map =
168
201
                    static_cast<const ColumnNullable*>(argument_column.get())->get_null_map_data();
169
201
            _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(),
170
201
                                         filter);
171
316
        } else {
172
316
            _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter);
173
316
        }
174
175
517
        DCHECK(!_data_type->is_nullable());
176
517
        result_column = std::move(res_data_column);
177
517
        return Status::OK();
178
517
    }
179
180
1.20k
    Status _materialize_for_zonemap_filter() {
181
1.20k
        if (!_hybrid_set_values_match_child_type) {
182
1
            _zonemap_materialized = false;
183
1
            return Status::OK();
184
1
        }
185
1.20k
        DORIS_CHECK(_filter != nullptr);
186
1.20k
        auto& filter = *_filter;
187
1.20k
        const auto& data_type = remove_nullable(get_child(0)->data_type());
188
1.20k
        _seg_filter_values.clear();
189
1.20k
        auto* iterator = filter.begin();
190
422k
        while (iterator->has_next()) {
191
421k
            const void* value = iterator->get_value();
192
421k
            if (value != nullptr) {
193
421k
                TExprNode literal_node = _create_texpr_node_from_hybrid_set_value(
194
421k
                        value, remove_nullable(data_type)->get_primitive_type(),
195
421k
                        remove_nullable(data_type)->get_precision(),
196
421k
                        remove_nullable(data_type)->get_scale());
197
421k
                auto literal = VLiteral::create_shared(literal_node);
198
421k
                Field field;
199
421k
                literal->get_column_ptr()->get(0, field);
200
421k
                _seg_filter_values.emplace_back(std::move(field));
201
421k
            }
202
421k
            iterator->next();
203
421k
        }
204
1.20k
        if (_seg_filter_values.empty()) {
205
69
            _zonemap_materialized = true;
206
69
            return Status::OK();
207
69
        }
208
1.13k
        auto minmax = std::ranges::minmax_element(_seg_filter_values, expr_zonemap::field_less);
209
1.13k
        _seg_filter_min = *minmax.min;
210
1.13k
        _seg_filter_max = *minmax.max;
211
1.13k
        _zonemap_materialized = true;
212
1.13k
        return Status::OK();
213
1.20k
    }
214
215
    static TExprNode _create_texpr_node_from_hybrid_set_value(const void* data,
216
                                                              const PrimitiveType& type,
217
420k
                                                              int precision, int scale) {
218
420k
        if (is_string_type(type)) {
219
121
            const auto* value = reinterpret_cast<const StringRef*>(data);
220
121
            auto field = Field::create_field<TYPE_STRING>(String(value->data, value->size));
221
121
            return create_texpr_node_from(field, type, precision, scale);
222
121
        }
223
420k
        return create_texpr_node_from(data, type, precision, scale);
224
420k
    }
225
226
    std::shared_ptr<HybridSetBase> _filter;
227
    // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the
228
    // child slot keeps the original logical type. Such values must not be materialized as child-type
229
    // literals for zonemap pruning or slot-IN rewrite.
230
    bool _hybrid_set_values_match_child_type = true;
231
    std::string _expr_name;
232
    bool _zonemap_materialized = false;
233
    std::vector<Field> _seg_filter_values;
234
    Field _seg_filter_min;
235
    Field _seg_filter_max;
236
};
237
238
} // namespace doris