Coverage Report

Created: 2026-07-02 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vdirect_in_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <utility>
21
#include <vector>
22
23
#include "common/logging.h"
24
#include "common/status.h"
25
#include "core/field.h"
26
#include "core/types.h"
27
#include "exprs/expr_zonemap_filter.h"
28
#include "exprs/hybrid_set.h"
29
#include "exprs/vexpr.h"
30
#include "exprs/vin_predicate.h"
31
#include "exprs/vliteral.h"
32
#include "exprs/vslot_ref.h"
33
34
namespace doris {
35
36
class VDirectInPredicate final : public VExpr {
37
    ENABLE_FACTORY_CREATOR(VDirectInPredicate);
38
39
public:
40
    // `hybrid_set_values_match_child_type` tells whether values in `filter` can be interpreted with
41
    // the child expression type. Parquet/ORC dictionary-filter rewrites evaluate the original
42
    // logical predicate against dictionary entries and then rewrite it to matched physical
43
    // dictionary codes, for example `col IN ('a', 'b')` becomes `dict_code IN (0, 1)`. In that
44
    // shape the HybridSet stores TYPE_INT dictionary codes while the child slot still has the
45
    // original logical type such as STRING. Callers must pass false to disable zonemap
46
    // materialization and slot-IN rewrite that would otherwise rebuild child-typed literals from
47
    // dictionary codes.
48
    VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter,
49
                       bool hybrid_set_values_match_child_type = true)
50
9.40k
            : VExpr(node),
51
9.40k
              _filter(filter),
52
9.40k
              _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type),
53
9.40k
              _expr_name("direct_in_predicate") {}
54
9.41k
    ~VDirectInPredicate() override = default;
55
56
#ifdef BE_TEST
57
    VDirectInPredicate() = default;
58
#endif
59
60
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
61
4.14k
                   VExprContext* context) override {
62
4.14k
        RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context));
63
4.14k
        RETURN_IF_ERROR(_materialize_for_zonemap_filter());
64
4.14k
        _prepare_finished = true;
65
4.14k
        return Status::OK();
66
4.14k
    }
67
68
    Status open(RuntimeState* state, VExprContext* context,
69
9.98k
                FunctionContext::FunctionStateScope scope) override {
70
9.98k
        DCHECK(_prepare_finished);
71
9.98k
        RETURN_IF_ERROR(VExpr::open(state, context, scope));
72
9.98k
        _open_finished = true;
73
9.98k
        return Status::OK();
74
9.98k
    }
75
76
    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
77
1.07k
                               size_t count, ColumnPtr& result_column) const override {
78
1.07k
        return _do_execute(context, block, nullptr, selector, count, result_column, nullptr);
79
1.07k
    }
80
81
    Status execute_runtime_filter(VExprContext* context, const Block* block,
82
                                  const uint8_t* __restrict filter, size_t count,
83
16.5k
                                  ColumnPtr& result_column, ColumnPtr* arg_column) const override {
84
16.5k
        return _do_execute(context, block, filter, nullptr, count, result_column, arg_column);
85
16.5k
    }
86
87
4.00k
    const std::string& expr_name() const override { return _expr_name; }
88
89
3.35k
    std::shared_ptr<HybridSetBase> get_set_func() const override { return _filter; }
90
91
5
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx) const override {
92
5
        return expr_zonemap::eval_in_zonemap(ctx, get_child(0), false, _seg_filter_values,
93
5
                                             _seg_filter_min, _seg_filter_max);
94
5
    }
95
96
819
    bool can_evaluate_zonemap_filter() const override {
97
819
        return _zonemap_materialized &&
98
819
               std::dynamic_pointer_cast<VSlotRef>(get_child(0)) != nullptr;
99
819
    }
100
101
6.27k
    Status clone_node(VExprSPtr* cloned_expr) const override {
102
6.27k
        DORIS_CHECK(cloned_expr != nullptr);
103
6.27k
        *cloned_expr = VDirectInPredicate::create_shared(clone_texpr_node(), _filter,
104
6.27k
                                                         _hybrid_set_values_match_child_type);
105
6.27k
        return Status::OK();
106
6.27k
    }
107
108
240
    bool get_slot_in_expr(VExprSPtr& new_root) const {
109
240
        if (!_hybrid_set_values_match_child_type) {
110
1
            return false;
111
1
        }
112
239
        if (!get_child(0)->is_slot_ref()) {
113
0
            return false;
114
0
        }
115
116
239
        auto* slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
117
239
        auto slot_data_type = remove_nullable(slot_ref->data_type());
118
239
        {
119
239
            TTypeDesc type_desc = create_type_desc(PrimitiveType::TYPE_BOOLEAN);
120
239
            TExprNode node;
121
239
            node.__set_type(type_desc);
122
239
            node.__set_node_type(TExprNodeType::IN_PRED);
123
239
            node.in_predicate.__set_is_not_in(false);
124
239
            node.__set_opcode(TExprOpcode::FILTER_IN);
125
            // VdirectInPredicate assume is_nullable = false.
126
239
            node.__set_is_nullable(false);
127
239
            new_root = VInPredicate::create_shared(node);
128
239
        }
129
239
        {
130
            // add slot
131
239
            new_root->add_child(children().at(0));
132
239
        }
133
239
        {
134
239
            auto iter = get_set_func()->begin();
135
1.42k
            while (iter->has_next()) {
136
1.18k
                DCHECK(iter->get_value() != nullptr);
137
1.18k
                const void* value = iter->get_value();
138
139
1.18k
                TExprNode node = expr_zonemap::create_texpr_node_from_hybrid_set_value(
140
1.18k
                        value, slot_data_type->get_primitive_type(),
141
1.18k
                        slot_data_type->get_precision(), slot_data_type->get_scale());
142
1.18k
                new_root->add_child(VLiteral::create_shared(node));
143
1.18k
                iter->next();
144
1.18k
            }
145
239
        }
146
239
        return true;
147
239
    }
148
149
2.48k
    uint64_t get_digest(uint64_t seed) const override {
150
2.48k
        seed = _children[0]->get_digest(seed);
151
2.48k
        if (seed) {
152
2.48k
            return _filter->get_digest(seed);
153
2.48k
        }
154
8
        return seed;
155
2.48k
    }
156
157
private:
158
    Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter,
159
                       const Selector* selector, size_t count, ColumnPtr& result_column,
160
17.6k
                       ColumnPtr* arg_column) const {
161
17.6k
        DCHECK(_open_finished || block == nullptr);
162
17.6k
        DCHECK(!(filter != nullptr && selector != nullptr))
163
0
                << "filter and selector can not be both set";
164
17.6k
        ColumnPtr argument_column;
165
17.6k
        RETURN_IF_ERROR(
166
17.6k
                _children[0]->execute_column(context, block, selector, count, argument_column));
167
17.6k
        argument_column = argument_column->convert_to_full_column_if_const();
168
169
17.6k
        if (arg_column != nullptr) {
170
16.5k
            *arg_column = argument_column;
171
16.5k
        }
172
173
17.6k
        size_t sz = argument_column->size();
174
17.6k
        auto res_data_column = ColumnUInt8::create(sz);
175
17.6k
        res_data_column->resize(sz);
176
177
17.6k
        if (const auto* nullable = check_and_get_column<ColumnNullable>(argument_column.get())) {
178
17.6k
            auto column_nested = nullable->get_nested_column_ptr();
179
17.6k
            const auto& null_map = nullable->get_null_map_data();
180
17.6k
            _filter->find_batch_nullable(*column_nested, sz, null_map, res_data_column->get_data(),
181
17.6k
                                         filter);
182
17.6k
        } else {
183
22
            _filter->find_batch(*argument_column, sz, res_data_column->get_data(), filter);
184
22
        }
185
186
17.6k
        DCHECK(!_data_type->is_nullable());
187
17.6k
        result_column = std::move(res_data_column);
188
17.6k
        return Status::OK();
189
17.6k
    }
190
191
4.14k
    Status _materialize_for_zonemap_filter() {
192
4.14k
        if (!_hybrid_set_values_match_child_type) {
193
137
            _zonemap_materialized = false;
194
137
            return Status::OK();
195
137
        }
196
4.00k
        DORIS_CHECK(_filter != nullptr);
197
4.00k
        auto& filter = *_filter;
198
4.00k
        const auto& data_type = remove_nullable(get_child(0)->data_type());
199
4.00k
        expr_zonemap::InZonemapMaterializedSet materialized;
200
4.00k
        RETURN_IF_ERROR(expr_zonemap::materialize_hybrid_set_for_zonemap_filter(filter, data_type,
201
4.00k
                                                                                &materialized));
202
4.00k
        _seg_filter_values = std::move(materialized.values);
203
4.00k
        _seg_filter_min = std::move(materialized.min_value);
204
4.00k
        _seg_filter_max = std::move(materialized.max_value);
205
4.00k
        _zonemap_materialized = true;
206
4.00k
        return Status::OK();
207
4.00k
    }
208
209
    std::shared_ptr<HybridSetBase> _filter;
210
    // Dictionary-filter rewrites may store physical dictionary codes in the HybridSet while the
211
    // child slot keeps the original logical type. Such values must not be materialized as child-type
212
    // literals for zonemap pruning or slot-IN rewrite.
213
    bool _hybrid_set_values_match_child_type = true;
214
    std::string _expr_name;
215
    bool _zonemap_materialized = false;
216
    std::vector<Field> _seg_filter_values;
217
    Field _seg_filter_min;
218
    Field _seg_filter_max;
219
};
220
221
} // namespace doris