Coverage Report

Created: 2026-03-14 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/like_column_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
#pragma once
18
19
#include <glog/logging.h>
20
#include <stdint.h>
21
22
#include <boost/iterator/iterator_facade.hpp>
23
#include <functional>
24
#include <memory>
25
#include <ostream>
26
#include <string>
27
#include <utility>
28
29
#include "common/status.h"
30
#include "core/column/column.h"
31
#include "core/column/column_dictionary.h"
32
#include "core/column/column_nullable.h"
33
#include "core/string_ref.h"
34
#include "core/types.h"
35
#include "exprs/function/like.h"
36
#include "storage/index/bloom_filter/bloom_filter.h"
37
#include "storage/predicate/column_predicate.h"
38
39
namespace roaring {
40
class Roaring;
41
} // namespace roaring
42
43
namespace doris {
44
class FunctionContext;
45
46
// Column predicate that applies a (NOT) LIKE style string match to a column.
//
// The vectorized path implemented in this header (_evaluate_vec) only accepts
// dictionary-encoded columns (ColumnDictI32), optionally wrapped in a ColumnNullable:
// the pattern is evaluated once per dictionary entry and the per-entry results are then
// looked up for every row. Per-entry results can be cached per rowset/segment id.
//
// The "Unexecuted instantiation" rows in this listing show the template instantiated for
// PrimitiveType values 15 and 23.
template <PrimitiveType T>
47
class LikeColumnPredicate final : public ColumnPredicate {
48
public:
49
    // Factory helper macro; presumably what provides the create_shared() used by clone() --
    // TODO confirm against the macro definition.
    ENABLE_FACTORY_CREATOR(LikeColumnPredicate);
50
    // Primary constructor; declared here, defined out of line.
    //   opposite  - presumably true for the negated (NOT LIKE) form -- TODO confirm.
    //   column_id - id of the column this predicate applies to.
    //   col_name  - name of that column.
    //   fn_ctx    - function context; presumably the source of the LikeState kept in _state.
    //   val       - presumably the LIKE pattern text -- verify against the definition.
    LikeColumnPredicate(bool opposite, uint32_t column_id, std::string col_name,
51
                        doris::FunctionContext* fn_ctx, doris::StringRef val);
52
0
    // Defaulted destructor. The raw _state pointer is not deleted here.
    ~LikeColumnPredicate() override = default;
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EED2Ev
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EED2Ev
53
    // Builds a copy of `other` bound to column `col_id`.
    //
    // Forwards (other, col_id) to the ColumnPredicate base constructor, then copies _origin,
    // pattern, the _state pointer (shared with `other`, not duplicated) and _opposite.
    //
    // NOTE(review): _like_state, _page_ng_bf and _segment_id_to_cached_res_flags are NOT
    // copied here; they keep their default-constructed values. _like_state is later passed to
    // _state->scalar_function in _find_code_from_dictionary_column -- confirm that a
    // default-constructed search state is valid for a cloned predicate.
    LikeColumnPredicate(const LikeColumnPredicate<T>& other, uint32_t col_id)
54
0
            : ColumnPredicate(other, col_id) {
55
0
        _origin = other._origin;
56
0
        pattern = other.pattern;
57
0
        _state = other._state;
58
0
        _opposite = other._opposite;
59
0
    }
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EEC2ERKS2_j
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EEC2ERKS2_j
60
    // Plain copy construction is disabled; use clone(col_id) or the (other, col_id)
    // constructor instead.
    LikeColumnPredicate(const LikeColumnPredicate<T>& other) = delete;
61
0
    // Returns a new shared LikeColumnPredicate created from *this and `col_id` via
    // create_shared (presumably dispatching to the (other, col_id) constructor).
    std::shared_ptr<ColumnPredicate> clone(uint32_t col_id) const override {
62
0
        return LikeColumnPredicate<T>::create_shared(*this, col_id);
63
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE5cloneEj
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE5cloneEj
64
0
    // Returns a human-readable description of the predicate in the form
    // "LikeColumnPredicate(<base debug string>, pattern=<pattern>, origin=<_origin>)".
    std::string debug_string() const override {
65
0
        fmt::memory_buffer debug_string_buffer;
66
0
        fmt::format_to(debug_string_buffer, "LikeColumnPredicate({}, pattern={}, origin={})",
67
0
                       ColumnPredicate::debug_string(), pattern, _origin);
68
0
        return fmt::to_string(debug_string_buffer);
69
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE12debug_stringB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE12debug_stringB5cxx11Ev
70
71
0
    // Always reports PredicateType::EQ.
    // NOTE(review): a LIKE predicate identifying itself as EQ looks deliberate (callers may
    // key behaviour off the type) but is surprising -- confirm before changing.
    PredicateType type() const override { return PredicateType::EQ; }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE4typeEv
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE4typeEv
72
    // Vectorized evaluation over the first `size` rows of `column`, writing into `flags`.
    // Defined out of line; presumably forwards to _evaluate_vec<false> (overwrite) --
    // TODO confirm against the definition.
    void evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const override;
73
74
    // Same signature as evaluate_vec. Defined out of line; presumably forwards to
    // _evaluate_vec<true>, which ANDs the result into the existing `flags` -- TODO confirm.
    void evaluate_and_vec(const IColumn& column, uint16_t size, bool* flags) const override;
75
76
0
    // Returns an owning std::string copy of the `pattern.size` bytes starting at
    // `pattern.data`.
    std::string get_search_str() const override {
77
0
        return std::string(reinterpret_cast<const char*>(pattern.data), pattern.size);
78
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE14get_search_strB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE14get_search_strB5cxx11Ev
79
0
    // Returns the _opposite flag. _opposite is not declared in this class (presumably a
    // ColumnPredicate member); in _evaluate_vec it is XOR-ed with the match result.
    bool is_opposite() const { return _opposite; }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE11is_oppositeEv
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE11is_oppositeEv
80
81
0
    // Takes ownership of `src` and stores it in _page_ng_bf (a shared_ptr), replacing any
    // previously stored filter. The stored filter is what evaluate_and() passes to
    // bf->contains().
    void set_page_ng_bf(std::unique_ptr<segment_v2::BloomFilter> src) override {
82
0
        _page_ng_bf = std::move(src);
83
0
    }
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE14set_page_ng_bfESt10unique_ptrINS_10segment_v211BloomFilterESt14default_deleteIS5_EE
Unexecuted instantiation: _ZN5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE14set_page_ng_bfESt10unique_ptrINS_10segment_v211BloomFilterESt14default_deleteIS5_EE
84
0
    // Bloom-filter check. Returns false only when `bf` is an ngram bloom filter, a filter
    // has been stored via set_page_ng_bf(), and bf->contains(*_page_ng_bf) is false. In
    // every other case it returns true (accept).
    //
    // NOTE(review): `bf` is dereferenced without a null check -- callers must pass a
    // non-null filter.
    bool evaluate_and(const BloomFilter* bf) const override {
85
        // A LIKE predicate cannot use a normal (non-ngram) bloom filter, so just accept.
86
0
        if (!bf->is_ngram_bf()) return true;
87
0
        if (_page_ng_bf) {
88
0
            return bf->contains(*_page_ng_bf);
89
0
        }
90
0
        return true;
91
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE12evaluate_andEPKNS_10segment_v211BloomFilterE
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE12evaluate_andEPKNS_10segment_v211BloomFilterE
92
0
    // Returns `ngram` unchanged: true when asked about an ngram bloom filter, false
    // otherwise.
    bool can_do_bloom_filter(bool ngram) const override { return ngram; }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE19can_do_bloom_filterEb
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE19can_do_bloom_filterEb
93
94
private:
95
    // Selection-vector based evaluation; declared here, defined out of line.
    // Presumably filters `sel` in place and returns the number of surviving rows --
    // TODO confirm against the definition.
    uint16_t _evaluate_inner(const IColumn& column, uint16_t* sel, uint16_t size) const override;
96
97
    // Vectorized evaluation helper (presumably what evaluate_vec / evaluate_and_vec call).
    //
    // Produces one boolean per row in `flags` for the first `size` rows of `column`:
    //   - is_and == false: flags[i] is overwritten with the row's result;
    //   - is_and == true:  flags[i] is AND-ed with the row's result.
    // A row's result is `_opposite ^ match`, where `match` is looked up by the row's
    // dictionary code in the table returned by _find_code_from_dictionary_column(). For a
    // nullable column, rows whose null-map entry is set get `_opposite` as their result
    // without consulting the dictionary.
    //
    // Only dictionary columns (ColumnDictI32), optionally nested in a ColumnNullable, are
    // supported; any other column throws Exception(Status::FatalError(...)).
    //
    // NOTE(review): NULL rows evaluate to `_opposite`, i.e. they are kept when _opposite is
    // true -- confirm this is the intended NULL handling for the negated predicate.
    // NOTE(review): the pointers returned by check_and_get_column are dereferenced without
    // a null check; this relies on the preceding is_nullable()/is_column_dictionary() tests.
    template <bool is_and>
98
0
    void _evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const {
99
0
        if (column.is_nullable()) {
100
0
            auto* nullable_col = check_and_get_column<ColumnNullable>(column);
101
0
            auto& null_map_data = nullable_col->get_null_map_column().get_data();
102
0
            auto& nested_col = nullable_col->get_nested_column();
103
0
            if (nested_col.is_column_dictionary()) {
104
0
                auto* nested_col_ptr = check_and_get_column<ColumnDictI32>(nested_col);
105
0
                const auto& dict_res = _find_code_from_dictionary_column(*nested_col_ptr);
106
0
                auto& data_array = nested_col_ptr->get_data();
107
0
                for (uint16_t i = 0; i < size; i++) {
108
0
                    // NULL row: the result is `_opposite`; the dictionary is not consulted.
                    if (null_map_data[i]) {
109
0
                        if constexpr (is_and) {
110
0
                            flags[i] &= _opposite;
111
0
                        } else {
112
0
                            flags[i] = _opposite;
113
0
                        }
114
0
                        continue;
115
0
                    }
116
117
0
                    // data_array[i] is the row's dictionary code; dict_res maps it to a match.
                    unsigned char flag = dict_res[data_array[i]];
118
0
                    if constexpr (is_and) {
119
0
                        flags[i] &= _opposite ^ flag;
120
0
                    } else {
121
0
                        flags[i] = _opposite ^ flag;
122
0
                    }
123
0
                }
124
0
            } else {
125
0
                throw Exception(Status::FatalError(
126
0
                        "vectorized (not) like predicates should be dict column"));
127
0
            }
128
0
        } else {
129
0
            if (column.is_column_dictionary()) {
130
0
                auto* nested_col_ptr = check_and_get_column<ColumnDictI32>(column);
131
0
                auto& data_array = nested_col_ptr->get_data();
132
0
                const auto& dict_res = _find_code_from_dictionary_column(*nested_col_ptr);
133
0
                for (uint16_t i = 0; i < size; i++) {
134
0
                    unsigned char flag = dict_res[data_array[i]];
135
0
                    if constexpr (is_and) {
136
0
                        flags[i] &= _opposite ^ flag;
137
0
                    } else {
138
0
                        flags[i] = _opposite ^ flag;
139
0
                    }
140
0
                }
141
0
            } else {
142
0
                throw Exception(Status::FatalError(
143
0
                        "vectorized (not) like predicates should be dict column"));
144
0
            }
145
0
        }
146
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE13_evaluate_vecILb0EEEvRKNS_7IColumnEtPb
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE13_evaluate_vecILb1EEEvRKNS_7IColumnEtPb
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE13_evaluate_vecILb0EEEvRKNS_7IColumnEtPb
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE13_evaluate_vecILb1EEEvRKNS_7IColumnEtPb
147
    // Returns one bool per dictionary entry of `column`: element i holds the flag written by
    // _state->scalar_function when run on dictionary value i against `pattern`.
    //
    // Results are cached per column.get_rowset_segment_id() in
    // _segment_id_to_cached_res_flags. On a cache hit the cached vector is copied out and
    // returned. Otherwise the table is computed and, if the dictionary is non-empty and
    // config::enable_low_cardinality_cache_code is set, stored in the cache before being
    // returned.
    //
    // The status returned by scalar_function goes through THROW_IF_ERROR (presumably
    // converting a failure into an exception -- see the macro definition).
    std::vector<bool> __attribute__((flatten))
148
0
    _find_code_from_dictionary_column(const ColumnDictI32& column) const {
149
0
        std::vector<bool> res;
150
0
        if (_segment_id_to_cached_res_flags.if_contains(
151
0
                    column.get_rowset_segment_id(),
152
0
                    [&res](const auto& pair) { res = pair.second; })) {
Unexecuted instantiation: _ZZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE33_find_code_from_dictionary_columnERKNS_13ColumnDictI32EENKUlRKT_E_clISt4pairIKSB_INS_8RowsetIdEjESt6vectorIbSaIbEEEEEDaS8_
Unexecuted instantiation: _ZZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE33_find_code_from_dictionary_columnERKNS_13ColumnDictI32EENKUlRKT_E_clISt4pairIKSB_INS_8RowsetIdEjESt6vectorIbSaIbEEEEEDaS8_
153
0
            return res;
154
0
        }
155
156
0
        std::vector<bool> tmp_res(column.dict_size(), false);
157
0
        for (int i = 0; i < column.dict_size(); i++) {
158
0
            StringRef cell_value = column.get_shrink_value(i);
159
0
            unsigned char flag = 0;
160
0
            THROW_IF_ERROR((_state->scalar_function)(
161
0
                    &_like_state, StringRef(cell_value.data, cell_value.size), pattern, &flag));
162
0
            tmp_res[i] = flag;
163
0
        }
164
        // The dictionary may not be initialized yet when this predicate runs: for example,
165
        // when a whole page is null the reader skips it, so the dictionary is never
166
        // populated. Caching the result in that case would be wrong, because later pages
167
        // may be non-null and the dictionary will then gain entries.
168
        //
169
        // Caching can still be problematic, so it is gated by a config flag: when the flag
170
        // is off the result is returned without being cached.
171
0
        if (!column.is_dict_empty() && config::enable_low_cardinality_cache_code) {
172
0
            _segment_id_to_cached_res_flags.emplace(
173
0
                    std::pair {column.get_rowset_segment_id(), tmp_res});
174
0
        }
175
176
0
        return tmp_res;
177
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE15EE33_find_code_from_dictionary_columnERKNS_13ColumnDictI32E
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicateILNS_13PrimitiveTypeE23EE33_find_code_from_dictionary_columnERKNS_13ColumnDictI32E
178
179
    // Cache of per-dictionary-entry match results, keyed by (RowsetId, uint32_t) as returned
    // by column.get_rowset_segment_id(). `mutable` because it is filled from the const
    // method _find_code_from_dictionary_column(). The map is configured with
    // std::shared_mutex as its mutex type and 4 as the numeric template argument that
    // precedes it. The allocator's value type names the same mapped type as the map itself
    // (std::vector<bool>).
    mutable phmap::parallel_flat_hash_map<
180
            std::pair<RowsetId, uint32_t>, std::vector<bool>,
181
            phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
182
            phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
183
            std::allocator<std::pair<const std::pair<RowsetId, uint32_t>, std::vector<bool>>>, 4,
184
            std::shared_mutex>
185
            _segment_id_to_cached_res_flags;
186
187
    // Owning string; copied between instances and printed as "origin=" by debug_string().
    // Presumably the original pattern text -- TODO confirm against the constructor.
    std::string _origin;
188
    // lifetime controlled by scan node
189
    using StateType = LikeState;
190
    // Pattern passed to _state->scalar_function and returned by get_search_str().
    // NOTE(review): StringRef looks like a non-owning view; presumably it refers to storage
    // that outlives this predicate -- verify.
    StringRef pattern;
191
192
    // Raw pointer, never deleted by this class; its scalar_function performs the actual
    // match. Shared between a predicate and its clones.
    StateType* _state = nullptr;
193
194
    // A separate scratch region is required for every concurrent caller of the
195
    // Hyperscan API, so _like_state is kept separately in each instance of
196
    // LikeColumnPredicate.
197
    LikeSearchState _like_state;
198
    std::shared_ptr<segment_v2::BloomFilter> _page_ng_bf; // for ngram-bf index
199
};
200
201
} // namespace doris