Coverage Report

Created: 2026-06-17 02:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/like_column_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
#pragma once
18
19
#include <glog/logging.h>
20
#include <stdint.h>
21
22
#include <boost/iterator/iterator_facade.hpp>
23
#include <functional>
24
#include <memory>
25
#include <ostream>
26
#include <string>
27
#include <utility>
28
29
#include "common/status.h"
30
#include "core/column/column.h"
31
#include "core/column/column_dictionary.h"
32
#include "core/column/column_nullable.h"
33
#include "core/string_ref.h"
34
#include "core/types.h"
35
#include "exprs/function/like.h"
36
#include "storage/index/bloom_filter/bloom_filter.h"
37
#include "storage/predicate/column_predicate.h"
38
39
namespace roaring {
40
class Roaring;
41
} // namespace roaring
42
43
namespace doris {
44
class FunctionContext;
45
46
class LikeColumnPredicate final : public ColumnPredicate {
47
public:
48
    ENABLE_FACTORY_CREATOR(LikeColumnPredicate);
49
    LikeColumnPredicate(bool opposite, uint32_t column_id, std::string col_name,
50
                        doris::FunctionContext* fn_ctx, doris::StringRef val);
51
0
    // Defaulted: every member is torn down by its own destructor. `_state` is a raw
    // pointer and is not freed by this class.
    ~LikeColumnPredicate() override = default;
52
    LikeColumnPredicate(const LikeColumnPredicate& other, uint32_t col_id)
53
0
            : ColumnPredicate(other, col_id) {
54
0
        _origin = other._origin;
55
0
        pattern = other.pattern;
56
0
        _state = other._state;
57
0
        _opposite = other._opposite;
58
0
    }
59
    // Plain copy construction is disabled; duplicates are made through clone(col_id),
    // which uses the (other, col_id) constructor.
    LikeColumnPredicate(const LikeColumnPredicate& other) = delete;
60
0
    std::shared_ptr<ColumnPredicate> clone(uint32_t col_id) const override {
61
0
        return LikeColumnPredicate::create_shared(*this, col_id);
62
0
    }
63
0
    std::string debug_string() const override {
64
0
        fmt::memory_buffer debug_string_buffer;
65
0
        fmt::format_to(debug_string_buffer, "LikeColumnPredicate({}, pattern={}, origin={})",
66
0
                       ColumnPredicate::debug_string(), pattern, _origin);
67
0
        return fmt::to_string(debug_string_buffer);
68
0
    }
69
70
0
    // This predicate reports itself as PredicateType::EQ.
    // NOTE(review): no LIKE-specific PredicateType is visible from this header; confirm
    // EQ is the intended category.
    PredicateType type() const override {
        return PredicateType::EQ;
    }
71
    void evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const override;
72
73
    void evaluate_and_vec(const IColumn& column, uint16_t size, bool* flags) const override;
74
75
0
    std::string get_search_str() const override {
76
0
        return std::string(reinterpret_cast<const char*>(pattern.data), pattern.size);
77
0
    }
78
0
    // Exposes the `_opposite` flag; evaluation XORs this flag with each match result.
    bool is_opposite() const {
        return _opposite;
    }
79
80
0
    // Takes ownership of `src` and stores it as the filter that evaluate_and() tests
    // against an n-gram bloom filter. Any previously stored filter is released.
    void set_page_ng_bf(std::unique_ptr<segment_v2::BloomFilter> src) override {
        _page_ng_bf = std::shared_ptr<segment_v2::BloomFilter>(std::move(src));
    }
83
0
    bool evaluate_and(const BloomFilter* bf) const override {
84
        // like predicate can not use normal bf, just return true to accept
85
0
        if (!bf->is_ngram_bf()) return true;
86
0
        if (_page_ng_bf) {
87
0
            return bf->contains(*_page_ng_bf);
88
0
        }
89
0
        return true;
90
0
    }
91
0
    // Only n-gram bloom filters are usable by this predicate, so the answer is simply
    // the `ngram` argument.
    bool can_do_bloom_filter(bool ngram) const override {
        return ngram;
    }
92
93
private:
94
    uint16_t _evaluate_inner(const IColumn& column, uint16_t* sel, uint16_t size) const override;
95
96
    template <bool is_and>
97
0
    void _evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const {
98
0
        if (is_column_nullable(column)) {
99
0
            auto* nullable_col = assert_cast<const ColumnNullable*>(&column);
100
0
            auto& null_map_data = nullable_col->get_null_map_column().get_data();
101
0
            auto& nested_col = nullable_col->get_nested_column();
102
0
            if (nested_col.is_column_dictionary()) {
103
0
                auto* nested_col_ptr = assert_cast<const ColumnDictI32*>(&nested_col);
104
0
                const auto& dict_res = _find_code_from_dictionary_column(*nested_col_ptr);
105
0
                auto& data_array = nested_col_ptr->get_data();
106
0
                for (uint16_t i = 0; i < size; i++) {
107
0
                    if (null_map_data[i]) {
108
0
                        if constexpr (is_and) {
109
0
                            flags[i] &= _opposite;
110
0
                        } else {
111
0
                            flags[i] = _opposite;
112
0
                        }
113
0
                        continue;
114
0
                    }
115
116
0
                    unsigned char flag = dict_res[data_array[i]];
117
0
                    if constexpr (is_and) {
118
0
                        flags[i] &= _opposite ^ flag;
119
0
                    } else {
120
0
                        flags[i] = _opposite ^ flag;
121
0
                    }
122
0
                }
123
0
            } else {
124
0
                throw Exception(Status::FatalError(
125
0
                        "vectorized (not) like predicates should be dict column"));
126
0
            }
127
0
        } else {
128
0
            if (column.is_column_dictionary()) {
129
0
                auto* nested_col_ptr = assert_cast<const ColumnDictI32*>(&column);
130
0
                auto& data_array = nested_col_ptr->get_data();
131
0
                const auto& dict_res = _find_code_from_dictionary_column(*nested_col_ptr);
132
0
                for (uint16_t i = 0; i < size; i++) {
133
0
                    unsigned char flag = dict_res[data_array[i]];
134
0
                    if constexpr (is_and) {
135
0
                        flags[i] &= _opposite ^ flag;
136
0
                    } else {
137
0
                        flags[i] = _opposite ^ flag;
138
0
                    }
139
0
                }
140
0
            } else {
141
0
                throw Exception(Status::FatalError(
142
0
                        "vectorized (not) like predicates should be dict column"));
143
0
            }
144
0
        }
145
0
    }
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicate13_evaluate_vecILb0EEEvRKNS_7IColumnEtPb
Unexecuted instantiation: _ZNK5doris19LikeColumnPredicate13_evaluate_vecILb1EEEvRKNS_7IColumnEtPb
146
    std::vector<bool> __attribute__((flatten))
147
0
    _find_code_from_dictionary_column(const ColumnDictI32& column) const {
148
0
        std::vector<bool> res;
149
0
        if (_segment_id_to_cached_res_flags.if_contains(
150
0
                    column.get_rowset_segment_id(),
151
0
                    [&res](const auto& pair) { res = pair.second; })) {
152
0
            return res;
153
0
        }
154
155
0
        std::vector<bool> tmp_res(column.dict_size(), false);
156
0
        for (int i = 0; i < column.dict_size(); i++) {
157
0
            StringRef cell_value = column.get_value(i);
158
0
            unsigned char flag = 0;
159
0
            THROW_IF_ERROR((_state->scalar_function)(
160
0
                    &_like_state, StringRef(cell_value.data, cell_value.size), pattern, &flag));
161
0
            tmp_res[i] = flag;
162
0
        }
163
        // Sometimes the dict is not initialized when run comparison predicate here, for example,
164
        // the full page is null, then the reader will skip read, so that the dictionary is not
165
        // inited. The cached code is wrong during this case, because the following page maybe not
166
        // null, and the dict should have items in the future.
167
        //
168
        // Cached code may have problems, so that add a config here, if not opened, then
169
        // we will return the code and not cache it.
170
0
        if (!column.is_dict_empty() && config::enable_low_cardinality_cache_code) {
171
0
            _segment_id_to_cached_res_flags.emplace(
172
0
                    std::pair {column.get_rowset_segment_id(), tmp_res});
173
0
        }
174
175
0
        return tmp_res;
176
0
    }
177
178
    // Cache of per-dictionary-code match results, keyed by the value returned from
    // ColumnDictI32::get_rowset_segment_id(). It is `mutable` because it is filled from
    // const member functions; access is guarded by the map's std::shared_mutex.
    // The allocator's value type must name the same mapped type as the map itself
    // (std::vector<bool>); it previously named int32_t.
    // NOTE(review): the literal 4 is phmap's sub-map count exponent (2^4 sub-maps) —
    // confirm against the phmap documentation.
    mutable phmap::parallel_flat_hash_map<
            std::pair<RowsetId, uint32_t>, std::vector<bool>,
            phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
            phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
            std::allocator<std::pair<const std::pair<RowsetId, uint32_t>, std::vector<bool>>>,
            4, std::shared_mutex>
            _segment_id_to_cached_res_flags;
185
186
    std::string _origin;
187
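    // Lifetime controlled by scan node.
    // NOTE(review): presumably this describes the non-owning `pattern` view and `_state`
    // pointer declared below, not the StateType alias — confirm.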
188
    using StateType = LikeState;
189
    StringRef pattern;
190
191
    StateType* _state = nullptr;
192
193
    // A separate scratch region is required for every concurrent caller of the
194
    // Hyperscan API. So here _like_state is separate for each instance of
195
    // LikeColumnPredicate.
196
    LikeSearchState _like_state;
197
    std::shared_ptr<segment_v2::BloomFilter> _page_ng_bf; // for ngram-bf index
198
};
199
200
} // namespace doris