Coverage Report

Created: 2026-06-09 15:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/block_column_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <algorithm>
25
#include <ostream>
26
#include <set>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "common/factory_creator.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "format/parquet/parquet_predicate.h"
35
#include "storage/olap_common.h"
36
#include "storage/predicate/column_predicate.h"
37
38
namespace roaring {
39
class Roaring;
40
} // namespace roaring
41
42
namespace doris {
43
namespace segment_v2 {
44
class BloomFilter;
45
class InvertedIndexIterator;
46
} // namespace segment_v2
47
48
// Block Column Predicate supports evaluating column predicates and combining them with OR and AND.
49
// Block Column Predicate will replace column predicate as a unified external vectorized interface
50
// in the future
51
// TODO: support evaluating predicates on Bitmap and ZoneMap, so we can use the column index to evaluate predicates on
52
// page and segment
53
class BlockColumnPredicate {
54
public:
55
6.98k
    BlockColumnPredicate() = default;
56
6.98k
    virtual ~BlockColumnPredicate() = default;
57
58
    virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 0;
59
60
    virtual void get_all_column_predicate(
61
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const = 0;
62
63
0
    virtual ScanFilterHandle scan_filter_handle() const { return {}; }
64
0
    virtual bool has_scan_filter() const { return static_cast<bool>(scan_filter_handle()); }
65
    virtual void record_scan_filter(ScanFilterStage stage, int64_t input_rows,
66
0
                                    int64_t output_rows) const {
67
0
        if (auto handle = scan_filter_handle()) {
68
0
            handle.stats->record(stage, input_rows, output_rows);
69
0
        }
70
0
    }
71
72
0
    virtual uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const {
73
0
        return selected_size;
74
0
    }
75
    virtual void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
76
0
                              bool* flags) const {}
77
    virtual void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
78
0
                             bool* flags) const {}
79
80
0
    virtual void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const {}
81
82
0
    virtual bool support_zonemap() const { return true; }
83
84
0
    virtual bool evaluate_and(const segment_v2::ZoneMap& zone_map) const {
85
0
        throw Exception(Status::FatalError("should not reach here"));
86
0
    }
87
88
0
    virtual bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const {
89
0
        throw Exception(Status::FatalError("should not reach here"));
90
0
    }
91
92
    /**
93
     * For Parquet page indexes, since the number of rows filtered by each column's page index is not the same,
94
     * a `RowRanges` is needed to represent the range of rows to be read after filtering. If no rows need to
95
     * be read, it returns false; otherwise, it returns true. Because the page index needs to be
96
     * parsed, `CachedPageIndexStat` is used to avoid repeatedly parsing the page index information
97
     * of the same column.
98
     */
99
    virtual bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
100
0
                              RowRanges* row_ranges) const {
101
0
        throw Exception(Status::FatalError("should not reach here"));
102
0
    }
103
104
0
    virtual bool evaluate_and(const segment_v2::BloomFilter* bf) const {
105
0
        throw Exception(Status::FatalError("should not reach here"));
106
0
    }
107
108
0
    virtual bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const {
109
0
        throw Exception(Status::FatalError("should not reach here"));
110
0
    }
111
112
0
    virtual bool can_do_bloom_filter(bool ngram) const { return false; }
113
114
    // evaluate predicate on inverted index
115
    virtual Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator,
116
0
                            uint32_t num_rows, roaring::Roaring* bitmap) const {
117
0
        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED>(
118
0
                "Not Implemented evaluate with inverted index, please check the predicate");
119
0
    }
120
};
121
122
class SingleColumnBlockPredicate : public BlockColumnPredicate {
123
    ENABLE_FACTORY_CREATOR(SingleColumnBlockPredicate);
124
125
public:
126
    explicit SingleColumnBlockPredicate(const std::shared_ptr<const ColumnPredicate>& pre)
127
1.04k
            : _predicate(pre) {}
128
129
5.56k
    void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
130
5.56k
        column_id_set.insert(_predicate->column_id());
131
5.56k
    }
132
133
    void get_all_column_predicate(
134
467
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const override {
135
467
        predicate_set.insert(_predicate);
136
467
    }
137
138
0
    ScanFilterHandle scan_filter_handle() const override {
139
0
        return _predicate->scan_filter_handle();
140
0
    }
141
142
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
143
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
144
                      bool* flags) const override;
145
0
    bool support_zonemap() const override { return _predicate->support_zonemap(); }
146
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override;
147
64
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
148
64
        return _predicate->evaluate_and(statistic);
149
64
    }
150
151
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
152
0
                      RowRanges* row_ranges) const override {
153
0
        return _predicate->evaluate_and(statistic, row_ranges);
154
0
    }
155
    bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
156
    bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
157
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
158
                     bool* flags) const override;
159
160
    void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const override;
161
162
0
    bool can_do_bloom_filter(bool ngram) const override {
163
0
        return _predicate->can_do_bloom_filter(ngram);
164
0
    }
165
166
private:
167
    const std::shared_ptr<const ColumnPredicate> _predicate = nullptr;
168
};
169
170
class MutilColumnBlockPredicate : public BlockColumnPredicate {
171
public:
172
5.93k
    MutilColumnBlockPredicate() = default;
173
174
5.93k
    ~MutilColumnBlockPredicate() override = default;
175
176
0
    bool support_zonemap() const override {
177
0
        for (const auto& child_block_predicate : _block_column_predicate_vec) {
178
0
            if (!child_block_predicate->support_zonemap()) {
179
0
                return false;
180
0
            }
181
0
        }
182
183
0
        return true;
184
0
    }
185
186
439
    void add_column_predicate(std::unique_ptr<BlockColumnPredicate> column_predicate) {
187
439
        _block_column_predicate_vec.push_back(std::move(column_predicate));
188
439
    }
189
190
5.67k
    size_t num_of_column_predicate() const { return _block_column_predicate_vec.size(); }
191
192
0
    bool has_scan_filter() const override {
193
0
        for (const auto& child_block_predicate : _block_column_predicate_vec) {
194
0
            if (child_block_predicate->has_scan_filter()) {
195
0
                return true;
196
0
            }
197
0
        }
198
0
        return false;
199
0
    }
200
201
    void record_scan_filter(ScanFilterStage stage, int64_t input_rows,
202
0
                            int64_t output_rows) const override {
203
0
        for (const auto& child_block_predicate : _block_column_predicate_vec) {
204
0
            child_block_predicate->record_scan_filter(stage, input_rows, output_rows);
205
0
        }
206
0
    }
207
208
30.6k
    void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
209
30.6k
        for (auto& child_block_predicate : _block_column_predicate_vec) {
210
5.56k
            child_block_predicate->get_all_column_ids(column_id_set);
211
5.56k
        }
212
30.6k
    }
213
214
    void get_all_column_predicate(
215
2.82k
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const override {
216
2.82k
        for (auto& child_block_predicate : _block_column_predicate_vec) {
217
467
            child_block_predicate->get_all_column_predicate(predicate_set);
218
467
        }
219
2.82k
    }
220
221
protected:
222
    std::vector<std::unique_ptr<BlockColumnPredicate>> _block_column_predicate_vec;
223
};
224
225
class OrBlockColumnPredicate : public MutilColumnBlockPredicate {
226
    ENABLE_FACTORY_CREATOR(OrBlockColumnPredicate);
227
228
public:
229
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
230
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
231
                      bool* flags) const override;
232
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
233
                     bool* flags) const override;
234
3
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
235
3
        if (num_of_column_predicate() == 1) {
236
1
            return _block_column_predicate_vec[0]->evaluate_and(statistic);
237
2
        } else {
238
4
            for (int i = 0; i < num_of_column_predicate(); ++i) {
239
3
                if (_block_column_predicate_vec[i]->evaluate_and(statistic)) {
240
1
                    return true;
241
1
                }
242
3
            }
243
1
            return false;
244
2
        }
245
3
    }
246
247
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
248
                      RowRanges* row_ranges) const override;
249
250
    // note(wb) we didn't implement the evaluate_vec method here, because the storage layer only supports AND predicates now.
251
};
252
253
class AndBlockColumnPredicate : public MutilColumnBlockPredicate {
254
    ENABLE_FACTORY_CREATOR(AndBlockColumnPredicate);
255
256
public:
257
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
258
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
259
                      bool* flags) const override;
260
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
261
                     bool* flags) const override;
262
263
    void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const override;
264
265
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override;
266
267
    bool evaluate_and_with_scan_filter(const segment_v2::ZoneMap& zone_map, ScanFilterStage stage,
268
                                       int64_t input_rows) const;
269
270
    bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
271
272
    bool evaluate_and_with_scan_filter(const segment_v2::BloomFilter* bf, ScanFilterStage stage,
273
                                       int64_t input_rows) const;
274
275
    bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
276
277
    bool evaluate_and_with_scan_filter(const StringRef* dict_words, const size_t dict_num,
278
                                       ScanFilterStage stage, int64_t input_rows) const;
279
280
4
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
281
8
        for (auto& block_column_predicate : _block_column_predicate_vec) {
282
8
            if (!block_column_predicate->evaluate_and(statistic)) {
283
2
                return false;
284
2
            }
285
8
        }
286
2
        return true;
287
4
    }
288
289
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
290
                      RowRanges* row_ranges) const override;
291
292
0
    bool can_do_bloom_filter(bool ngram) const override {
293
0
        for (auto& pred : _block_column_predicate_vec) {
294
0
            if (!pred->can_do_bloom_filter(ngram)) {
295
0
                return false;
296
0
            }
297
0
        }
298
0
        return true;
299
0
    }
300
301
    Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator,
302
                    uint32_t num_rows, roaring::Roaring* bitmap) const override;
303
};
304
305
} //namespace doris