Coverage Report

Created: 2026-03-16 14:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/block_column_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <algorithm>
25
#include <ostream>
26
#include <set>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "common/factory_creator.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "format/parquet/parquet_predicate.h"
35
#include "storage/olap_common.h"
36
#include "storage/predicate/column_predicate.h"
37
38
namespace roaring {
39
class Roaring;
40
} // namespace roaring
41
42
namespace doris {
43
namespace segment_v2 {
44
class BloomFilter;
45
class InvertedIndexIterator;
46
} // namespace segment_v2
47
48
// BlockColumnPredicate evaluates column predicates and supports combining them with OR and AND.
49
// It is intended to replace ColumnPredicate as the unified external vectorized interface
50
// in the future.
51
// TODO: support evaluating predicates on Bitmap and ZoneMap, so that column indexes can be used
52
// to evaluate predicates on pages and segments.
53
// Abstract base class for predicates evaluated over a block of columns.
//
// Only get_all_column_ids() and get_all_column_predicate() are pure virtual. Every other
// member has a default: the block-level evaluators are no-ops, and the index/statistics
// based evaluate_and() overloads throw because a subclass that does not override them is
// not expected to be asked to evaluate that kind of input.
class BlockColumnPredicate {
54
public:
55
9.71k
    BlockColumnPredicate() = default;
56
9.71k
    virtual ~BlockColumnPredicate() = default;
57
58
    // Collects column ids into `column_id_set`; what gets inserted is defined by the subclass.
    virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 0;
59
60
    // Collects ColumnPredicate objects into `predicate_set`; defined by the subclass.
    virtual void get_all_column_predicate(
61
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const = 0;
62
63
0
    // Default: does not touch `block` or `sel` and returns `selected_size` unchanged.
    // NOTE(review): overrides presumably compact `sel` and return the new selected count -
    // confirm against the implementations in the .cpp file.
    virtual uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const {
64
0
        return selected_size;
65
0
    }
66
    // Default: no-op; `flags` is left untouched.
    virtual void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
67
0
                              bool* flags) const {}
68
    // Default: no-op; `flags` is left untouched.
    virtual void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
69
0
                             bool* flags) const {}
70
71
0
    // Default: no-op; `flags` is left untouched.
    virtual void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const {}
72
73
0
    // Default: reports that zone map evaluation is supported.
    virtual bool support_zonemap() const { return true; }
74
75
0
    // Zone map evaluation. Default: throws Exception(Status::FatalError(...)).
    virtual bool evaluate_and(const segment_v2::ZoneMap& zone_map) const {
76
0
        throw Exception(Status::FatalError("should not reach here"));
77
0
    }
78
79
0
    // Parquet column statistics evaluation. Default: throws Exception(Status::FatalError(...)).
    virtual bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const {
80
0
        throw Exception(Status::FatalError("should not reach here"));
81
0
    }
82
83
    /**
84
     * For Parquet page indexes, since the number of rows filtered by each column's page index is not the same,
85
     * a `RowRanges` is needed to represent the range of rows to be read after filtering. If no rows need to
86
     * be read, it returns false; otherwise, it returns true. Because the page index needs to be
87
     * parsed, `CachedPageIndexStat` is used to avoid repeatedly parsing the page index information
88
     * of the same column.
89
     */
90
    // Default: throws Exception(Status::FatalError(...)).
    virtual bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
91
0
                              RowRanges* row_ranges) const {
92
0
        throw Exception(Status::FatalError("should not reach here"));
93
0
    }
94
95
0
    // Bloom filter evaluation. Default: throws Exception(Status::FatalError(...)).
    virtual bool evaluate_and(const segment_v2::BloomFilter* bf) const {
96
0
        throw Exception(Status::FatalError("should not reach here"));
97
0
    }
98
99
0
    // Evaluation against `dict_num` dictionary words starting at `dict_words`.
    // Default: throws Exception(Status::FatalError(...)).
    virtual bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const {
100
0
        throw Exception(Status::FatalError("should not reach here"));
101
0
    }
102
103
0
    // Default: bloom filter evaluation is not possible, regardless of `ngram`.
    virtual bool can_do_bloom_filter(bool ngram) const { return false; }
104
105
    // Evaluates the predicate using an inverted index.
    // Default: returns an INVERTED_INDEX_NOT_IMPLEMENTED error status and leaves `bitmap` untouched.
106
    virtual Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator,
107
0
                            uint32_t num_rows, roaring::Roaring* bitmap) const {
108
0
        return Status::Error<ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED>(
109
0
                "Not Implemented evaluate with inverted index, please check the predicate");
110
0
    }
111
};
112
113
// Leaf BlockColumnPredicate that wraps exactly one shared, immutable ColumnPredicate.
// The inline members below forward straight to the wrapped predicate; the remaining
// overrides are only declared here and defined out of line.
//
// NOTE(review): `_predicate` is dereferenced without a null check in every inline member,
// so the constructor argument is assumed to be non-null - confirm at the call sites.
class SingleColumnBlockPredicate : public BlockColumnPredicate {
114
    ENABLE_FACTORY_CREATOR(SingleColumnBlockPredicate);
115
116
public:
117
    // Shares ownership of `pre`; the pointer is copied, not the predicate.
    explicit SingleColumnBlockPredicate(const std::shared_ptr<const ColumnPredicate>& pre)
118
1.04k
            : _predicate(pre) {}
119
120
9.36k
    // Inserts the wrapped predicate's column id into `column_id_set`.
    void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
121
9.36k
        column_id_set.insert(_predicate->column_id());
122
9.36k
    }
123
124
    // Inserts the wrapped predicate itself into `predicate_set`.
    void get_all_column_predicate(
125
467
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const override {
126
467
        predicate_set.insert(_predicate);
127
467
    }
128
129
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
130
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
131
                      bool* flags) const override;
132
0
    // Zone map support is whatever the wrapped predicate reports.
    bool support_zonemap() const override { return _predicate->support_zonemap(); }
133
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override;
134
64
    // Forwards the Parquet column statistics check to the wrapped predicate.
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
135
64
        return _predicate->evaluate_and(statistic);
136
64
    }
137
138
    // Forwards the Parquet page index check (and `row_ranges`) to the wrapped predicate.
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
139
0
                      RowRanges* row_ranges) const override {
140
0
        return _predicate->evaluate_and(statistic, row_ranges);
141
0
    }
142
    bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
143
    bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
144
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
145
                     bool* flags) const override;
146
147
    void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const override;
148
149
0
    // Bloom filter capability is whatever the wrapped predicate reports for `ngram`.
    bool can_do_bloom_filter(bool ngram) const override {
150
0
        return _predicate->can_do_bloom_filter(ngram);
151
0
    }
152
153
private:
154
    // The wrapped predicate; const, so it can only be set by the constructor.
    const std::shared_ptr<const ColumnPredicate> _predicate = nullptr;
155
};
156
157
// Common base for composite predicates: owns an ordered list of child BlockColumnPredicates
// and implements the operations that do not depend on how the children are combined.
// ("Mutil" is a historical misspelling of "Multi"; the name is part of the public interface.)
class MutilColumnBlockPredicate : public BlockColumnPredicate {
158
public:
159
8.67k
    MutilColumnBlockPredicate() = default;
160
161
8.67k
    ~MutilColumnBlockPredicate() override = default;
162
163
0
    // Returns false as soon as one child does not support zone maps;
    // returns true otherwise, including when there are no children.
    bool support_zonemap() const override {
164
0
        for (const auto& child_block_predicate : _block_column_predicate_vec) {
165
0
            if (!child_block_predicate->support_zonemap()) {
166
0
                return false;
167
0
            }
168
0
        }
169
170
0
        return true;
171
0
    }
172
173
438
    // Takes ownership of `column_predicate` and appends it as the last child.
    void add_column_predicate(std::unique_ptr<BlockColumnPredicate> column_predicate) {
174
438
        _block_column_predicate_vec.push_back(std::move(column_predicate));
175
438
    }
176
177
11.2k
    // Number of direct children (nested composites count as one).
    size_t num_of_column_predicate() const { return _block_column_predicate_vec.size(); }
178
179
50.8k
    // Lets every child add its column ids to `column_id_set`.
    void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
180
50.8k
        for (auto& child_block_predicate : _block_column_predicate_vec) {
181
9.36k
            child_block_predicate->get_all_column_ids(column_id_set);
182
9.36k
        }
183
50.8k
    }
184
185
    // Lets every child add its ColumnPredicate objects to `predicate_set`.
    void get_all_column_predicate(
186
5.63k
            std::set<std::shared_ptr<const ColumnPredicate>>& predicate_set) const override {
187
5.63k
        for (auto& child_block_predicate : _block_column_predicate_vec) {
188
467
            child_block_predicate->get_all_column_predicate(predicate_set);
189
467
        }
190
5.63k
    }
191
192
protected:
193
    // Owned children, in insertion order.
    std::vector<std::unique_ptr<BlockColumnPredicate>> _block_column_predicate_vec;
194
};
195
196
// Composite predicate whose children are combined with logical OR.
// Only the Parquet column statistics overload is defined inline; the block-level and
// page-index evaluators are declared here and defined out of line.
class OrBlockColumnPredicate : public MutilColumnBlockPredicate {
197
    ENABLE_FACTORY_CREATOR(OrBlockColumnPredicate);
198
199
public:
200
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
201
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
202
                      bool* flags) const override;
203
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
204
                     bool* flags) const override;
205
3
    // Evaluates the children against Parquet column statistics.
    // With exactly one child its result is returned directly. Otherwise this returns true
    // as soon as any child returns true, and false if none does (including when there are
    // no children at all).
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
206
3
        if (num_of_column_predicate() == 1) {
207
1
            return _block_column_predicate_vec[0]->evaluate_and(statistic);
208
2
        } else {
209
4
            // size_t index: num_of_column_predicate() returns size_t, so this avoids a
            // signed/unsigned comparison.
            for (size_t i = 0; i < num_of_column_predicate(); ++i) {
210
3
                if (_block_column_predicate_vec[i]->evaluate_and(statistic)) {
211
1
                    return true;
212
1
                }
213
3
            }
214
1
            return false;
215
2
        }
216
3
    }
217
218
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
219
                      RowRanges* row_ranges) const override;
220
221
    // note(wb) we didn't implement evaluate_vec method here, because storage layer only support AND predicate now;
222
};
223
224
// Composite predicate whose children are combined with logical AND.
// The Parquet column statistics overload and can_do_bloom_filter() are defined inline;
// everything else is declared here and defined out of line.
class AndBlockColumnPredicate : public MutilColumnBlockPredicate {
225
    ENABLE_FACTORY_CREATOR(AndBlockColumnPredicate);
226
227
public:
228
    uint16_t evaluate(MutableColumns& block, uint16_t* sel, uint16_t selected_size) const override;
229
    void evaluate_and(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
230
                      bool* flags) const override;
231
    void evaluate_or(MutableColumns& block, uint16_t* sel, uint16_t selected_size,
232
                     bool* flags) const override;
233
234
    void evaluate_vec(MutableColumns& block, uint16_t size, bool* flags) const override;
235
236
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override;
237
238
    bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
239
240
    bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
241
242
4
    // Evaluates the children against Parquet column statistics.
    // Returns false as soon as any child returns false (later children are not evaluated);
    // returns true otherwise, including when there are no children.
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
243
8
        for (auto& block_column_predicate : _block_column_predicate_vec) {
244
8
            if (!block_column_predicate->evaluate_and(statistic)) {
245
2
                return false;
246
2
            }
247
8
        }
248
2
        return true;
249
4
    }
250
251
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
252
                      RowRanges* row_ranges) const override;
253
254
0
    // Bloom filter evaluation is possible only if every child allows it for `ngram`;
    // returns true when there are no children.
    bool can_do_bloom_filter(bool ngram) const override {
255
0
        for (auto& pred : _block_column_predicate_vec) {
256
0
            if (!pred->can_do_bloom_filter(ngram)) {
257
0
                return false;
258
0
            }
259
0
        }
260
0
        return true;
261
0
    }
262
263
    // Inverted index evaluation; defined out of line.
    Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator,
264
                    uint32_t num_rows, roaring::Roaring* bitmap) const override;
265
};
266
267
} //namespace doris