Coverage Report

Created: 2026-03-14 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/accept_null_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstdint>
21
#include <memory>
22
23
#include "common/factory_creator.h"
24
#include "core/column/column_dictionary.h"
25
#include "storage/index/bloom_filter/bloom_filter.h"
26
#include "storage/index/inverted/inverted_index_cache.h"
27
#include "storage/index/inverted/inverted_index_reader.h"
28
#include "storage/predicate/column_predicate.h"
29
30
namespace doris {
31
32
/**
33
 * A wrapper predicate that delegate to nested predicate
34
 *  but pass (set/return true) for NULL value rows.
35
 *
36
 * At parent, it's used for topn runtime predicate.
37
 * Eg: original input indexs is '1,2,3,7,8,9' and value of index9 is null, we get nested predicate output index is '1,2,3', but we finally output '1,2,3,9'
38
*/
39
class AcceptNullPredicate : public ColumnPredicate {
40
    ENABLE_FACTORY_CREATOR(AcceptNullPredicate);
41
42
public:
43
    AcceptNullPredicate(const std::shared_ptr<ColumnPredicate>& nested)
44
7
            : ColumnPredicate(nested->column_id(), nested->col_name(), nested->primitive_type(),
45
7
                              nested->opposite()),
46
7
              _nested {nested} {}
47
    AcceptNullPredicate(const AcceptNullPredicate& other, uint32_t col_id)
48
1
            : ColumnPredicate(other, col_id),
49
1
              _nested(assert_cast<const AcceptNullPredicate&>(other)._nested
50
1
                              ? assert_cast<const AcceptNullPredicate&>(other)._nested->clone(
51
1
                                        col_id)
52
1
                              : nullptr) {}
53
    AcceptNullPredicate(const AcceptNullPredicate& other) = delete;
54
8
    ~AcceptNullPredicate() override = default;
55
1
    std::shared_ptr<ColumnPredicate> clone(uint32_t col_id) const override {
56
1
        return AcceptNullPredicate::create_shared(*this, col_id);
57
1
    }
58
1
    std::string debug_string() const override {
59
1
        auto n = _nested;
60
1
        fmt::memory_buffer debug_string_buffer;
61
1
        fmt::format_to(debug_string_buffer, "AcceptNullPredicate({}, nested={})",
62
1
                       ColumnPredicate::debug_string(), n ? n->debug_string() : "null");
63
1
        return fmt::to_string(debug_string_buffer);
64
1
    }
65
66
2
    PredicateType type() const override { return _nested->type(); }
67
68
    Status evaluate(const IndexFieldNameAndTypePair& name_with_type, IndexIterator* iterator,
69
4
                    uint32_t num_rows, roaring::Roaring* bitmap) const override {
70
4
        roaring::Roaring null_rows_in_bitmap;
71
4
        if (iterator != nullptr) {
72
3
            bool has_null = DORIS_TRY(iterator->has_null());
73
3
            if (has_null) {
74
2
                InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
75
2
                RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
76
2
                auto null_bitmap = null_bitmap_cache_handle.get_bitmap();
77
2
                if (null_bitmap) {
78
2
                    null_rows_in_bitmap = *bitmap & *null_bitmap;
79
2
                }
80
2
            }
81
3
        }
82
4
        RETURN_IF_ERROR(_nested->evaluate(name_with_type, iterator, num_rows, bitmap));
83
4
        *bitmap |= null_rows_in_bitmap;
84
4
        return Status::OK();
85
4
    }
86
87
    void evaluate_and(const IColumn& column, const uint16_t* sel, uint16_t size,
88
0
                      bool* flags) const override {
89
0
        if (column.has_null()) {
90
0
            std::vector<uint8_t> original_flags(size);
91
0
            memcpy(original_flags.data(), flags, size);
92
93
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
94
0
            _nested->evaluate_and(nullable_col.get_nested_column(), sel, size, flags);
95
0
            const auto& nullmap = nullable_col.get_null_map_data();
96
0
            for (uint16_t i = 0; i < size; ++i) {
97
0
                flags[i] |= (original_flags[i] && nullmap[sel[i]]);
98
0
            }
99
0
        } else {
100
0
            _nested->evaluate_and(column, sel, size, flags);
101
0
        }
102
0
    }
103
104
    void evaluate_or(const IColumn& column, const uint16_t* sel, uint16_t size,
105
0
                     bool* flags) const override {
106
0
        DCHECK(false) << "should not reach here";
107
0
    }
108
109
0
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override {
110
        // there is null in range, accept it
111
0
        if (zone_map.has_null) {
112
0
            return true;
113
0
        }
114
0
        return _nested->evaluate_and(zone_map);
115
0
    }
116
117
0
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
118
0
        return _nested->evaluate_and(statistic) || statistic->has_null;
119
0
    }
120
121
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
122
0
                      RowRanges* row_ranges) const override {
123
0
        _nested->evaluate_and(statistic, row_ranges);
124
0
        ParquetPredicate::PageIndexStat* stat = nullptr;
125
0
        if (!(statistic->get_stat_func)(&stat, column_id())) {
126
0
            return true;
127
0
        }
128
129
0
        for (int page_id = 0; page_id < stat->num_of_pages; page_id++) {
130
0
            if (stat->has_null[page_id]) {
131
0
                row_ranges->add(stat->ranges[page_id]);
132
0
            }
133
0
        }
134
0
        return row_ranges->count() > 0;
135
0
    }
136
137
0
    bool evaluate_del(const segment_v2::ZoneMap& zone_map) const override {
138
0
        return _nested->evaluate_del(zone_map);
139
0
    }
140
141
0
    bool evaluate_and(const BloomFilter* bf) const override { return _nested->evaluate_and(bf); }
142
143
0
    bool can_do_bloom_filter(bool ngram) const override {
144
0
        return _nested->can_do_bloom_filter(ngram);
145
0
    }
146
147
0
    void evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const override {
148
0
        if (column.has_null()) {
149
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
150
0
            _nested->evaluate_vec(nullable_col.get_nested_column(), size, flags);
151
0
            for (uint16_t i = 0; i < size; ++i) {
152
0
                if (!flags[i] && nullable_col.is_null_at(i)) {
153
                    // set true for NULL rows
154
0
                    flags[i] = true;
155
0
                }
156
0
            }
157
0
        } else {
158
0
            _nested->evaluate_vec(column, size, flags);
159
0
        }
160
0
    }
161
162
0
    void evaluate_and_vec(const IColumn& column, uint16_t size, bool* flags) const override {
163
0
        if (column.has_null()) {
164
            // copy original flags
165
0
            std::vector<uint8_t> original_flags(size);
166
0
            memcpy(original_flags.data(), flags, size);
167
168
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
169
            // call evaluate_and_vec and restore true for NULL rows
170
0
            _nested->evaluate_and_vec(nullable_col.get_nested_column(), size, flags);
171
0
            for (uint16_t i = 0; i < size; ++i) {
172
0
                if (original_flags[i] && !flags[i] && nullable_col.is_null_at(i)) {
173
0
                    flags[i] = true;
174
0
                }
175
0
            }
176
0
        } else {
177
0
            _nested->evaluate_and_vec(column, size, flags);
178
0
        }
179
0
    }
180
181
0
    std::string get_search_str() const override { return _nested->get_search_str(); }
182
183
private:
184
0
    uint16_t _evaluate_inner(const IColumn& column, uint16_t* sel, uint16_t size) const override {
185
0
        if (column.has_null()) {
186
0
            if (size == 0) {
187
0
                return 0;
188
0
            }
189
            // create selected_flags
190
0
            uint16_t max_idx = sel[size - 1];
191
0
            std::vector<uint16_t> old_sel(size);
192
0
            memcpy(old_sel.data(), sel, sizeof(uint16_t) * size);
193
194
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
195
            // call nested predicate evaluate
196
0
            uint16_t new_size = _nested->evaluate(nullable_col.get_nested_column(), sel, size);
197
198
            // process NULL values
199
0
            if (new_size < size) {
200
0
                std::vector<uint8_t> selected(max_idx + 1, 0);
201
0
                const auto* nullmap = nullable_col.get_null_map_data().data();
202
                // add rows selected by _nested->evaluate
203
0
                for (uint16_t i = 0; i < new_size; ++i) {
204
0
                    uint16_t row_idx = sel[i];
205
0
                    selected[row_idx] = true;
206
0
                }
207
                // reset null from original data
208
0
                for (uint16_t i = 0; i < size; ++i) {
209
0
                    uint16_t row_idx = old_sel[i];
210
0
                    selected[row_idx] |= nullmap[row_idx];
211
0
                }
212
213
                // recaculate new_size and sel array
214
0
                new_size = 0;
215
0
                for (uint16_t row_idx = 0; row_idx < max_idx + 1; ++row_idx) {
216
0
                    if (selected[row_idx]) {
217
0
                        sel[new_size++] = row_idx;
218
0
                    }
219
0
                }
220
0
            }
221
0
            return new_size;
222
0
        }
223
0
        return _nested->evaluate(column, sel, size);
224
0
    }
225
226
    std::shared_ptr<ColumnPredicate> _nested;
227
};
228
229
} //namespace doris