Coverage Report

Created: 2026-05-09 19:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/predicate/accept_null_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstdint>
21
#include <memory>
22
23
#include "common/factory_creator.h"
24
#include "core/column/column_dictionary.h"
25
#include "storage/index/bloom_filter/bloom_filter.h"
26
#include "storage/index/inverted/inverted_index_cache.h"
27
#include "storage/index/inverted/inverted_index_reader.h"
28
#include "storage/predicate/column_predicate.h"
29
30
namespace doris {
31
32
/**
33
 * A wrapper predicate that delegates to the nested predicate
34
 *  but passes (sets/returns true) for NULL value rows.
35
 *
36
 * At present, it's used for the topn runtime predicate.
37
 * E.g.: the original input indexes are '1,2,3,7,8,9' and the value at index 9 is null; the nested predicate outputs indexes '1,2,3', but we finally output '1,2,3,9'.
38
*/
39
class AcceptNullPredicate : public ColumnPredicate {
40
    ENABLE_FACTORY_CREATOR(AcceptNullPredicate);
41
42
public:
43
    AcceptNullPredicate(const std::shared_ptr<ColumnPredicate>& nested)
44
7
            : ColumnPredicate(nested->column_id(), nested->col_name(), nested->primitive_type(),
45
7
                              nested->opposite()),
46
7
              _nested {nested} {}
47
    AcceptNullPredicate(const AcceptNullPredicate& other, uint32_t col_id)
48
1
            : ColumnPredicate(other, col_id),
49
1
              _nested(other._nested ? other._nested->clone(col_id) : nullptr) {}
50
    AcceptNullPredicate(const AcceptNullPredicate& other) = delete;
51
8
    ~AcceptNullPredicate() override = default;
52
1
    std::shared_ptr<ColumnPredicate> clone(uint32_t col_id) const override {
53
1
        return AcceptNullPredicate::create_shared(*this, col_id);
54
1
    }
55
1
    std::string debug_string() const override {
56
1
        auto n = _nested;
57
1
        fmt::memory_buffer debug_string_buffer;
58
1
        fmt::format_to(debug_string_buffer, "AcceptNullPredicate({}, nested={})",
59
1
                       ColumnPredicate::debug_string(), n ? n->debug_string() : "null");
60
1
        return fmt::to_string(debug_string_buffer);
61
1
    }
62
63
2
    PredicateType type() const override { return _nested->type(); }
64
65
    Status evaluate(const IndexFieldNameAndTypePair& name_with_type, IndexIterator* iterator,
66
4
                    uint32_t num_rows, roaring::Roaring* bitmap) const override {
67
4
        roaring::Roaring null_rows_in_bitmap;
68
4
        if (iterator != nullptr) {
69
3
            bool has_null = DORIS_TRY(iterator->has_null());
70
3
            if (has_null) {
71
2
                InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
72
2
                RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
73
2
                auto null_bitmap = null_bitmap_cache_handle.get_bitmap();
74
2
                if (null_bitmap) {
75
2
                    null_rows_in_bitmap = *bitmap & *null_bitmap;
76
2
                }
77
2
            }
78
3
        }
79
4
        RETURN_IF_ERROR(_nested->evaluate(name_with_type, iterator, num_rows, bitmap));
80
4
        *bitmap |= null_rows_in_bitmap;
81
4
        return Status::OK();
82
4
    }
83
84
    void evaluate_and(const IColumn& column, const uint16_t* sel, uint16_t size,
85
0
                      bool* flags) const override {
86
0
        if (column.has_null()) {
87
0
            std::vector<uint8_t> original_flags(size);
88
0
            memcpy(original_flags.data(), flags, size);
89
90
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
91
0
            _nested->evaluate_and(nullable_col.get_nested_column(), sel, size, flags);
92
0
            const auto& nullmap = nullable_col.get_null_map_data();
93
0
            for (uint16_t i = 0; i < size; ++i) {
94
0
                flags[i] |= (original_flags[i] && nullmap[sel[i]]);
95
0
            }
96
0
        } else {
97
0
            _nested->evaluate_and(column, sel, size, flags);
98
0
        }
99
0
    }
100
101
    void evaluate_or(const IColumn& column, const uint16_t* sel, uint16_t size,
102
0
                     bool* flags) const override {
103
0
        DCHECK(false) << "should not reach here";
104
0
    }
105
106
0
    bool evaluate_and(const segment_v2::ZoneMap& zone_map) const override {
107
        // there is null in range, accept it
108
0
        if (zone_map.has_null) {
109
0
            return true;
110
0
        }
111
0
        return _nested->evaluate_and(zone_map);
112
0
    }
113
114
0
    bool evaluate_and(ParquetPredicate::ColumnStat* statistic) const override {
115
0
        return _nested->evaluate_and(statistic) || statistic->has_null;
116
0
    }
117
118
    bool evaluate_and(ParquetPredicate::CachedPageIndexStat* statistic,
119
0
                      RowRanges* row_ranges) const override {
120
0
        _nested->evaluate_and(statistic, row_ranges);
121
0
        ParquetPredicate::PageIndexStat* stat = nullptr;
122
0
        if (!(statistic->get_stat_func)(&stat, column_id())) {
123
0
            return true;
124
0
        }
125
126
0
        for (int page_id = 0; page_id < stat->num_of_pages; page_id++) {
127
0
            if (stat->has_null[page_id]) {
128
0
                row_ranges->add(stat->ranges[page_id]);
129
0
            }
130
0
        }
131
0
        return row_ranges->count() > 0;
132
0
    }
133
134
0
    bool evaluate_del(const segment_v2::ZoneMap& zone_map) const override {
135
0
        return _nested->evaluate_del(zone_map);
136
0
    }
137
138
0
    bool evaluate_and(const BloomFilter* bf) const override { return _nested->evaluate_and(bf); }
139
140
0
    bool can_do_bloom_filter(bool ngram) const override {
141
0
        return _nested->can_do_bloom_filter(ngram);
142
0
    }
143
144
0
    void evaluate_vec(const IColumn& column, uint16_t size, bool* flags) const override {
145
0
        if (column.has_null()) {
146
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
147
0
            _nested->evaluate_vec(nullable_col.get_nested_column(), size, flags);
148
0
            for (uint16_t i = 0; i < size; ++i) {
149
0
                if (!flags[i] && nullable_col.is_null_at(i)) {
150
                    // set true for NULL rows
151
0
                    flags[i] = true;
152
0
                }
153
0
            }
154
0
        } else {
155
0
            _nested->evaluate_vec(column, size, flags);
156
0
        }
157
0
    }
158
159
0
    void evaluate_and_vec(const IColumn& column, uint16_t size, bool* flags) const override {
160
0
        if (column.has_null()) {
161
            // copy original flags
162
0
            std::vector<uint8_t> original_flags(size);
163
0
            memcpy(original_flags.data(), flags, size);
164
165
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
166
            // call evaluate_and_vec and restore true for NULL rows
167
0
            _nested->evaluate_and_vec(nullable_col.get_nested_column(), size, flags);
168
0
            for (uint16_t i = 0; i < size; ++i) {
169
0
                if (original_flags[i] && !flags[i] && nullable_col.is_null_at(i)) {
170
0
                    flags[i] = true;
171
0
                }
172
0
            }
173
0
        } else {
174
0
            _nested->evaluate_and_vec(column, size, flags);
175
0
        }
176
0
    }
177
178
0
    std::string get_search_str() const override { return _nested->get_search_str(); }
179
180
private:
181
0
    uint16_t _evaluate_inner(const IColumn& column, uint16_t* sel, uint16_t size) const override {
182
0
        if (column.has_null()) {
183
0
            if (size == 0) {
184
0
                return 0;
185
0
            }
186
            // create selected_flags
187
0
            uint16_t max_idx = sel[size - 1];
188
0
            std::vector<uint16_t> old_sel(size);
189
0
            memcpy(old_sel.data(), sel, sizeof(uint16_t) * size);
190
191
0
            const auto& nullable_col = assert_cast<const ColumnNullable&>(column);
192
            // call nested predicate evaluate
193
0
            uint16_t new_size = _nested->evaluate(nullable_col.get_nested_column(), sel, size);
194
195
            // process NULL values
196
0
            if (new_size < size) {
197
0
                std::vector<uint8_t> selected(max_idx + 1, 0);
198
0
                const auto* nullmap = nullable_col.get_null_map_data().data();
199
                // add rows selected by _nested->evaluate
200
0
                for (uint16_t i = 0; i < new_size; ++i) {
201
0
                    uint16_t row_idx = sel[i];
202
0
                    selected[row_idx] = true;
203
0
                }
204
                // reset null from original data
205
0
                for (uint16_t i = 0; i < size; ++i) {
206
0
                    uint16_t row_idx = old_sel[i];
207
0
                    selected[row_idx] |= nullmap[row_idx];
208
0
                }
209
210
                // recaculate new_size and sel array
211
0
                new_size = 0;
212
0
                for (uint16_t row_idx = 0; row_idx < max_idx + 1; ++row_idx) {
213
0
                    if (selected[row_idx]) {
214
0
                        sel[new_size++] = row_idx;
215
0
                    }
216
0
                }
217
0
            }
218
0
            return new_size;
219
0
        }
220
0
        return _nested->evaluate(column, sel, size);
221
0
    }
222
223
    std::shared_ptr<ColumnPredicate> _nested;
224
};
225
226
} //namespace doris