Coverage Report

Created: 2026-03-13 12:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/equality_delete.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format/table/equality_delete.h"
19
20
#include "exprs/create_predicate_function.h"
21
22
namespace doris {
23
#include "common/compile_check_begin.h"
24
25
std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl(
26
1.70k
        const Block* delete_block, const std::vector<int>& delete_col_ids) {
27
1.70k
    DCHECK_EQ(delete_block->columns(), delete_col_ids.size());
28
1.70k
    if (delete_block->columns() == 1) {
29
776
        return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids);
30
928
    } else {
31
928
        return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids);
32
928
    }
33
1.70k
}
34
35
776
Status SimpleEqualityDelete::_build_set() {
36
776
    COUNTER_UPDATE(num_delete_rows, _delete_block->rows());
37
776
    if (_delete_block->columns() != 1) [[unlikely]] {
38
0
        return Status::InternalError("Simple equality delete can be only applied with one column");
39
0
    }
40
776
    auto& column_and_type = _delete_block->get_by_position(0);
41
776
    auto delete_column_type = remove_nullable(column_and_type.type)->get_primitive_type();
42
776
    _hybrid_set.reset(create_set(delete_column_type, _delete_block->rows(), false));
43
776
    _hybrid_set->insert_fixed_len(column_and_type.column, 0);
44
776
    return Status::OK();
45
776
}
46
47
Status SimpleEqualityDelete::filter_data_block(
48
        Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
49
        const std::unordered_map<int, std::string>& id_to_block_column_name,
50
1.12k
        IColumn::Filter& filter) {
51
1.12k
    SCOPED_TIMER(equality_delete_time);
52
1.12k
    DCHECK(_delete_col_ids.size() == 1);
53
1.12k
    auto column_field_id = _delete_col_ids[0];
54
55
1.12k
    auto column_and_type = data_block->get_by_position(
56
1.12k
            col_name_to_block_idx->at(id_to_block_column_name.at(column_field_id)));
57
58
1.12k
    size_t rows = data_block->rows();
59
    //     _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set
60
1.12k
    if (_single_filter == nullptr) {
61
776
        _single_filter = std::make_unique<IColumn::Filter>(rows, 0);
62
776
    } else {
63
        // reset the array capacity and fill all elements using the 0
64
352
        _single_filter->assign(rows, UInt8(0));
65
352
    }
66
1.12k
    if (column_and_type.column->is_nullable()) {
67
1.12k
        const NullMap& null_map =
68
1.12k
                reinterpret_cast<const ColumnNullable*>(column_and_type.column.get())
69
1.12k
                        ->get_null_map_data();
70
1.12k
        _hybrid_set->find_batch_nullable(
71
1.12k
                remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map,
72
1.12k
                *_single_filter);
73
1.12k
        if (_hybrid_set->contain_null()) {
74
0
            auto* filter_data = _single_filter->data();
75
0
            for (size_t i = 0; i < rows; ++i) {
76
0
                filter_data[i] = filter_data[i] || null_map[i];
77
0
            }
78
0
        }
79
1.12k
    } else {
80
0
        _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows,
81
0
                                *_single_filter);
82
0
    }
83
    // should reverse _filter
84
1.12k
    auto* filter_data = filter.data();
85
2.52k
    for (size_t i = 0; i < rows; ++i) {
86
1.39k
        filter_data[i] &= !_single_filter->data()[i];
87
1.39k
    }
88
1.12k
    return Status::OK();
89
1.12k
}
90
91
928
Status MultiEqualityDelete::_build_set() {
92
928
    COUNTER_UPDATE(num_delete_rows, _delete_block->rows());
93
928
    size_t rows = _delete_block->rows();
94
928
    _delete_hashes.clear();
95
928
    _delete_hashes.resize(rows, 0);
96
1.88k
    for (ColumnPtr column : _delete_block->get_columns()) {
97
1.88k
        column->update_hashes_with_value(_delete_hashes.data(), nullptr);
98
1.88k
    }
99
3.60k
    for (size_t i = 0; i < rows; ++i) {
100
2.68k
        _delete_hash_map.insert({_delete_hashes[i], i});
101
2.68k
    }
102
928
    _data_column_index.resize(_delete_block->columns());
103
928
    return Status::OK();
104
928
}
105
106
Status MultiEqualityDelete::filter_data_block(
107
        Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
108
        const std::unordered_map<int, std::string>& id_to_block_column_name,
109
1.35k
        IColumn::Filter& filter) {
110
1.35k
    SCOPED_TIMER(equality_delete_time);
111
1.35k
    DCHECK_EQ(_delete_block->get_columns_with_type_and_name().size(), _delete_col_ids.size());
112
1.35k
    size_t column_index = 0;
113
114
4.10k
    for (size_t idx = 0; idx < _delete_block->get_columns_with_type_and_name().size(); ++idx) {
115
2.74k
        auto delete_col = _delete_block->get_columns_with_type_and_name()[idx];
116
2.74k
        auto delete_col_id = _delete_col_ids[idx];
117
118
2.74k
        DCHECK(id_to_block_column_name.contains(delete_col_id));
119
2.74k
        const auto& block_column_name = id_to_block_column_name.at(delete_col_id);
120
2.74k
        if (!col_name_to_block_idx->contains(block_column_name)) [[unlikely]] {
121
0
            return Status::InternalError("Column '{}' not found in data block: {}",
122
0
                                         block_column_name, data_block->dump_structure());
123
0
        }
124
2.74k
        auto column_and_type =
125
2.74k
                data_block->safe_get_by_position(col_name_to_block_idx->at(block_column_name));
126
2.74k
        if (!delete_col.type->equals(*column_and_type.type)) [[unlikely]] {
127
0
            return Status::InternalError(
128
0
                    "Not support type change in column '{}', src type: {}, target type: {}",
129
0
                    block_column_name, delete_col.type->get_name(),
130
0
                    column_and_type.type->get_name());
131
0
        }
132
2.74k
        _data_column_index[column_index++] = col_name_to_block_idx->at(block_column_name);
133
2.74k
    }
134
1.35k
    size_t rows = data_block->rows();
135
1.35k
    _data_hashes.clear();
136
1.35k
    _data_hashes.resize(rows, 0);
137
2.74k
    for (size_t index : _data_column_index) {
138
2.74k
        data_block->get_by_position(index).column->update_hashes_with_value(_data_hashes.data(),
139
2.74k
                                                                            nullptr);
140
2.74k
    }
141
1.35k
    auto* filter_data = filter.data();
142
3.00k
    for (size_t i = 0; i < rows; ++i) {
143
1.64k
        for (auto beg = _delete_hash_map.lower_bound(_data_hashes[i]),
144
1.64k
                  end = _delete_hash_map.upper_bound(_data_hashes[i]);
145
1.89k
             beg != end; ++beg) {
146
976
            if (filter[i] && _equal(data_block, i, beg->second)) {
147
732
                filter_data[i] = 0;
148
732
                break;
149
732
            }
150
976
        }
151
1.64k
    }
152
153
1.35k
    return Status::OK();
154
1.35k
}
155
156
bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index,
157
732
                                 size_t delete_row_index) {
158
2.22k
    for (size_t i = 0; i < _delete_block->columns(); ++i) {
159
1.48k
        ColumnPtr data_col = data_block->get_by_position(_data_column_index[i]).column;
160
1.48k
        ColumnPtr delete_col = _delete_block->get_by_position(i).column;
161
1.48k
        if (data_col->compare_at(data_row_index, delete_row_index, *delete_col, -1) != 0) {
162
0
            return false;
163
0
        }
164
1.48k
    }
165
732
    return true;
166
732
}
167
168
#include "common/compile_check_end.h"
169
} // namespace doris