Coverage Report

Created: 2026-03-14 13:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/equality_delete.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "core/block/block.h"
19
#include "exprs/hybrid_set.h"
20
#include "runtime/runtime_profile.h"
21
22
namespace doris {
23
#include "common/compile_check_begin.h"
24
25
/**
26
 * Support Iceberg equality delete.
27
 * If there's only one delete column in delete file, use `SimpleEqualityDelete`,
28
 * which uses optimized `HybridSetBase` to build the hash set.
29
 * If there are more delete columns in delete file, use `MultiEqualityDelete`,
30
 * which generates a hash column from all delete columns, and only compare the values
31
 * when the hash values are the same.
32
 */
33
class EqualityDeleteBase {
34
protected:
35
    RuntimeProfile::Counter* num_delete_rows;
36
    RuntimeProfile::Counter* build_set_time;
37
    RuntimeProfile::Counter* equality_delete_time;
38
39
    const Block* _delete_block;
40
    std::vector<int> _delete_col_ids;
41
42
    virtual Status _build_set() = 0;
43
44
public:
45
    EqualityDeleteBase(const Block* delete_block, const std::vector<int> delete_col_ids)
46
0
            : _delete_block(delete_block), _delete_col_ids(delete_col_ids) {}
47
0
    virtual ~EqualityDeleteBase() = default;
48
49
0
    Status init(RuntimeProfile* profile) {
50
0
        static const char* delete_profile = "EqualityDelete";
51
0
        ADD_TIMER_WITH_LEVEL(profile, delete_profile, 1);
52
0
        num_delete_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "NumRowsInDeleteFile", TUnit::UNIT,
53
0
                                                       delete_profile, 1);
54
0
        build_set_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "BuildHashSetTime", delete_profile, 1);
55
0
        equality_delete_time =
56
0
                ADD_CHILD_TIMER_WITH_LEVEL(profile, "EqualityDeleteFilterTime", delete_profile, 1);
57
0
        SCOPED_TIMER(build_set_time);
58
0
        return _build_set();
59
0
    }
60
61
    virtual Status filter_data_block(
62
            Block* data_block,
63
            const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
64
            const std::unordered_map<int, std::string>& id_to_block_column_name,
65
            IColumn::Filter& filter) = 0;
66
67
    static std::unique_ptr<EqualityDeleteBase> get_delete_impl(
68
            const Block* delete_block, const std::vector<int>& delete_col_ids);
69
};
70
71
class SimpleEqualityDelete : public EqualityDeleteBase {
72
protected:
73
    std::shared_ptr<HybridSetBase> _hybrid_set;
74
    std::unique_ptr<IColumn::Filter> _single_filter;
75
76
    Status _build_set() override;
77
78
public:
79
    SimpleEqualityDelete(const Block* delete_block, const std::vector<int>& delete_col_ids)
80
0
            : EqualityDeleteBase(delete_block, delete_col_ids) {}
81
82
    Status filter_data_block(Block* data_block,
83
                             const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
84
                             const std::unordered_map<int, std::string>& id_to_block_column_name,
85
                             IColumn::Filter& filter) override;
86
};
87
88
/**
89
 * `MultiEqualityDelete` will generate the hash column for delete block and data block.
90
 */
91
class MultiEqualityDelete : public EqualityDeleteBase {
92
protected:
93
    // hash column for delete block
94
    std::vector<uint64_t> _delete_hashes;
95
    // hash column for data block
96
    std::vector<uint64_t> _data_hashes;
97
    // hash code => row index
98
    // if hash values are equal, then compare the real values
99
    // the row index records the row number of the delete row in delete block
100
    std::multimap<uint64_t, size_t> _delete_hash_map;
101
    // the delete column indexes in data block
102
    std::vector<size_t> _data_column_index;
103
104
    Status _build_set() override;
105
106
    bool _equal(Block* data_block, size_t data_row_index, size_t delete_row_index);
107
108
public:
109
    MultiEqualityDelete(const Block* delete_block, const std::vector<int>& delete_col_ids)
110
0
            : EqualityDeleteBase(delete_block, delete_col_ids) {}
111
112
    Status filter_data_block(Block* data_block,
113
                             const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
114
                             const std::unordered_map<int, std::string>& id_to_block_column_name,
115
                             IColumn::Filter& filter) override;
116
};
117
118
#include "common/compile_check_end.h"
119
} // namespace doris