Coverage Report

Created: 2026-04-14 12:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/table/equality_delete.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "core/block/block.h"
19
#include "exprs/hybrid_set.h"
20
#include "runtime/runtime_profile.h"
21
22
namespace doris {
23
24
/**
25
 * Support Iceberg equality delete.
26
 * If there's only one delete column in the delete file, use `SimpleEqualityDelete`,
27
 * which uses the optimized `HybridSetBase` to build the hash set.
28
 * If there are multiple delete columns in the delete file, use `MultiEqualityDelete`,
29
 * which generates a hash column from all delete columns, and only compares the values
30
 * when the hash values are the same.
31
 */
32
// Abstract base for equality-delete filtering: holds the delete block, the ids of the
// delete columns and the profile counters, and drives the one-time build via init().
class EqualityDeleteBase {
33
protected:
34
    // Profile counters; null until init() registers them on a RuntimeProfile.
    RuntimeProfile::Counter* num_delete_rows = nullptr;
35
    RuntimeProfile::Counter* build_set_time = nullptr;
36
    RuntimeProfile::Counter* equality_delete_time = nullptr;
37
38
    // Non-owning pointer: this class never frees the block (the destructor is defaulted).
    const Block* _delete_block;
39
    // Private copy of the delete column ids passed to the constructor.
    std::vector<int> _delete_col_ids;
40
41
    // Implemented by subclasses; called from init() while build_set_time is being measured.
    virtual Status _build_set() = 0;
42
43
public:
44
    // `delete_col_ids` is taken by const reference so the ids are copied exactly once,
    // into `_delete_col_ids`; this also matches the derived constructors and get_delete_impl().
    EqualityDeleteBase(const Block* delete_block, const std::vector<int>& delete_col_ids)
45
1.70k
            : _delete_block(delete_block), _delete_col_ids(delete_col_ids) {}
46
1.70k
    virtual ~EqualityDeleteBase() = default;
47
48
1.70k
    // Registers the "EqualityDelete" timer and its child counters/timers on `profile`,
    // then runs _build_set() under the "BuildHashSetTime" timer.
    // Returns the Status produced by _build_set().
    Status init(RuntimeProfile* profile) {
49
1.70k
        static const char* delete_profile = "EqualityDelete";
50
1.70k
        ADD_TIMER_WITH_LEVEL(profile, delete_profile, 1);
51
1.70k
        num_delete_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "NumRowsInDeleteFile", TUnit::UNIT,
52
1.70k
                                                       delete_profile, 1);
53
1.70k
        build_set_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "BuildHashSetTime", delete_profile, 1);
54
1.70k
        equality_delete_time =
55
1.70k
                ADD_CHILD_TIMER_WITH_LEVEL(profile, "EqualityDeleteFilterTime", delete_profile, 1);
56
1.70k
        SCOPED_TIMER(build_set_time);
57
1.70k
        return _build_set();
58
1.70k
    }
59
60
    // Implemented by subclasses.
    // NOTE(review): presumably records in `filter` which rows of `data_block` match a delete
    // row, resolving columns through the two name/id maps — confirm against the .cpp.
    virtual Status filter_data_block(
61
            Block* data_block,
62
            const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
63
            const std::unordered_map<int, std::string>& id_to_block_column_name,
64
            IColumn::Filter& filter) = 0;
65
66
    // Factory returning a concrete implementation; defined outside this header.
    // NOTE(review): presumably picks Simple vs. Multi by the number of delete columns — confirm.
    static std::unique_ptr<EqualityDeleteBase> get_delete_impl(
67
            const Block* delete_block, const std::vector<int>& delete_col_ids);
68
};
69
70
// Equality-delete implementation backed by a single `HybridSetBase`.
// Per the file-level comment, this is the variant used when the delete file has one delete column.
class SimpleEqualityDelete : public EqualityDeleteBase {
71
protected:
72
    // Set of delete values; presumably filled by _build_set() (defined outside this header).
    std::shared_ptr<HybridSetBase> _hybrid_set;
73
    // NOTE(review): looks like a scratch filter reused by filter_data_block() — confirm in the .cpp.
    std::unique_ptr<IColumn::Filter> _single_filter;
74
75
    // Overrides the base hook invoked from EqualityDeleteBase::init().
    Status _build_set() override;
76
77
public:
78
    // Forwards both arguments unchanged to the base-class constructor.
    SimpleEqualityDelete(const Block* delete_block, const std::vector<int>& delete_col_ids)
79
776
            : EqualityDeleteBase(delete_block, delete_col_ids) {}
80
81
    // Declared here, defined elsewhere; see EqualityDeleteBase::filter_data_block for the signature.
    Status filter_data_block(Block* data_block,
82
                             const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
83
                             const std::unordered_map<int, std::string>& id_to_block_column_name,
84
                             IColumn::Filter& filter) override;
85
};
86
87
/**
88
 * `MultiEqualityDelete` generates a hash column for both the delete block and the data block.
89
 */
90
// Equality-delete implementation that keeps per-row hashes of the delete block in a multimap
// and falls back to comparing the real values when hashes are equal (see member comments below).
class MultiEqualityDelete : public EqualityDeleteBase {
91
protected:
92
    // hash column for delete block
    std::vector<uint64_t> _delete_hashes;
94
    // hash column for data block
    std::vector<uint64_t> _data_hashes;
96
    // hash code => row index
97
    // if hash values are equal, then compare the real values
98
    // the row index records the row number of the delete row in delete block
    // (a multimap, so several delete rows may share one hash code)
    std::multimap<uint64_t, size_t> _delete_hash_map;
100
    // the delete column indexes in data block
    std::vector<size_t> _data_column_index;
102
103
    // Overrides the base hook invoked from EqualityDeleteBase::init().
    Status _build_set() override;
104
105
    // NOTE(review): presumably true when row `data_row_index` of `data_block` equals row
    // `delete_row_index` of the delete block on all delete columns — confirm in the .cpp.
    bool _equal(Block* data_block, size_t data_row_index, size_t delete_row_index);
106
107
public:
108
    // Forwards both arguments unchanged to the base-class constructor.
    MultiEqualityDelete(const Block* delete_block, const std::vector<int>& delete_col_ids)
109
928
            : EqualityDeleteBase(delete_block, delete_col_ids) {}
110
111
    // Declared here, defined elsewhere; see EqualityDeleteBase::filter_data_block for the signature.
    Status filter_data_block(Block* data_block,
112
                             const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
113
                             const std::unordered_map<int, std::string>& id_to_block_column_name,
114
                             IColumn::Filter& filter) override;
115
};
116
117
} // namespace doris