Coverage Report

Created: 2026-04-10 04:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/rowid_conversion.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <map>
21
#include <vector>
22
23
#include "common/cast_set.h"
24
#include "runtime/thread_context.h"
25
#include "storage/olap_common.h"
26
#include "storage/utils.h"
27
28
namespace doris {
29
30
// For unique key merge on write table, we should update delete bitmap
31
// of destination rowset when compaction finished.
32
// Through the row id correspondence between the source rowset and the
33
// destination rowset, we can quickly update the delete bitmap of the
34
// destination rowset.
35
class RowIdConversion {
36
public:
37
197k
    RowIdConversion() = default;
38
197k
    ~RowIdConversion() { RELEASE_THREAD_MEM_TRACKER(_seg_rowid_map_mem_used); }
39
40
    // resize segment rowid map to its rows num
41
34.0k
    Status init_segment_map(const RowsetId& src_rowset_id, const std::vector<uint32_t>& num_rows) {
42
49.3k
        for (size_t i = 0; i < num_rows.size(); i++) {
43
15.2k
            constexpr size_t RESERVED_MEMORY = 10 * 1024 * 1024; // 10M
44
15.2k
            if (doris::GlobalMemoryArbitrator::is_exceed_hard_mem_limit(RESERVED_MEMORY)) {
45
0
                return Status::MemoryLimitExceeded(fmt::format(
46
0
                        "RowIdConversion init_segment_map failed, process memory exceed limit or "
47
0
                        "sys available memory less than low water mark , {}, "
48
0
                        "consuming "
49
0
                        "tracker:<{}>, peak used {}, current used {}.",
50
0
                        doris::GlobalMemoryArbitrator::process_mem_log_str(),
51
0
                        doris::thread_context()
52
0
                                ->thread_mem_tracker_mgr->limiter_mem_tracker()
53
0
                                ->label(),
54
0
                        doris::thread_context()
55
0
                                ->thread_mem_tracker_mgr->limiter_mem_tracker()
56
0
                                ->peak_consumption(),
57
0
                        doris::thread_context()
58
0
                                ->thread_mem_tracker_mgr->limiter_mem_tracker()
59
0
                                ->consumption()));
60
0
            }
61
62
15.2k
            uint32_t id = static_cast<uint32_t>(_segments_rowid_map.size());
63
15.2k
            _segment_to_id_map.emplace(std::pair<RowsetId, uint32_t> {src_rowset_id, i}, id);
64
15.2k
            _id_to_segment_map.emplace_back(src_rowset_id, i);
65
15.2k
            std::vector<std::pair<uint32_t, uint32_t>> vec(
66
15.2k
                    num_rows[i], std::pair<uint32_t, uint32_t>(UINT32_MAX, UINT32_MAX));
67
68
            //NOTE: manually count _segments_rowid_map's memory here, because _segments_rowid_map could be used by indexCompaction.
69
            // indexCompaction is a thridparty code, it's too complex to modify it.
70
            // refer compact_column.
71
15.2k
            track_mem_usage(vec.capacity());
72
15.2k
            _segments_rowid_map.emplace_back(std::move(vec));
73
15.2k
        }
74
34.0k
        return Status::OK();
75
34.0k
    }
76
77
    // set dst rowset id
78
3.88k
    void set_dst_rowset_id(const RowsetId& dst_rowset_id) { _dst_rowst_id = dst_rowset_id; }
79
238
    const RowsetId get_dst_rowset_id() { return _dst_rowst_id; }
80
81
    // add row id to the map
82
    void add(const std::vector<RowLocation>& rss_row_ids,
83
8.74k
             const std::vector<uint32_t>& dst_segments_num_row) {
84
6.43M
        for (auto& item : rss_row_ids) {
85
6.43M
            if (item.row_id == -1) {
86
134
                continue;
87
134
            }
88
6.43M
            uint32_t id = _segment_to_id_map.at(
89
6.43M
                    std::pair<RowsetId, uint32_t> {item.rowset_id, item.segment_id});
90
6.43M
            if (_cur_dst_segment_id < dst_segments_num_row.size() &&
91
6.43M
                _cur_dst_segment_rowid >= dst_segments_num_row[_cur_dst_segment_id]) {
92
3.65k
                _cur_dst_segment_id++;
93
3.65k
                _cur_dst_segment_rowid = 0;
94
3.65k
            }
95
6.43M
            _segments_rowid_map[id][item.row_id] =
96
6.43M
                    std::pair<uint32_t, uint32_t> {_cur_dst_segment_id, _cur_dst_segment_rowid++};
97
6.43M
        }
98
8.74k
    }
99
100
    // get destination RowLocation
101
    // return non-zero if the src RowLocation does not exist
102
5.05M
    int get(const RowLocation& src, RowLocation* dst) const {
103
5.05M
        auto iter = _segment_to_id_map.find({src.rowset_id, src.segment_id});
104
5.05M
        if (iter == _segment_to_id_map.end()) {
105
1
            return -1;
106
1
        }
107
5.05M
        const auto& rowid_map = _segments_rowid_map[iter->second];
108
5.05M
        if (src.row_id >= rowid_map.size()) {
109
1
            return -1;
110
1
        }
111
5.05M
        auto& [dst_segment_id, dst_rowid] = rowid_map[src.row_id];
112
5.05M
        if (dst_segment_id == UINT32_MAX && dst_rowid == UINT32_MAX) {
113
3.80M
            return -1;
114
3.80M
        }
115
116
1.24M
        dst->rowset_id = _dst_rowst_id;
117
1.24M
        dst->segment_id = dst_segment_id;
118
1.24M
        dst->row_id = dst_rowid;
119
1.24M
        return 0;
120
5.05M
    }
121
122
    const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& get_rowid_conversion_map()
123
238
            const {
124
238
        return _segments_rowid_map;
125
238
    }
126
127
238
    const std::map<std::pair<RowsetId, uint32_t>, uint32_t>& get_src_segment_to_id_map() {
128
238
        return _segment_to_id_map;
129
238
    }
130
131
0
    std::pair<RowsetId, uint32_t> get_segment_by_id(uint32_t id) const {
132
0
        DCHECK_GT(_id_to_segment_map.size(), id);
133
0
        return _id_to_segment_map.at(id);
134
0
    }
135
136
0
    uint32_t get_id_by_segment(const std::pair<RowsetId, uint32_t>& segment) const {
137
0
        return _segment_to_id_map.at(segment);
138
0
    }
139
140
private:
141
15.2k
    void track_mem_usage(size_t delta_std_pair_cap) {
142
15.2k
        _std_pair_cap += delta_std_pair_cap;
143
144
15.2k
        size_t new_size =
145
15.2k
                _std_pair_cap * sizeof(std::pair<uint32_t, uint32_t>) +
146
15.2k
                _segments_rowid_map.capacity() * sizeof(std::vector<std::pair<uint32_t, uint32_t>>);
147
15.2k
        CONSUME_THREAD_MEM_TRACKER(new_size - _seg_rowid_map_mem_used);
148
0
        _seg_rowid_map_mem_used = new_size;
149
15.2k
    }
150
151
private:
152
    // the first level vector: index indicates src segment.
153
    // the second level vector: index indicates row id of source segment,
154
    // value indicates row id of destination segment.
155
    // <UINT32_MAX, UINT32_MAX> indicates current row not exist.
156
    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> _segments_rowid_map;
157
    size_t _seg_rowid_map_mem_used {0};
158
    size_t _std_pair_cap {0};
159
160
    // Map source segment to 0 to n
161
    std::map<std::pair<RowsetId, uint32_t>, uint32_t> _segment_to_id_map;
162
163
    // Map 0 to n to source segment
164
    std::vector<std::pair<RowsetId, uint32_t>> _id_to_segment_map;
165
166
    // dst rowset id
167
    RowsetId _dst_rowst_id;
168
169
    // current dst segment id
170
    std::uint32_t _cur_dst_segment_id = 0;
171
172
    // current rowid of dst segment
173
    std::uint32_t _cur_dst_segment_rowid = 0;
174
};
175
176
} // namespace doris