Coverage Report

Created: 2024-11-20 12:30

/root/doris/be/src/olap/rowid_conversion.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <map>
21
#include <vector>
22
23
#include "olap/olap_common.h"
24
#include "olap/utils.h"
25
26
namespace doris {
27
28
// For unique key merge on write table, we should update delete bitmap
29
// of destination rowset when compaction finished.
30
// Through the row id correspondence between the source rowset and the
31
// destination rowset, we can quickly update the delete bitmap of the
32
// destination rowset.
33
class RowIdConversion {
34
public:
35
114
    RowIdConversion() = default;
36
114
    ~RowIdConversion() = default;
37
38
    // resize segment rowid map to its rows num
39
290
    void init_segment_map(const RowsetId& src_rowset_id, const std::vector<uint32_t>& num_rows) {
40
722
        for (size_t i = 0; i < num_rows.size(); i++) {
41
432
            uint32_t id = _segments_rowid_map.size();
42
432
            _segment_to_id_map.emplace(std::pair<RowsetId, uint32_t> {src_rowset_id, i}, id);
43
432
            _id_to_segment_map.emplace_back(src_rowset_id, i);
44
432
            _segments_rowid_map.emplace_back(std::vector<std::pair<uint32_t, uint32_t>>(
45
432
                    num_rows[i], std::pair<uint32_t, uint32_t>(UINT32_MAX, UINT32_MAX)));
46
432
        }
47
290
    }
48
49
    // set dst rowset id
50
97
    void set_dst_rowset_id(const RowsetId& dst_rowset_id) { _dst_rowst_id = dst_rowset_id; }
51
1
    const RowsetId get_dst_rowset_id() { return _dst_rowst_id; }
52
53
    // add row id to the map
54
    void add(const std::vector<RowLocation>& rss_row_ids,
55
5.15k
             const std::vector<uint32_t>& dst_segments_num_row) {
56
4.68M
        for (auto& item : rss_row_ids) {
57
4.68M
            if (item.row_id == -1) {
58
0
                continue;
59
0
            }
60
4.68M
            uint32_t id = _segment_to_id_map.at(
61
4.68M
                    std::pair<RowsetId, uint32_t> {item.rowset_id, item.segment_id});
62
4.68M
            if (_cur_dst_segment_id < dst_segments_num_row.size() &&
63
4.68M
                _cur_dst_segment_rowid >= dst_segments_num_row[_cur_dst_segment_id]) {
64
3.63k
                _cur_dst_segment_id++;
65
3.63k
                _cur_dst_segment_rowid = 0;
66
3.63k
            }
67
4.68M
            _segments_rowid_map[id][item.row_id] =
68
4.68M
                    std::pair<uint32_t, uint32_t> {_cur_dst_segment_id, _cur_dst_segment_rowid++};
69
4.68M
        }
70
5.15k
    }
71
72
    // get destination RowLocation
73
    // return non-zero if the src RowLocation does not exist
74
1.75M
    int get(const RowLocation& src, RowLocation* dst) const {
75
1.75M
        auto iter = _segment_to_id_map.find({src.rowset_id, src.segment_id});
76
1.75M
        if (iter == _segment_to_id_map.end()) {
77
1
            return -1;
78
1
        }
79
1.75M
        const auto& rowid_map = _segments_rowid_map[iter->second];
80
1.75M
        if (src.row_id >= rowid_map.size()) {
81
1
            return -1;
82
1
        }
83
1.75M
        auto& [dst_segment_id, dst_rowid] = rowid_map[src.row_id];
84
1.75M
        if (dst_segment_id == UINT32_MAX && dst_rowid == UINT32_MAX) {
85
764k
            return -1;
86
764k
        }
87
88
993k
        dst->rowset_id = _dst_rowst_id;
89
993k
        dst->segment_id = dst_segment_id;
90
993k
        dst->row_id = dst_rowid;
91
993k
        return 0;
92
1.75M
    }
93
94
    const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& get_rowid_conversion_map()
95
1
            const {
96
1
        return _segments_rowid_map;
97
1
    }
98
99
1
    const std::map<std::pair<RowsetId, uint32_t>, uint32_t>& get_src_segment_to_id_map() {
100
1
        return _segment_to_id_map;
101
1
    }
102
103
0
    std::pair<RowsetId, uint32_t> get_segment_by_id(uint32_t id) const {
104
0
        DCHECK_GT(_id_to_segment_map.size(), id);
105
0
        return _id_to_segment_map.at(id);
106
0
    }
107
108
0
    uint32_t get_id_by_segment(const std::pair<RowsetId, uint32_t>& segment) const {
109
0
        return _segment_to_id_map.at(segment);
110
0
    }
111
112
private:
113
    // the first level vector: index indicates src segment.
114
    // the second level vector: index indicates row id of source segment,
115
    // value indicates row id of destination segment.
116
    // <UINT32_MAX, UINT32_MAX> indicates current row not exist.
117
    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> _segments_rowid_map;
118
119
    // Map source segment to 0 to n
120
    std::map<std::pair<RowsetId, uint32_t>, uint32_t> _segment_to_id_map;
121
122
    // Map 0 to n to source segment
123
    std::vector<std::pair<RowsetId, uint32_t>> _id_to_segment_map;
124
125
    // dst rowset id
126
    RowsetId _dst_rowst_id;
127
128
    // current dst segment id
129
    std::uint32_t _cur_dst_segment_id = 0;
130
131
    // current rowid of dst segment
132
    std::uint32_t _cur_dst_segment_rowid = 0;
133
};
134
135
} // namespace doris