/root/doris/be/src/olap/rowid_conversion.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <map> | 
| 21 |  | #include <vector> | 
| 22 |  |  | 
| 23 |  | #include "olap/olap_common.h" | 
| 24 |  | #include "olap/utils.h" | 
| 25 |  | #include "runtime/thread_context.h" | 
| 26 |  |  | 
| 27 |  | namespace doris { | 
| 28 |  |  | 
| 29 |  | // For unique-key merge-on-write tables, the delete bitmap of the | 
| 30 |  | // destination rowset must be updated when compaction finishes. | 
| 31 |  | // Using the row id correspondence between the source rowset and the | 
| 32 |  | // destination rowset, we can quickly update the delete bitmap of the | 
| 33 |  | // destination rowset. | 
| 34 |  | class RowIdConversion { | 
| 35 |  | public: | 
| 36 | 171 |     RowIdConversion() = default; | 
| 37 | 171 |     ~RowIdConversion() { RELEASE_THREAD_MEM_TRACKER(_seg_rowid_map_mem_used); }| Line | Count | Source |  | 606 | 171 | #define RELEASE_THREAD_MEM_TRACKER(size) (void)0 | 
 | 
| 38 |  |  | 
| 39 |  |     // Append one rowid map per source segment, each sized to that segment's row count. | 
| 40 | 400 |     Status init_segment_map(const RowsetId& src_rowset_id, const std::vector<uint32_t>& num_rows) { | 
| 41 | 1.10k |         for (size_t i = 0; i < num_rows.size(); i++) {  Branch (41:28): [True: 702, False: 400]
 | 
| 42 | 702 |             constexpr size_t RESERVED_MEMORY = 10 * 1024 * 1024; // 10M | 
| 43 | 702 |             if (doris::GlobalMemoryArbitrator::is_exceed_hard_mem_limit(RESERVED_MEMORY)) {  Branch (43:17): [True: 0, False: 702]
 | 
| 44 | 0 |                 return Status::MemoryLimitExceeded(fmt::format( | 
| 45 | 0 |                         "RowIdConversion init_segment_map failed, memory exceed limit, {}, " | 
| 46 | 0 |                         "consuming " | 
| 47 | 0 |                         "tracker:<{}>, peak used {}, current used {}.", | 
| 48 | 0 |                         doris::GlobalMemoryArbitrator::process_limit_exceeded_errmsg_str(), | 
| 49 | 0 |                         doris::thread_context()->thread_mem_tracker()->label(), | 
| 50 | 0 |                         doris::thread_context()->thread_mem_tracker()->peak_consumption(), | 
| 51 | 0 |                         doris::thread_context()->thread_mem_tracker()->consumption())); | 
| 52 | 0 |             } | 
| 53 |  |  | 
| 54 | 702 |             uint32_t id = _segments_rowid_map.size(); | 
| 55 | 702 |             _segment_to_id_map.emplace(std::pair<RowsetId, uint32_t> {src_rowset_id, i}, id); | 
| 56 | 702 |             _id_to_segment_map.emplace_back(src_rowset_id, i); | 
| 57 | 702 |             std::vector<std::pair<uint32_t, uint32_t>> vec( | 
| 58 | 702 |                     num_rows[i], std::pair<uint32_t, uint32_t>(UINT32_MAX, UINT32_MAX)); | 
| 59 |  |  | 
| 60 |  |             // NOTE: manually count _segments_rowid_map's memory here, because _segments_rowid_map could be used by indexCompaction. | 
| 61 |  |             // indexCompaction is third-party code, and it is too complex to modify. | 
| 62 |  |             // Refer to compact_column. | 
| 63 | 702 |             track_mem_usage(vec.capacity()); | 
| 64 | 702 |             _segments_rowid_map.emplace_back(std::move(vec)); | 
| 65 | 702 |         } | 
| 66 | 400 |         return Status::OK(); | 
| 67 | 400 |     } | 
| 68 |  |  | 
| 69 |  |     // set dst rowset id | 
| 70 | 133 |     void set_dst_rowset_id(const RowsetId& dst_rowset_id) { _dst_rowst_id = dst_rowset_id; } | 
| 71 | 19 |     const RowsetId get_dst_rowset_id() { return _dst_rowst_id; } | 
| 72 |  |  | 
| 73 |  |     // add row id to the map | 
| 74 |  |     void add(const std::vector<RowLocation>& rss_row_ids, | 
| 75 | 5.20k |              const std::vector<uint32_t>& dst_segments_num_row) { | 
| 76 | 4.72M |         for (auto& item : rss_row_ids) {  Branch (76:25): [True: 4.72M, False: 5.20k]
 | 
| 77 | 4.72M |             if (item.row_id == -1) {  Branch (77:17): [True: 0, False: 4.72M]
 | 
| 78 | 0 |                 continue; | 
| 79 | 0 |             } | 
| 80 | 4.72M |             uint32_t id = _segment_to_id_map.at( | 
| 81 | 4.72M |                     std::pair<RowsetId, uint32_t> {item.rowset_id, item.segment_id}); | 
| 82 | 4.72M |             if (_cur_dst_segment_id < dst_segments_num_row.size() &&   Branch (82:17): [True: 73.9k, False: 4.65M]
 | 
| 83 | 4.72M |                 _cur_dst_segment_rowid >= dst_segments_num_row[_cur_dst_segment_id]) {  Branch (83:17): [True: 3.65k, False: 70.2k]
 | 
| 84 | 3.65k |                 _cur_dst_segment_id++; | 
| 85 | 3.65k |                 _cur_dst_segment_rowid = 0; | 
| 86 | 3.65k |             } | 
| 87 | 4.72M |             _segments_rowid_map[id][item.row_id] = | 
| 88 | 4.72M |                     std::pair<uint32_t, uint32_t> {_cur_dst_segment_id, _cur_dst_segment_rowid++}; | 
| 89 | 4.72M |         } | 
| 90 | 5.20k |     } | 
| 91 |  |  | 
| 92 |  |     // Get the destination RowLocation for a source RowLocation. | 
| 93 |  |     // Returns 0 on success, or -1 if the src RowLocation does not exist. | 
| 94 | 1.75M |     int get(const RowLocation& src, RowLocation* dst) const { | 
| 95 | 1.75M |         auto iter = _segment_to_id_map.find({src.rowset_id, src.segment_id}); | 
| 96 | 1.75M |         if (iter == _segment_to_id_map.end()) {  Branch (96:13): [True: 1, False: 1.75M]
 | 
| 97 | 1 |             return -1; | 
| 98 | 1 |         } | 
| 99 | 1.75M |         const auto& rowid_map = _segments_rowid_map[iter->second]; | 
| 100 | 1.75M |         if (src.row_id >= rowid_map.size()) {  Branch (100:13): [True: 1, False: 1.75M]
 | 
| 101 | 1 |             return -1; | 
| 102 | 1 |         } | 
| 103 | 1.75M |         auto& [dst_segment_id, dst_rowid] = rowid_map[src.row_id]; | 
| 104 | 1.75M |         if (dst_segment_id == UINT32_MAX && dst_rowid == UINT32_MAX) {  Branch (104:13): [True: 764k, False: 993k]
  Branch (104:45): [True: 764k, False: 0]
 | 
| 105 | 764k |             return -1; | 
| 106 | 764k |         } | 
| 107 |  |  | 
| 108 | 993k |         dst->rowset_id = _dst_rowst_id; | 
| 109 | 993k |         dst->segment_id = dst_segment_id; | 
| 110 | 993k |         dst->row_id = dst_rowid; | 
| 111 | 993k |         return 0; | 
| 112 | 1.75M |     } | 
| 113 |  |  | 
| 114 |  |     const std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& get_rowid_conversion_map() | 
| 115 | 19 |             const { | 
| 116 | 19 |         return _segments_rowid_map; | 
| 117 | 19 |     } | 
| 118 |  |  | 
| 119 | 19 |     const std::map<std::pair<RowsetId, uint32_t>, uint32_t>& get_src_segment_to_id_map() { | 
| 120 | 19 |         return _segment_to_id_map; | 
| 121 | 19 |     } | 
| 122 |  |  | 
| 123 | 0 |     std::pair<RowsetId, uint32_t> get_segment_by_id(uint32_t id) const { | 
| 124 | 0 |         DCHECK_GT(_id_to_segment_map.size(), id); | 
| 125 | 0 |         return _id_to_segment_map.at(id); | 
| 126 | 0 |     } | 
| 127 |  |  | 
| 128 | 0 |     uint32_t get_id_by_segment(const std::pair<RowsetId, uint32_t>& segment) const { | 
| 129 | 0 |         return _segment_to_id_map.at(segment); | 
| 130 | 0 |     } | 
| 131 |  |  | 
| 132 |  | private: | 
| 133 | 702 |     void track_mem_usage(size_t delta_std_pair_cap) { | 
| 134 | 702 |         _std_pair_cap += delta_std_pair_cap; | 
| 135 |  |  | 
| 136 | 702 |         size_t new_size = | 
| 137 | 702 |                 _std_pair_cap * sizeof(std::pair<uint32_t, uint32_t>) + | 
| 138 | 702 |                 _segments_rowid_map.capacity() * sizeof(std::vector<std::pair<uint32_t, uint32_t>>); | 
| 139 | 702 |         CONSUME_THREAD_MEM_TRACKER(new_size - _seg_rowid_map_mem_used); | Line | Count | Source |  | 605 | 702 | #define CONSUME_THREAD_MEM_TRACKER(size) (void)0 | 
 | 
| 140 | 702 |         _seg_rowid_map_mem_used = new_size; | 
| 141 | 702 |     } | 
| 142 |  |  | 
| 143 |  | private: | 
| 144 |  |     // The first-level vector: index indicates the src segment. | 
| 145 |  |     // The second-level vector: index indicates the row id in the source segment, | 
| 146 |  |     // value is the <segment id, row id> pair in the destination rowset. | 
| 147 |  |     // <UINT32_MAX, UINT32_MAX> indicates that the row does not exist. | 
| 148 |  |     std::vector<std::vector<std::pair<uint32_t, uint32_t>>> _segments_rowid_map; | 
| 149 |  |     size_t _seg_rowid_map_mem_used {0}; | 
| 150 |  |     size_t _std_pair_cap {0}; | 
| 151 |  |  | 
| 152 |  |     // Maps each source segment <rowset id, segment id> to an id in [0, n) | 
| 153 |  |     std::map<std::pair<RowsetId, uint32_t>, uint32_t> _segment_to_id_map; | 
| 154 |  |  | 
| 155 |  |     // Maps an id in [0, n) back to its source segment <rowset id, segment id> | 
| 156 |  |     std::vector<std::pair<RowsetId, uint32_t>> _id_to_segment_map; | 
| 157 |  |  | 
| 158 |  |     // dst rowset id | 
| 159 |  |     RowsetId _dst_rowst_id; | 
| 160 |  |  | 
| 161 |  |     // current dst segment id | 
| 162 |  |     std::uint32_t _cur_dst_segment_id = 0; | 
| 163 |  |  | 
| 164 |  |     // current rowid of dst segment | 
| 165 |  |     std::uint32_t _cur_dst_segment_rowid = 0; | 
| 166 |  | }; | 
| 167 |  |  | 
| 168 |  | } // namespace doris |