/root/doris/be/src/olap/version_graph.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/version_graph.h" |
19 | | |
20 | | #include <cctz/time_zone.h> |
21 | | #include <stddef.h> |
22 | | |
23 | | #include <algorithm> |
24 | | // IWYU pragma: no_include <bits/chrono.h> |
25 | | #include <chrono> // IWYU pragma: keep |
26 | | #include <list> |
27 | | #include <memory> |
28 | | #include <ostream> |
29 | | #include <utility> |
30 | | |
31 | | #include "common/logging.h" |
32 | | |
33 | | namespace doris { |
34 | | using namespace ErrorCode; |
35 | | |
36 | | void TimestampedVersionTracker::_construct_versioned_tracker( |
37 | 52 | const std::vector<RowsetMetaSharedPtr>& rs_metas) { |
38 | 52 | int64_t max_version = 0; |
39 | | |
40 | | // construct the rowset graph |
41 | 52 | _version_graph.reconstruct_version_graph(rs_metas, &max_version); |
42 | 52 | } |
43 | | |
44 | | void TimestampedVersionTracker::construct_versioned_tracker( |
45 | 12 | const std::vector<RowsetMetaSharedPtr>& rs_metas) { |
46 | 12 | if (rs_metas.empty()) { |
47 | 0 | VLOG_NOTICE << "there is no version in the header."; |
48 | 0 | return; |
49 | 0 | } |
50 | 12 | _stale_version_path_map.clear(); |
51 | 12 | _next_path_id = 1; |
52 | 12 | _construct_versioned_tracker(rs_metas); |
53 | 12 | } |
54 | | |
55 | | void TimestampedVersionTracker::construct_versioned_tracker( |
56 | | const std::vector<RowsetMetaSharedPtr>& rs_metas, |
57 | 535 | const std::vector<RowsetMetaSharedPtr>& stale_metas) { |
58 | 535 | if (rs_metas.empty()) { |
59 | 495 | VLOG_NOTICE << "there is no version in the header."; |
60 | 495 | return; |
61 | 495 | } |
62 | 40 | _stale_version_path_map.clear(); |
63 | 40 | _next_path_id = 1; |
64 | 40 | _construct_versioned_tracker(rs_metas); |
65 | | |
66 | | // Init `_stale_version_path_map`. |
67 | 40 | _init_stale_version_path_map(rs_metas, stale_metas); |
68 | 40 | } |
69 | | |
70 | | void TimestampedVersionTracker::_init_stale_version_path_map( |
71 | | const std::vector<RowsetMetaSharedPtr>& rs_metas, |
72 | 40 | const std::vector<RowsetMetaSharedPtr>& stale_metas) { |
73 | 40 | if (stale_metas.empty()) { |
74 | 39 | return; |
75 | 39 | } |
76 | | |
77 | | // Sort stale meta by version diff (second version - first version). |
78 | 1 | std::list<RowsetMetaSharedPtr> sorted_stale_metas; |
79 | 7 | for (auto& rs : stale_metas) { |
80 | 7 | sorted_stale_metas.emplace_back(rs); |
81 | 7 | } |
82 | | |
83 | | // 1. sort the existing rowsets by version in ascending order. |
84 | 14 | sorted_stale_metas.sort([](const RowsetMetaSharedPtr& a, const RowsetMetaSharedPtr& b) { |
85 | | // Compare by version diff between `version.first` and `version.second`. |
86 | 14 | int64_t a_diff = a->version().second - a->version().first; |
87 | 14 | int64_t b_diff = b->version().second - b->version().first; |
88 | | |
89 | 14 | int diff = a_diff - b_diff; |
90 | 14 | if (diff < 0) { |
91 | 5 | return true; |
92 | 9 | } else if (diff > 0) { |
93 | 4 | return false; |
94 | 4 | } |
95 | | // When the version diff is equal, compare the rowset`s create time |
96 | 5 | return a->creation_time() < b->creation_time(); |
97 | 14 | }); |
98 | | |
99 | | // first_version -> (second_version -> rowset_meta) |
100 | 1 | std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>> stale_map; |
101 | | |
102 | | // 2. generate stale path from stale_metas. traverse sorted_stale_metas and each time add stale_meta to stale_map. |
103 | | // when a stale path in stale_map can replace stale_meta in sorted_stale_metas, stale_map remove rowset_metas of a stale path |
104 | | // and add the path to `_stale_version_path_map`. |
105 | 7 | for (auto& stale_meta : sorted_stale_metas) { |
106 | 7 | std::vector<RowsetMetaSharedPtr> stale_path; |
107 | | // 2.1 find a path in `stale_map` can replace current `stale_meta` version. |
108 | 7 | bool r = _find_path_from_stale_map(stale_map, stale_meta->start_version(), |
109 | 7 | stale_meta->end_version(), &stale_path); |
110 | | |
111 | | // 2.2 add version to `version_graph`. |
112 | 7 | Version stale_meta_version = stale_meta->version(); |
113 | 7 | add_version(stale_meta_version); |
114 | | |
115 | | // 2.3 find the path. |
116 | 7 | if (r) { |
117 | | // Add the path to `_stale_version_path_map`. |
118 | 1 | add_stale_path_version(stale_path); |
119 | | // Remove `stale_path` from `stale_map`. |
120 | 2 | for (auto stale_item : stale_path) { |
121 | 2 | stale_map[stale_item->start_version()].erase(stale_item->end_version()); |
122 | | |
123 | 2 | if (stale_map[stale_item->start_version()].empty()) { |
124 | 2 | stale_map.erase(stale_item->start_version()); |
125 | 2 | } |
126 | 2 | } |
127 | 1 | } |
128 | | |
129 | | // 2.4 add `stale_meta` to `stale_map`. |
130 | 7 | auto start_iter = stale_map.find(stale_meta->start_version()); |
131 | 7 | if (start_iter != stale_map.end()) { |
132 | 0 | start_iter->second[stale_meta->end_version()] = stale_meta; |
133 | 7 | } else { |
134 | 7 | std::unordered_map<int64_t, RowsetMetaSharedPtr> item; |
135 | 7 | item[stale_meta->end_version()] = stale_meta; |
136 | 7 | stale_map[stale_meta->start_version()] = std::move(item); |
137 | 7 | } |
138 | 7 | } |
139 | | |
140 | | // 3. generate stale path from `rs_metas`. |
141 | 5 | for (auto& stale_meta : rs_metas) { |
142 | 5 | std::vector<RowsetMetaSharedPtr> stale_path; |
143 | | // 3.1 find a path in stale_map can replace current `stale_meta` version. |
144 | 5 | bool r = _find_path_from_stale_map(stale_map, stale_meta->start_version(), |
145 | 5 | stale_meta->end_version(), &stale_path); |
146 | | |
147 | | // 3.2 find the path. |
148 | 5 | if (r) { |
149 | | // Add the path to `_stale_version_path_map`. |
150 | 2 | add_stale_path_version(stale_path); |
151 | | // Remove `stale_path` from `stale_map`. |
152 | 4 | for (auto stale_item : stale_path) { |
153 | 4 | stale_map[stale_item->start_version()].erase(stale_item->end_version()); |
154 | | |
155 | 4 | if (stale_map[stale_item->start_version()].empty()) { |
156 | 4 | stale_map.erase(stale_item->start_version()); |
157 | 4 | } |
158 | 4 | } |
159 | 2 | } |
160 | 5 | } |
161 | | |
162 | | // 4. process remain stale `rowset_meta` in `stale_map`. |
163 | 1 | auto map_iter = stale_map.begin(); |
164 | 2 | while (map_iter != stale_map.end()) { |
165 | 1 | auto second_iter = map_iter->second.begin(); |
166 | 2 | while (second_iter != map_iter->second.end()) { |
167 | | // Each remain stale `rowset_meta` generate a stale path. |
168 | 1 | std::vector<RowsetMetaSharedPtr> stale_path; |
169 | 1 | stale_path.push_back(second_iter->second); |
170 | 1 | add_stale_path_version(stale_path); |
171 | | |
172 | 1 | second_iter++; |
173 | 1 | } |
174 | 1 | map_iter++; |
175 | 1 | } |
176 | 1 | } |
177 | | |
178 | | bool TimestampedVersionTracker::_find_path_from_stale_map( |
179 | | const std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>>& |
180 | | stale_map, |
181 | | int64_t first_version, int64_t second_version, |
182 | 16 | std::vector<RowsetMetaSharedPtr>* stale_path) { |
183 | 16 | auto first_iter = stale_map.find(first_version); |
184 | | // If `first_version` not in `stale_map`, there is no path. |
185 | 16 | if (first_iter == stale_map.end()) { |
186 | 9 | return false; |
187 | 9 | } |
188 | 7 | auto& second_version_map = first_iter->second; |
189 | 7 | auto second_iter = second_version_map.find(second_version); |
190 | | // If second_version in `stale_map`, find a path. |
191 | 7 | if (second_iter != second_version_map.end()) { |
192 | 3 | auto row_meta = second_iter->second; |
193 | | // Add rowset to path. |
194 | 3 | stale_path->push_back(row_meta); |
195 | 3 | return true; |
196 | 3 | } |
197 | | |
198 | | // Traverse the first version map to backtracking `_find_path_from_stale_map`. |
199 | 4 | auto map_iter = second_version_map.begin(); |
200 | 5 | while (map_iter != second_version_map.end()) { |
201 | | // The version greater than `second_version`, we can't find path in `stale_map`. |
202 | 4 | if (map_iter->first > second_version) { |
203 | 0 | map_iter++; |
204 | 0 | continue; |
205 | 0 | } |
206 | | // Backtracking `_find_path_from_stale_map` find from `map_iter->first + 1` to `second_version`. |
207 | 4 | stale_path->push_back(map_iter->second); |
208 | 4 | bool r = _find_path_from_stale_map(stale_map, map_iter->first + 1, second_version, |
209 | 4 | stale_path); |
210 | 4 | if (r) { |
211 | 3 | return true; |
212 | 3 | } |
213 | | // There is no path in current version, pop and continue. |
214 | 1 | stale_path->pop_back(); |
215 | 1 | map_iter++; |
216 | 1 | } |
217 | | |
218 | 1 | return false; |
219 | 4 | } |
220 | | |
221 | 2 | void TimestampedVersionTracker::get_stale_version_path_json_doc(rapidjson::Document& path_arr) { |
222 | 2 | auto path_arr_iter = _stale_version_path_map.begin(); |
223 | | |
224 | | // Do loop version path. |
225 | 6 | while (path_arr_iter != _stale_version_path_map.end()) { |
226 | 4 | auto path_id = path_arr_iter->first; |
227 | 4 | auto path_version_path = path_arr_iter->second; |
228 | | |
229 | 4 | rapidjson::Document item; |
230 | 4 | item.SetObject(); |
231 | | // Add `path_id` to item. |
232 | 4 | auto path_id_str = std::to_string(path_id); |
233 | 4 | rapidjson::Value path_id_value; |
234 | 4 | path_id_value.SetString(path_id_str.c_str(), path_id_str.length(), path_arr.GetAllocator()); |
235 | 4 | item.AddMember("path id", path_id_value, path_arr.GetAllocator()); |
236 | | |
237 | | // Add max create time to item. |
238 | 4 | auto time_zone = cctz::local_time_zone(); |
239 | | |
240 | 4 | auto tp = std::chrono::system_clock::from_time_t(path_version_path->max_create_time()); |
241 | 4 | auto create_time_str = cctz::format("%Y-%m-%d %H:%M:%S %z", tp, time_zone); |
242 | | |
243 | 4 | rapidjson::Value create_time_value; |
244 | 4 | create_time_value.SetString(create_time_str.c_str(), create_time_str.length(), |
245 | 4 | path_arr.GetAllocator()); |
246 | 4 | item.AddMember("last create time", create_time_value, path_arr.GetAllocator()); |
247 | | |
248 | | // Add path list to item. |
249 | 4 | std::stringstream path_list_stream; |
250 | 4 | path_list_stream << path_id_str; |
251 | 4 | auto path_list_ptr = path_version_path->timestamped_versions(); |
252 | 4 | auto path_list_iter = path_list_ptr.begin(); |
253 | 11 | while (path_list_iter != path_list_ptr.end()) { |
254 | 7 | path_list_stream << " -> "; |
255 | 7 | path_list_stream << "["; |
256 | 7 | path_list_stream << (*path_list_iter)->version().first; |
257 | 7 | path_list_stream << "-"; |
258 | 7 | path_list_stream << (*path_list_iter)->version().second; |
259 | 7 | path_list_stream << "]"; |
260 | 7 | path_list_iter++; |
261 | 7 | } |
262 | 4 | std::string path_list = path_list_stream.str(); |
263 | 4 | rapidjson::Value path_list_value; |
264 | 4 | path_list_value.SetString(path_list.c_str(), path_list.length(), path_arr.GetAllocator()); |
265 | 4 | item.AddMember("path list", path_list_value, path_arr.GetAllocator()); |
266 | | |
267 | | // Add item to `path_arr`. |
268 | 4 | path_arr.PushBack(item, path_arr.GetAllocator()); |
269 | | |
270 | 4 | path_arr_iter++; |
271 | 4 | } |
272 | 2 | } |
273 | | |
274 | | void TimestampedVersionTracker::recover_versioned_tracker( |
275 | 1 | const std::map<int64_t, PathVersionListSharedPtr>& stale_version_path_map) { |
276 | 1 | auto _path_map_iter = stale_version_path_map.begin(); |
277 | | // Recover `stale_version_path_map`. |
278 | 1 | while (_path_map_iter != stale_version_path_map.end()) { |
279 | | // Add `PathVersionListSharedPtr` to map. |
280 | 0 | _stale_version_path_map[_path_map_iter->first] = _path_map_iter->second; |
281 | |
|
282 | 0 | std::vector<TimestampedVersionSharedPtr>& timestamped_versions = |
283 | 0 | _path_map_iter->second->timestamped_versions(); |
284 | 0 | std::vector<TimestampedVersionSharedPtr>::iterator version_path_iter = |
285 | 0 | timestamped_versions.begin(); |
286 | 0 | while (version_path_iter != timestamped_versions.end()) { |
287 | | // Add version to `_version_graph`. |
288 | 0 | _version_graph.add_version_to_graph((*version_path_iter)->version()); |
289 | 0 | ++version_path_iter; |
290 | 0 | } |
291 | 0 | ++_path_map_iter; |
292 | 0 | } |
293 | 1 | LOG(INFO) << "recover_versioned_tracker current map info " << get_current_path_map_str(); |
294 | 1 | } |
295 | | |
296 | 10.9k | void TimestampedVersionTracker::add_version(const Version& version) { |
297 | 10.9k | _version_graph.add_version_to_graph(version); |
298 | 10.9k | } |
299 | | |
300 | | void TimestampedVersionTracker::add_stale_path_version( |
301 | 42 | const std::vector<RowsetMetaSharedPtr>& stale_rs_metas) { |
302 | 42 | if (stale_rs_metas.empty()) { |
303 | 1 | VLOG_NOTICE << "there is no version in the stale_rs_metas."; |
304 | 1 | return; |
305 | 1 | } |
306 | | |
307 | 41 | PathVersionListSharedPtr ptr(new TimestampedVersionPathContainer()); |
308 | 94 | for (auto rs : stale_rs_metas) { |
309 | 94 | TimestampedVersionSharedPtr vt_ptr( |
310 | 94 | new TimestampedVersion(rs->version(), rs->creation_time())); |
311 | 94 | ptr->add_timestamped_version(vt_ptr); |
312 | 94 | } |
313 | | |
314 | 41 | std::vector<TimestampedVersionSharedPtr>& timestamped_versions = ptr->timestamped_versions(); |
315 | | |
316 | 41 | struct TimestampedVersionPtrCompare { |
317 | 41 | bool operator()(const TimestampedVersionSharedPtr ptr1, |
318 | 132 | const TimestampedVersionSharedPtr ptr2) { |
319 | 132 | return ptr1->version().first < ptr2->version().first; |
320 | 132 | } |
321 | 41 | }; |
322 | 41 | sort(timestamped_versions.begin(), timestamped_versions.end(), TimestampedVersionPtrCompare()); |
323 | 41 | _stale_version_path_map[_next_path_id] = ptr; |
324 | 41 | _next_path_id++; |
325 | 41 | } |
326 | | |
327 | | // Capture consistent versions from graph. |
328 | | Status TimestampedVersionTracker::capture_consistent_versions( |
329 | 21 | const Version& spec_version, std::vector<Version>* version_path) const { |
330 | 21 | return _version_graph.capture_consistent_versions(spec_version, version_path); |
331 | 21 | } |
332 | | |
333 | | void TimestampedVersionTracker::capture_expired_paths( |
334 | 3 | int64_t stale_sweep_endtime, std::vector<int64_t>* path_version_vec) const { |
335 | 3 | std::map<int64_t, PathVersionListSharedPtr>::const_iterator iter = |
336 | 3 | _stale_version_path_map.begin(); |
337 | | |
338 | 15 | while (iter != _stale_version_path_map.end()) { |
339 | 12 | int64_t max_create_time = iter->second->max_create_time(); |
340 | 12 | if (max_create_time <= stale_sweep_endtime) { |
341 | 8 | int64_t path_version = iter->first; |
342 | 8 | path_version_vec->push_back(path_version); |
343 | 8 | } |
344 | 12 | ++iter; |
345 | 12 | } |
346 | 3 | } |
347 | | |
348 | 13 | PathVersionListSharedPtr TimestampedVersionTracker::fetch_path_version_by_id(int64_t path_id) { |
349 | 13 | if (_stale_version_path_map.count(path_id) == 0) { |
350 | 0 | VLOG_NOTICE << "path version " << path_id << " does not exist!"; |
351 | 0 | return nullptr; |
352 | 0 | } |
353 | | |
354 | 13 | return _stale_version_path_map[path_id]; |
355 | 13 | } |
356 | | |
357 | 13 | PathVersionListSharedPtr TimestampedVersionTracker::fetch_and_delete_path_by_id(int64_t path_id) { |
358 | 13 | if (_stale_version_path_map.count(path_id) == 0) { |
359 | 0 | VLOG_NOTICE << "path version " << path_id << " does not exist!"; |
360 | 0 | return nullptr; |
361 | 0 | } |
362 | | |
363 | 13 | VLOG_NOTICE << get_current_path_map_str(); |
364 | 13 | PathVersionListSharedPtr ptr = fetch_path_version_by_id(path_id); |
365 | | |
366 | 13 | _stale_version_path_map.erase(path_id); |
367 | | |
368 | 23 | for (auto& version : ptr->timestamped_versions()) { |
369 | 23 | static_cast<void>(_version_graph.delete_version_from_graph(version->version())); |
370 | 23 | } |
371 | 13 | return ptr; |
372 | 13 | } |
373 | | |
374 | 1 | std::string TimestampedVersionTracker::get_current_path_map_str() { |
375 | 1 | std::stringstream tracker_info; |
376 | 1 | tracker_info << "current expired next_path_id " << _next_path_id << std::endl; |
377 | | |
378 | 1 | std::map<int64_t, PathVersionListSharedPtr>::const_iterator iter = |
379 | 1 | _stale_version_path_map.begin(); |
380 | 1 | while (iter != _stale_version_path_map.end()) { |
381 | 0 | tracker_info << "current expired path_version " << iter->first; |
382 | 0 | std::vector<TimestampedVersionSharedPtr>& timestamped_versions = |
383 | 0 | iter->second->timestamped_versions(); |
384 | 0 | std::vector<TimestampedVersionSharedPtr>::iterator version_path_iter = |
385 | 0 | timestamped_versions.begin(); |
386 | 0 | int64_t max_create_time = -1; |
387 | 0 | while (version_path_iter != timestamped_versions.end()) { |
388 | 0 | if (max_create_time < (*version_path_iter)->get_create_time()) { |
389 | 0 | max_create_time = (*version_path_iter)->get_create_time(); |
390 | 0 | } |
391 | 0 | tracker_info << " -> ["; |
392 | 0 | tracker_info << (*version_path_iter)->version().first; |
393 | 0 | tracker_info << ","; |
394 | 0 | tracker_info << (*version_path_iter)->version().second; |
395 | 0 | tracker_info << "]"; |
396 | |
|
397 | 0 | ++version_path_iter; |
398 | 0 | } |
399 | |
|
400 | 0 | tracker_info << std::endl; |
401 | 0 | ++iter; |
402 | 0 | } |
403 | 1 | return tracker_info.str(); |
404 | 1 | } |
405 | | |
406 | 1 | double TimestampedVersionTracker::get_orphan_vertex_ratio() { |
407 | 1 | return _version_graph.get_orphan_vertex_ratio(); |
408 | 1 | } |
409 | | |
410 | 94 | void TimestampedVersionPathContainer::add_timestamped_version(TimestampedVersionSharedPtr version) { |
411 | | // Compare and refresh `_max_create_time`. |
412 | 94 | if (version->get_create_time() > _max_create_time) { |
413 | 40 | _max_create_time = version->get_create_time(); |
414 | 40 | } |
415 | 94 | _timestamped_versions_container.push_back(version); |
416 | 94 | } |
417 | | |
418 | 73 | std::vector<TimestampedVersionSharedPtr>& TimestampedVersionPathContainer::timestamped_versions() { |
419 | 73 | return _timestamped_versions_container; |
420 | 73 | } |
421 | | |
422 | | void VersionGraph::construct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
423 | 57 | int64_t* max_version) { |
424 | 57 | if (rs_metas.empty()) { |
425 | 0 | VLOG_NOTICE << "there is no version in the header."; |
426 | 0 | return; |
427 | 0 | } |
428 | | |
429 | | // Distill vertex values from versions in TabletMeta. |
430 | 57 | std::vector<int64_t> vertex_values; |
431 | 57 | vertex_values.reserve(2 * rs_metas.size()); |
432 | | |
433 | 389 | for (size_t i = 0; i < rs_metas.size(); ++i) { |
434 | 332 | vertex_values.push_back(rs_metas[i]->start_version()); |
435 | 332 | vertex_values.push_back(rs_metas[i]->end_version() + 1); |
436 | 332 | if (max_version != nullptr and *max_version < rs_metas[i]->end_version()) { |
437 | 269 | *max_version = rs_metas[i]->end_version(); |
438 | 269 | } |
439 | 332 | } |
440 | 57 | std::sort(vertex_values.begin(), vertex_values.end()); |
441 | | |
442 | | // Items in `vertex_values` are sorted, but not unique. |
443 | | // we choose unique items in `vertex_values` to create vertexes. |
444 | 57 | int64_t last_vertex_value = -1; |
445 | 721 | for (size_t i = 0; i < vertex_values.size(); ++i) { |
446 | 664 | if (i != 0 && vertex_values[i] == last_vertex_value) { |
447 | 291 | continue; |
448 | 291 | } |
449 | | |
450 | | // Add vertex to graph. |
451 | 373 | _add_vertex_to_graph(vertex_values[i]); |
452 | 373 | last_vertex_value = vertex_values[i]; |
453 | 373 | } |
454 | | // Create edges for version graph according to TabletMeta's versions. |
455 | 389 | for (size_t i = 0; i < rs_metas.size(); ++i) { |
456 | | // Versions in header are unique. |
457 | | // We ensure `_vertex_index_map` has its `start_version`. |
458 | 332 | int64_t start_vertex_index = _vertex_index_map[rs_metas[i]->start_version()]; |
459 | 332 | int64_t end_vertex_index = _vertex_index_map[rs_metas[i]->end_version() + 1]; |
460 | | // Add one edge from `start_version` to `end_version`. |
461 | 332 | _version_graph[start_vertex_index].edges.push_front(end_vertex_index); |
462 | | // Add reverse edge from `end_version` to `start_version`. |
463 | 332 | _version_graph[end_vertex_index].edges.push_front(start_vertex_index); |
464 | 332 | } |
465 | | |
466 | | // Sort edges by version in descending order. |
467 | 373 | for (auto& vertex : _version_graph) { |
468 | 373 | vertex.edges.sort([this](const int& vertex_idx_a, const int& vertex_idx_b) { |
469 | 340 | return _version_graph[vertex_idx_a].value > _version_graph[vertex_idx_b].value; |
470 | 340 | }); |
471 | 373 | } |
472 | 57 | } |
473 | | |
474 | | void VersionGraph::reconstruct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
475 | 53 | int64_t* max_version) { |
476 | 53 | _version_graph.clear(); |
477 | 53 | _vertex_index_map.clear(); |
478 | | |
479 | 53 | construct_version_graph(rs_metas, max_version); |
480 | 53 | } |
481 | | |
482 | 11.0k | void VersionGraph::add_version_to_graph(const Version& version) { |
483 | | // Add version.first as new vertex of version graph if not exist. |
484 | 11.0k | int64_t start_vertex_value = version.first; |
485 | 11.0k | int64_t end_vertex_value = version.second + 1; |
486 | | |
487 | | // Add vertex to graph. |
488 | 11.0k | _add_vertex_to_graph(start_vertex_value); |
489 | 11.0k | _add_vertex_to_graph(end_vertex_value); |
490 | | |
491 | 11.0k | int64_t start_vertex_index = _vertex_index_map[start_vertex_value]; |
492 | 11.0k | int64_t end_vertex_index = _vertex_index_map[end_vertex_value]; |
493 | | |
494 | | // We assume this version is new version, so we just add two edges |
495 | | // into version graph. add one edge from `start_version` to `end_version` |
496 | | // Make sure the vertex's edges are sorted by version in descending order when inserting. |
497 | 11.0k | auto end_vertex_it = _version_graph[start_vertex_index].edges.begin(); |
498 | 11.0k | while (end_vertex_it != _version_graph[start_vertex_index].edges.end()) { |
499 | 10.6k | if (_version_graph[*end_vertex_it].value < _version_graph[end_vertex_index].value) { |
500 | 10.6k | break; |
501 | 10.6k | } |
502 | 37 | end_vertex_it++; |
503 | 37 | } |
504 | 11.0k | _version_graph[start_vertex_index].edges.insert(end_vertex_it, end_vertex_index); |
505 | | |
506 | | // We add reverse edge(from end_version to start_version) to graph |
507 | | // Make sure the vertex's edges are sorted by version in descending order when inserting. |
508 | 11.0k | auto start_vertex_it = _version_graph[end_vertex_index].edges.begin(); |
509 | 11.0k | while (start_vertex_it != _version_graph[end_vertex_index].edges.end()) { |
510 | 55 | if (_version_graph[*start_vertex_it].value < _version_graph[start_vertex_index].value) { |
511 | 17 | break; |
512 | 17 | } |
513 | 38 | start_vertex_it++; |
514 | 38 | } |
515 | 11.0k | _version_graph[end_vertex_index].edges.insert(start_vertex_it, start_vertex_index); |
516 | 11.0k | } |
517 | | |
518 | 32 | Status VersionGraph::delete_version_from_graph(const Version& version) { |
519 | 32 | int64_t start_vertex_value = version.first; |
520 | 32 | int64_t end_vertex_value = version.second + 1; |
521 | | |
522 | 32 | if (_vertex_index_map.find(start_vertex_value) == _vertex_index_map.end() || |
523 | 32 | _vertex_index_map.find(end_vertex_value) == _vertex_index_map.end()) { |
524 | 0 | return Status::Error<HEADER_DELETE_VERSION>( |
525 | 0 | "vertex for version does not exists. version={}-{}", version.first, version.second); |
526 | 0 | } |
527 | | |
528 | 32 | int64_t start_vertex_index = _vertex_index_map[start_vertex_value]; |
529 | 32 | int64_t end_vertex_index = _vertex_index_map[end_vertex_value]; |
530 | | // Remove edge and its reverse edge. |
531 | | // When there are same versions in edges, just remove the first version. |
532 | 32 | auto start_edges_iter = _version_graph[start_vertex_index].edges.begin(); |
533 | 47 | while (start_edges_iter != _version_graph[start_vertex_index].edges.end()) { |
534 | 47 | if (*start_edges_iter == end_vertex_index) { |
535 | 32 | _version_graph[start_vertex_index].edges.erase(start_edges_iter); |
536 | 32 | break; |
537 | 32 | } |
538 | 15 | start_edges_iter++; |
539 | 15 | } |
540 | | |
541 | 32 | auto end_edges_iter = _version_graph[end_vertex_index].edges.begin(); |
542 | 65 | while (end_edges_iter != _version_graph[end_vertex_index].edges.end()) { |
543 | 65 | if (*end_edges_iter == start_vertex_index) { |
544 | 32 | _version_graph[end_vertex_index].edges.erase(end_edges_iter); |
545 | 32 | break; |
546 | 32 | } |
547 | 33 | end_edges_iter++; |
548 | 33 | } |
549 | | |
550 | | // Here we do not delete vertex in `_version_graph` even if its edges are empty. |
551 | | // the `_version_graph` will be rebuilt when doing trash sweep. |
552 | 32 | return Status::OK(); |
553 | 32 | } |
554 | | |
555 | 22.3k | void VersionGraph::_add_vertex_to_graph(int64_t vertex_value) { |
556 | | // Vertex with vertex_value already exists. |
557 | 22.3k | if (_vertex_index_map.find(vertex_value) != _vertex_index_map.end()) { |
558 | 10.6k | VLOG_NOTICE << "vertex with vertex value already exists. value=" << vertex_value; |
559 | 10.6k | return; |
560 | 10.6k | } |
561 | | |
562 | 11.6k | _version_graph.emplace_back(Vertex(vertex_value)); |
563 | 11.6k | _vertex_index_map[vertex_value] = _version_graph.size() - 1; |
564 | 11.6k | } |
565 | | |
566 | | Status VersionGraph::capture_consistent_versions(const Version& spec_version, |
567 | 23 | std::vector<Version>* version_path) const { |
568 | 23 | if (spec_version.first > spec_version.second) { |
569 | 0 | return Status::Error<INVALID_ARGUMENT, false>( |
570 | 0 | "invalid specified version. spec_version={}-{}", spec_version.first, |
571 | 0 | spec_version.second); |
572 | 0 | } |
573 | | |
574 | 23 | int64_t cur_idx = -1; |
575 | 27 | for (size_t i = 0; i < _version_graph.size(); i++) { |
576 | 27 | if (_version_graph[i].value == spec_version.first) { |
577 | 23 | cur_idx = i; |
578 | 23 | break; |
579 | 23 | } |
580 | 27 | } |
581 | | |
582 | 23 | if (cur_idx < 0) { |
583 | 0 | return Status::InternalError<false>( |
584 | 0 | "failed to find path in version_graph. spec_version: {}-{}", spec_version.first, |
585 | 0 | spec_version.second); |
586 | 0 | } |
587 | | |
588 | 23 | int64_t end_value = spec_version.second + 1; |
589 | 79 | while (_version_graph[cur_idx].value < end_value) { |
590 | 57 | int64_t next_idx = -1; |
591 | 61 | for (const auto& it : _version_graph[cur_idx].edges) { |
592 | | // Only consider incremental versions. |
593 | 61 | if (_version_graph[it].value < _version_graph[cur_idx].value) { |
594 | 1 | break; |
595 | 1 | } |
596 | | |
597 | 60 | if (_version_graph[it].value > end_value) { |
598 | 4 | continue; |
599 | 4 | } |
600 | | |
601 | | // Considering edges had been sorted by version in descending order, |
602 | | // This version is the largest version that smaller than `end_version`. |
603 | 56 | next_idx = it; |
604 | 56 | break; |
605 | 60 | } |
606 | | |
607 | 57 | if (next_idx > -1) { |
608 | 56 | if (version_path != nullptr) { |
609 | 36 | version_path->emplace_back(_version_graph[cur_idx].value, |
610 | 36 | _version_graph[next_idx].value - 1); |
611 | 36 | } |
612 | 56 | cur_idx = next_idx; |
613 | 56 | } else { |
614 | 1 | return Status::InternalError<false>( |
615 | 1 | "fail to find path in version_graph. spec_version: {}-{}", spec_version.first, |
616 | 1 | spec_version.second); |
617 | 1 | } |
618 | 57 | } |
619 | | |
620 | 22 | if (VLOG_TRACE_IS_ON && version_path != nullptr) { |
621 | 0 | std::stringstream shortest_path_for_debug; |
622 | 0 | for (const auto& version : *version_path) { |
623 | 0 | shortest_path_for_debug << version << ' '; |
624 | 0 | } |
625 | 0 | VLOG_TRACE << "success to find path for spec_version. spec_version=" << spec_version |
626 | 0 | << ", path=" << shortest_path_for_debug.str(); |
627 | 0 | } |
628 | | |
629 | 22 | return Status::OK(); |
630 | 23 | } |
631 | | |
632 | 2 | double VersionGraph::get_orphan_vertex_ratio() { |
633 | 2 | int64_t vertex_num = _version_graph.size(); |
634 | 2 | int64_t orphan_vertex_num = 0; |
635 | 15 | for (auto& iter : _version_graph) { |
636 | 15 | if (iter.edges.empty()) { |
637 | 6 | ++orphan_vertex_num; |
638 | 6 | } |
639 | 15 | } |
640 | 2 | return orphan_vertex_num / (double)vertex_num; |
641 | 2 | } |
642 | | |
643 | | } // namespace doris |