/root/doris/be/src/olap/version_graph.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <rapidjson/document.h> |
21 | | #include <stdint.h> |
22 | | |
23 | | #include <functional> |
24 | | #include <map> |
25 | | #include <memory> |
26 | | #include <string> |
27 | | #include <unordered_map> |
28 | | #include <vector> |
29 | | |
30 | | #include "common/status.h" |
31 | | #include "olap/olap_common.h" |
32 | | #include "olap/rowset/rowset_meta.h" |
33 | | |
34 | | namespace doris { |
35 | | /// VersionGraph class which is implemented to build and maintain total versions of rowsets. |
36 | | /// This class use adjacency-matrix represent rowsets version and links. A vertex is a version |
37 | | /// and a link is the _version object of a rowset (from start version to end version + 1). |
38 | | /// Use this class, when given a spec_version, we can get a version path which is the shortest path |
39 | | /// in the graph. |
40 | | class VersionGraph { |
41 | | public: |
42 | | /// Use rs_metas to construct the graph including vertex and edges, and return the |
43 | | /// max_version in metas. |
44 | | void construct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
45 | | int64_t* max_version); |
46 | | /// Reconstruct the graph, begin construction the vertex vec and edges list will be cleared. |
47 | | void reconstruct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
48 | | int64_t* max_version); |
49 | | /// Add a version to this graph, graph will add the version and edge in version. |
50 | | void add_version_to_graph(const Version& version); |
51 | | /// Delete a version from graph. Notice that this del operation only remove this edges and |
52 | | /// remain the vertex. |
53 | | Status delete_version_from_graph(const Version& version); |
54 | | /// Given a spec_version, this method can find a version path which is the shortest path |
55 | | /// in the graph. The version paths are added to version_path as return info. |
56 | | Status capture_consistent_versions(const Version& spec_version, |
57 | | std::vector<Version>* version_path) const; |
58 | | |
59 | | Status capture_consistent_versions_prefer_cache( |
60 | | const Version& spec_version, std::vector<Version>& version_path, |
61 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
62 | | |
63 | | // Given a start, this method can find a version path which satisfy the following conditions: |
64 | | // 1. all edges satisfy the conditions specified by `validator` in the graph. |
65 | | // 2. the destination version is as far as possible. |
66 | | // 3. the path is the shortest path. |
67 | | // The version paths are added to version_path as return info. |
68 | | // If this version not in main version, version_path can be included expired rowset. |
69 | | // NOTE: this method may return edges which is in stale path |
70 | | // |
71 | | // @param validator: Function that takes (start_version, end_version) representing a rowset |
72 | | // and returns true if the rowset should be included in the path, false to skip it |
73 | | Status capture_consistent_versions_with_validator( |
74 | | const Version& spec_version, std::vector<Version>& version_path, |
75 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
76 | | |
77 | | // Capture consistent versions with validator for merge-on-write (MOW) tables. |
78 | | // Similar to capture_consistent_versions_with_validator but with special handling for MOW tables. |
79 | | // For MOW tables, newly generated delete bitmap marks will be on the rowsets which are in newest layout. |
80 | | // So we can only capture rowsets which are in newest data layout to ensure data correctness. |
81 | | // |
82 | | // @param validator: Function that takes (start_version, end_version) representing a rowset |
83 | | // and returns true if the rowset is warmed up, false if not warmed up |
84 | | Status capture_consistent_versions_with_validator_mow( |
85 | | const Version& spec_version, std::vector<Version>& version_path, |
86 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
87 | | |
88 | | // See comment of TimestampedVersionTracker's get_orphan_vertex_ratio(); |
89 | | double get_orphan_vertex_ratio(); |
90 | | |
91 | | std::string debug_string() const; |
92 | | |
93 | | private: |
94 | | /// Private method add a version to graph. |
95 | | void _add_vertex_to_graph(int64_t vertex_value); |
96 | | |
97 | | // OLAP version contains two parts, [start_version, end_version]. In order |
98 | | // to construct graph, the OLAP version has two corresponding vertex, one |
99 | | // vertex's value is version.start_version, the other is |
100 | | // version.end_version + 1. |
101 | | // Use adjacency list to describe version graph. |
102 | | // In order to speed up the version capture, vertex's edges are sorted by version in descending order. |
103 | | std::vector<Vertex> _version_graph; |
104 | | |
105 | | // vertex value --> vertex_index of _version_graph |
106 | | // It is easy to find vertex index according to vertex value. |
107 | | std::unordered_map<int64_t, int64_t> _vertex_index_map; |
108 | | }; |
109 | | |
110 | | /// TimestampedVersion class which is implemented to maintain multi-version path of rowsets. |
111 | | /// This compaction info of a rowset includes start version, end version and the create time. |
112 | | class TimestampedVersion { |
113 | | public: |
114 | | /// TimestampedVersion construction function. Use rowset version and create time to build a TimestampedVersion. |
115 | | TimestampedVersion(const Version& version, int64_t create_time) |
116 | 638 | : _version(version.first, version.second), _create_time(create_time) {} |
117 | | |
118 | 638 | ~TimestampedVersion() {} |
119 | | |
120 | | /// Return the rowset version of TimestampedVersion record. |
121 | 3.40k | Version version() const { return _version; } |
122 | | /// Return the rowset create_time of TimestampedVersion record. |
123 | 774 | int64_t get_create_time() { return _create_time; } |
124 | | |
125 | | /// Compare two version trackers. |
126 | 0 | bool operator!=(const TimestampedVersion& rhs) const { return _version != rhs._version; } |
127 | | |
128 | | /// Compare two version trackers. |
129 | 0 | bool operator==(const TimestampedVersion& rhs) const { return _version == rhs._version; } |
130 | | |
131 | | /// Judge if a tracker contains the other. |
132 | 0 | bool contains(const TimestampedVersion& other) const { |
133 | 0 | return _version.contains(other._version); |
134 | 0 | } |
135 | | |
136 | | private: |
137 | | Version _version; |
138 | | int64_t _create_time; |
139 | | }; |
140 | | |
141 | | using TimestampedVersionSharedPtr = std::shared_ptr<TimestampedVersion>; |
142 | | |
143 | | /// TimestampedVersionPathContainer class is used to maintain a path timestamped version path |
144 | | /// and record the max create time in a path version. Once a timestamped version is added, the max_create_time |
145 | | /// will compare with the version timestamp and be refreshed. |
146 | | class TimestampedVersionPathContainer { |
147 | | public: |
148 | | /// TimestampedVersionPathContainer construction function, max_create_time is assigned to 0. |
149 | 136 | TimestampedVersionPathContainer() : _max_create_time(0) {} |
150 | | |
151 | | /// Return the max create time in a path version. |
152 | 112 | int64_t max_create_time() { return _max_create_time; } |
153 | | |
154 | | /// Add a timestamped version to timestamped_versions_container. Once a timestamped version is added, |
155 | | /// the max_create_time will compare with the version timestamp and be refreshed. |
156 | | void add_timestamped_version(TimestampedVersionSharedPtr version); |
157 | | |
158 | | /// Return the timestamped_versions_container as const type. |
159 | | std::vector<TimestampedVersionSharedPtr>& timestamped_versions(); |
160 | | |
161 | | private: |
162 | | std::vector<TimestampedVersionSharedPtr> _timestamped_versions_container; |
163 | | int64_t _max_create_time; |
164 | | }; |
165 | | |
166 | | using PathVersionListSharedPtr = std::shared_ptr<TimestampedVersionPathContainer>; |
167 | | |
168 | | /// TimestampedVersionTracker class is responsible to track all rowsets version links of a tablet. |
169 | | /// This class not only records the graph of all versions, but also records the paths which will be removed |
170 | | /// after the path is expired. |
171 | | class TimestampedVersionTracker { |
172 | | public: |
173 | | /// Construct rowsets version tracker by main path rowset meta. |
174 | | void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas); |
175 | | |
176 | | /// Construct rowsets version tracker by main path rowset meta and stale rowset meta. |
177 | | void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
178 | | const std::vector<RowsetMetaSharedPtr>& stale_metas); |
179 | | |
180 | | /// Recover rowsets version tracker from stale version path map. When delete operation fails, the |
181 | | /// tracker can be recovered from deleted stale_version_path_map. |
182 | | void recover_versioned_tracker( |
183 | | const std::map<int64_t, PathVersionListSharedPtr>& stale_version_path_map); |
184 | | |
185 | | /// Add a version to tracker, this version is a new version rowset, not merged rowset. |
186 | | void add_version(const Version& version); |
187 | | |
188 | 5 | void delete_version(const Version& version) { |
189 | 5 | static_cast<void>(_version_graph.delete_version_from_graph(version)); |
190 | 5 | } |
191 | | |
192 | | /// Add a version path with stale_rs_metas, this versions in version path |
193 | | /// are merged rowsets. These rowsets are tracked and removed after they are expired. |
194 | | /// TabletManager sweep these rowsets using tracker by timing. |
195 | | void add_stale_path_version(const std::vector<RowsetMetaSharedPtr>& stale_rs_metas); |
196 | | |
197 | | /// Given a spec_version, this method can find a version path which is the shortest path |
198 | | /// in the graph. The version paths are added to version_path as return info. |
199 | | /// If this version not in main version, version_path can be included expired rowset. |
200 | | Status capture_consistent_versions(const Version& spec_version, |
201 | | std::vector<Version>* version_path) const; |
202 | | |
203 | | Status capture_consistent_versions_prefer_cache( |
204 | | const Version& spec_version, std::vector<Version>& version_path, |
205 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
206 | | |
207 | | // Given a start, this method can find a version path which satisfy the following conditions: |
208 | | // 1. all edges satisfy the conditions specified by `validator` in the graph. |
209 | | // 2. the destination version is as far as possible. |
210 | | // 3. the path is the shortest path. |
211 | | // The version paths are added to version_path as return info. |
212 | | // If this version not in main version, version_path can be included expired rowset. |
213 | | // NOTE: this method may return edges which is in stale path |
214 | | // |
215 | | // @param validator: Function that takes (start_version, end_version) representing a rowset |
216 | | // and returns true if the rowset should be included in the path, false to skip it |
217 | | Status capture_consistent_versions_with_validator( |
218 | | const Version& spec_version, std::vector<Version>& version_path, |
219 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
220 | | |
221 | | // Capture consistent versions with validator for merge-on-write (MOW) tables. |
222 | | // Similar to capture_consistent_versions_with_validator but with special handling for MOW tables. |
223 | | // For MOW tables, newly generated delete bitmap marks will be on the rowsets which are in newest layout. |
224 | | // So we can only capture rowsets which are in newest data layout to ensure data correctness. |
225 | | // |
226 | | // @param validator: Function that takes (start_version, end_version) representing a rowset |
227 | | // and returns true if the rowset is warmed up, false if not warmed up |
228 | | Status capture_consistent_versions_with_validator_mow( |
229 | | const Version& spec_version, std::vector<Version>& version_path, |
230 | | const std::function<bool(int64_t, int64_t)>& validator) const; |
231 | | |
232 | | /// Capture all expired path version. |
233 | | /// When the last rowset create time of a path greater than expired time which can be expressed |
234 | | /// "now() - tablet_rowset_stale_sweep_time_sec" , this path will be remained. |
235 | | /// Otherwise, this path will be added to path_version. |
236 | | void capture_expired_paths(int64_t stale_sweep_endtime, |
237 | | std::vector<int64_t>* path_version) const; |
238 | | |
239 | | /// Fetch all versions with a path_version. |
240 | | PathVersionListSharedPtr fetch_path_version_by_id(int64_t path_id); |
241 | | |
242 | | /// Fetch all versions with a path_version, at the same time remove this path from the tracker. |
243 | | /// Next time, fetch this path, it will return empty. |
244 | | PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id); |
245 | | |
246 | | /// Print all expired version path in a tablet. |
247 | | std::string get_current_path_map_str(); |
248 | | |
249 | | /// Get json document of _stale_version_path_map. Fill the path_id and version_path |
250 | | /// list in the document. The parameter path arr is used as return variable. |
251 | | void get_stale_version_path_json_doc(rapidjson::Document& path_arr); |
252 | | |
253 | | // Return proportion of orphan vertex in VersionGraph's _version_graph. |
254 | | // If a vertex is no longer the starting point of any edge, then this vertex is defined as orphan vertex |
255 | | double get_orphan_vertex_ratio(); |
256 | | |
257 | | std::string debug_string() const; |
258 | | |
259 | | private: |
260 | | /// Construct rowsets version tracker with main path rowset meta. |
261 | | void _construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas); |
262 | | |
263 | | /// init stale_version_path_map by main path rowset meta and stale rowset meta. |
264 | | void _init_stale_version_path_map(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
265 | | const std::vector<RowsetMetaSharedPtr>& stale_metas); |
266 | | |
267 | | /// find a path in stale_map from first_version to second_version, stale_path is used as result. |
268 | | bool _find_path_from_stale_map( |
269 | | const std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>>& |
270 | | stale_map, |
271 | | int64_t first_version, int64_t second_version, |
272 | | std::vector<RowsetMetaSharedPtr>* stale_path); |
273 | | |
274 | | private: |
275 | | // This variable records the id of path version which will be dispatched to next path version, |
276 | | // it is not persisted. |
277 | | int64_t _next_path_id = 1; |
278 | | |
279 | | // path_version -> list of path version, |
280 | | // This variable is used to maintain the map from path version and it's all version. |
281 | | std::map<int64_t, PathVersionListSharedPtr> _stale_version_path_map; |
282 | | |
283 | | VersionGraph _version_graph; |
284 | | }; |
285 | | |
286 | | } // namespace doris |