/root/doris/be/src/olap/version_graph.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <rapidjson/document.h> |
21 | | #include <stdint.h> |
22 | | |
23 | | #include <map> |
24 | | #include <memory> |
25 | | #include <string> |
26 | | #include <unordered_map> |
27 | | #include <vector> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "olap/olap_common.h" |
31 | | #include "olap/rowset/rowset_meta.h" |
32 | | |
33 | | namespace doris { |
34 | | /// VersionGraph class which is implemented to build and maintain total versions of rowsets. |
35 | | /// This class use adjacency-matrix represent rowsets version and links. A vertex is a version |
36 | | /// and a link is the _version object of a rowset (from start version to end version + 1). |
37 | | /// Use this class, when given a spec_version, we can get a version path which is the shortest path |
38 | | /// in the graph. |
39 | | class VersionGraph { |
40 | | public: |
41 | | /// Use rs_metas to construct the graph including vertex and edges, and return the |
42 | | /// max_version in metas. |
43 | | void construct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
44 | | int64_t* max_version); |
45 | | /// Reconstruct the graph, begin construction the vertex vec and edges list will be cleared. |
46 | | void reconstruct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
47 | | int64_t* max_version); |
48 | | /// Add a version to this graph, graph will add the version and edge in version. |
49 | | void add_version_to_graph(const Version& version); |
50 | | /// Delete a version from graph. Notice that this del operation only remove this edges and |
51 | | /// remain the vertex. |
52 | | Status delete_version_from_graph(const Version& version); |
53 | | /// Given a spec_version, this method can find a version path which is the shortest path |
54 | | /// in the graph. The version paths are added to version_path as return info. |
55 | | Status capture_consistent_versions(const Version& spec_version, |
56 | | std::vector<Version>* version_path) const; |
57 | | |
58 | | // See comment of TimestampedVersionTracker's get_orphan_vertex_ratio(); |
59 | | double get_orphan_vertex_ratio(); |
60 | | |
61 | | private: |
62 | | /// Private method add a version to graph. |
63 | | void _add_vertex_to_graph(int64_t vertex_value); |
64 | | |
65 | | // OLAP version contains two parts, [start_version, end_version]. In order |
66 | | // to construct graph, the OLAP version has two corresponding vertex, one |
67 | | // vertex's value is version.start_version, the other is |
68 | | // version.end_version + 1. |
69 | | // Use adjacency list to describe version graph. |
70 | | // In order to speed up the version capture, vertex's edges are sorted by version in descending order. |
71 | | std::vector<Vertex> _version_graph; |
72 | | |
73 | | // vertex value --> vertex_index of _version_graph |
74 | | // It is easy to find vertex index according to vertex value. |
75 | | std::unordered_map<int64_t, int64_t> _vertex_index_map; |
76 | | }; |
77 | | |
78 | | /// TimestampedVersion class which is implemented to maintain multi-version path of rowsets. |
79 | | /// This compaction info of a rowset includes start version, end version and the create time. |
80 | | class TimestampedVersion { |
81 | | public: |
82 | | /// TimestampedVersion construction function. Use rowset version and create time to build a TimestampedVersion. |
83 | | TimestampedVersion(const Version& version, int64_t create_time) |
84 | 74 | : _version(version.first, version.second), _create_time(create_time) {} |
85 | | |
86 | 74 | ~TimestampedVersion() {} |
87 | | |
88 | | /// Return the rowset version of TimestampedVersion record. |
89 | 222 | Version version() const { return _version; } |
90 | | /// Return the rowset create_time of TimestampedVersion record. |
91 | 114 | int64_t get_create_time() { return _create_time; } |
92 | | |
93 | | /// Compare two version trackers. |
94 | 0 | bool operator!=(const TimestampedVersion& rhs) const { return _version != rhs._version; } |
95 | | |
96 | | /// Compare two version trackers. |
97 | 0 | bool operator==(const TimestampedVersion& rhs) const { return _version == rhs._version; } |
98 | | |
99 | | /// Judge if a tracker contains the other. |
100 | 0 | bool contains(const TimestampedVersion& other) const { |
101 | 0 | return _version.contains(other._version); |
102 | 0 | } |
103 | | |
104 | | private: |
105 | | Version _version; |
106 | | int64_t _create_time; |
107 | | }; |
108 | | |
109 | | using TimestampedVersionSharedPtr = std::shared_ptr<TimestampedVersion>; |
110 | | |
111 | | /// TimestampedVersionPathContainer class is used to maintain a path timestamped version path |
112 | | /// and record the max create time in a path version. Once a timestamped version is added, the max_create_time |
113 | | /// will compare with the version timestamp and be refreshed. |
114 | | class TimestampedVersionPathContainer { |
115 | | public: |
116 | | /// TimestampedVersionPathContainer construction function, max_create_time is assigned to 0. |
117 | 40 | TimestampedVersionPathContainer() : _max_create_time(0) {} |
118 | | |
119 | | /// Return the max create time in a path version. |
120 | 16 | int64_t max_create_time() { return _max_create_time; } |
121 | | |
122 | | /// Add a timestamped version to timestamped_versions_container. Once a timestamped version is added, |
123 | | /// the max_create_time will compare with the version timestamp and be refreshed. |
124 | | void add_timestamped_version(TimestampedVersionSharedPtr version); |
125 | | |
126 | | /// Return the timestamped_versions_container as const type. |
127 | | std::vector<TimestampedVersionSharedPtr>& timestamped_versions(); |
128 | | |
129 | | private: |
130 | | std::vector<TimestampedVersionSharedPtr> _timestamped_versions_container; |
131 | | int64_t _max_create_time; |
132 | | }; |
133 | | |
134 | | using PathVersionListSharedPtr = std::shared_ptr<TimestampedVersionPathContainer>; |
135 | | |
136 | | /// TimestampedVersionTracker class is responsible to track all rowsets version links of a tablet. |
137 | | /// This class not only records the graph of all versions, but also records the paths which will be removed |
138 | | /// after the path is expired. |
139 | | class TimestampedVersionTracker { |
140 | | public: |
141 | | /// Construct rowsets version tracker by main path rowset meta. |
142 | | void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas); |
143 | | |
144 | | /// Construct rowsets version tracker by main path rowset meta and stale rowset meta. |
145 | | void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
146 | | const std::vector<RowsetMetaSharedPtr>& stale_metas); |
147 | | |
148 | | /// Recover rowsets version tracker from stale version path map. When delete operation fails, the |
149 | | /// tracker can be recovered from deleted stale_version_path_map. |
150 | | void recover_versioned_tracker( |
151 | | const std::map<int64_t, PathVersionListSharedPtr>& stale_version_path_map); |
152 | | |
153 | | /// Add a version to tracker, this version is a new version rowset, not merged rowset. |
154 | | void add_version(const Version& version); |
155 | | |
156 | 5 | void delete_version(const Version& version) { |
157 | 5 | static_cast<void>(_version_graph.delete_version_from_graph(version)); |
158 | 5 | } |
159 | | |
160 | | /// Add a version path with stale_rs_metas, this versions in version path |
161 | | /// are merged rowsets. These rowsets are tracked and removed after they are expired. |
162 | | /// TabletManager sweep these rowsets using tracker by timing. |
163 | | void add_stale_path_version(const std::vector<RowsetMetaSharedPtr>& stale_rs_metas); |
164 | | |
165 | | /// Given a spec_version, this method can find a version path which is the shortest path |
166 | | /// in the graph. The version paths are added to version_path as return info. |
167 | | /// If this version not in main version, version_path can be included expired rowset. |
168 | | Status capture_consistent_versions(const Version& spec_version, |
169 | | std::vector<Version>* version_path) const; |
170 | | |
171 | | /// Capture all expired path version. |
172 | | /// When the last rowset create time of a path greater than expired time which can be expressed |
173 | | /// "now() - tablet_rowset_stale_sweep_time_sec" , this path will be remained. |
174 | | /// Otherwise, this path will be added to path_version. |
175 | | void capture_expired_paths(int64_t stale_sweep_endtime, |
176 | | std::vector<int64_t>* path_version) const; |
177 | | |
178 | | /// Fetch all versions with a path_version. |
179 | | PathVersionListSharedPtr fetch_path_version_by_id(int64_t path_id); |
180 | | |
181 | | /// Fetch all versions with a path_version, at the same time remove this path from the tracker. |
182 | | /// Next time, fetch this path, it will return empty. |
183 | | PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id); |
184 | | |
185 | | /// Print all expired version path in a tablet. |
186 | | std::string get_current_path_map_str(); |
187 | | |
188 | | /// Get json document of _stale_version_path_map. Fill the path_id and version_path |
189 | | /// list in the document. The parameter path arr is used as return variable. |
190 | | void get_stale_version_path_json_doc(rapidjson::Document& path_arr); |
191 | | |
192 | | // Return proportion of orphan vertex in VersionGraph's _version_graph. |
193 | | // If a vertex is no longer the starting point of any edge, then this vertex is defined as orphan vertex |
194 | | double get_orphan_vertex_ratio(); |
195 | | |
196 | | private: |
197 | | /// Construct rowsets version tracker with main path rowset meta. |
198 | | void _construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas); |
199 | | |
200 | | /// init stale_version_path_map by main path rowset meta and stale rowset meta. |
201 | | void _init_stale_version_path_map(const std::vector<RowsetMetaSharedPtr>& rs_metas, |
202 | | const std::vector<RowsetMetaSharedPtr>& stale_metas); |
203 | | |
204 | | /// find a path in stale_map from first_version to second_version, stale_path is used as result. |
205 | | bool _find_path_from_stale_map( |
206 | | const std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>>& |
207 | | stale_map, |
208 | | int64_t first_version, int64_t second_version, |
209 | | std::vector<RowsetMetaSharedPtr>* stale_path); |
210 | | |
211 | | private: |
212 | | // This variable records the id of path version which will be dispatched to next path version, |
213 | | // it is not persisted. |
214 | | int64_t _next_path_id = 1; |
215 | | |
216 | | // path_version -> list of path version, |
217 | | // This variable is used to maintain the map from path version and it's all version. |
218 | | std::map<int64_t, PathVersionListSharedPtr> _stale_version_path_map; |
219 | | |
220 | | VersionGraph _version_graph; |
221 | | }; |
222 | | |
223 | | } // namespace doris |