Coverage Report

Created: 2025-09-22 13:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/version_graph.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <rapidjson/document.h>
21
#include <stdint.h>
22
23
#include <functional>
24
#include <map>
25
#include <memory>
26
#include <string>
27
#include <unordered_map>
28
#include <vector>
29
30
#include "common/status.h"
31
#include "olap/olap_common.h"
32
#include "olap/rowset/rowset_meta.h"
33
34
namespace doris {
35
/// VersionGraph class which is implemented to build and maintain total versions of rowsets.
36
/// This class use adjacency-matrix represent rowsets version and links. A vertex is a version
37
/// and a link is the _version object of a rowset (from start version to end version + 1).
38
/// Use this class, when given a spec_version, we can get a version path which is the shortest path
39
/// in the graph.
40
class VersionGraph {
41
public:
42
    /// Use rs_metas to construct the graph including vertex and edges, and return the
43
    /// max_version in metas.
44
    void construct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas,
45
                                 int64_t* max_version);
46
    /// Reconstruct the graph, begin construction the vertex vec and edges list will be cleared.
47
    void reconstruct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas,
48
                                   int64_t* max_version);
49
    /// Add a version to this graph, graph will add the version and edge in version.
50
    void add_version_to_graph(const Version& version);
51
    /// Delete a version from graph. Notice that this del operation only remove this edges and
52
    /// remain the vertex.
53
    Status delete_version_from_graph(const Version& version);
54
    /// Given a spec_version, this method can find a version path which is the shortest path
55
    /// in the graph. The version paths are added to version_path as return info.
56
    Status capture_consistent_versions(const Version& spec_version,
57
                                       std::vector<Version>* version_path) const;
58
59
    Status capture_consistent_versions_prefer_cache(
60
            const Version& spec_version, std::vector<Version>& version_path,
61
            const std::function<bool(int64_t, int64_t)>& validator) const;
62
63
    // Given a start, this method can find a version path which satisfy the following conditions:
64
    // 1. all edges satisfy the conditions specified by `validator` in the graph.
65
    // 2. the destination version is as far as possible.
66
    // 3. the path is the shortest path.
67
    // The version paths are added to version_path as return info.
68
    // If this version not in main version, version_path can be included expired rowset.
69
    // NOTE: this method may return edges which is in stale path
70
    //
71
    // @param validator: Function that takes (start_version, end_version) representing a rowset
72
    //                   and returns true if the rowset should be included in the path, false to skip it
73
    Status capture_consistent_versions_with_validator(
74
            const Version& spec_version, std::vector<Version>& version_path,
75
            const std::function<bool(int64_t, int64_t)>& validator) const;
76
77
    // Capture consistent versions with validator for merge-on-write (MOW) tables.
78
    // Similar to capture_consistent_versions_with_validator but with special handling for MOW tables.
79
    // For MOW tables, newly generated delete bitmap marks will be on the rowsets which are in newest layout.
80
    // So we can only capture rowsets which are in newest data layout to ensure data correctness.
81
    //
82
    // @param validator: Function that takes (start_version, end_version) representing a rowset
83
    //                   and returns true if the rowset is warmed up, false if not warmed up
84
    Status capture_consistent_versions_with_validator_mow(
85
            const Version& spec_version, std::vector<Version>& version_path,
86
            const std::function<bool(int64_t, int64_t)>& validator) const;
87
88
    // See comment of TimestampedVersionTracker's get_orphan_vertex_ratio();
89
    double get_orphan_vertex_ratio();
90
91
    std::string debug_string() const;
92
93
private:
94
    /// Private method add a version to graph.
95
    void _add_vertex_to_graph(int64_t vertex_value);
96
97
    // OLAP version contains two parts, [start_version, end_version]. In order
98
    // to construct graph, the OLAP version has two corresponding vertex, one
99
    // vertex's value is version.start_version, the other is
100
    // version.end_version + 1.
101
    // Use adjacency list to describe version graph.
102
    // In order to speed up the version capture, vertex's edges are sorted by version in descending order.
103
    std::vector<Vertex> _version_graph;
104
105
    // vertex value --> vertex_index of _version_graph
106
    // It is easy to find vertex index according to vertex value.
107
    std::unordered_map<int64_t, int64_t> _vertex_index_map;
108
};
109
110
/// TimestampedVersion class which is implemented to maintain multi-version path of rowsets.
111
/// This compaction info of a rowset includes start version, end version and the create time.
112
class TimestampedVersion {
113
public:
114
    /// TimestampedVersion construction function. Use rowset version and create time to build a TimestampedVersion.
115
    TimestampedVersion(const Version& version, int64_t create_time)
116
638
            : _version(version.first, version.second), _create_time(create_time) {}
117
118
638
    ~TimestampedVersion() {}
119
120
    /// Return the rowset version of TimestampedVersion record.
121
3.40k
    Version version() const { return _version; }
122
    /// Return the rowset create_time of TimestampedVersion record.
123
774
    int64_t get_create_time() { return _create_time; }
124
125
    /// Compare two version trackers.
126
0
    bool operator!=(const TimestampedVersion& rhs) const { return _version != rhs._version; }
127
128
    /// Compare two version trackers.
129
0
    bool operator==(const TimestampedVersion& rhs) const { return _version == rhs._version; }
130
131
    /// Judge if a tracker contains the other.
132
0
    bool contains(const TimestampedVersion& other) const {
133
0
        return _version.contains(other._version);
134
0
    }
135
136
private:
137
    Version _version;
138
    int64_t _create_time;
139
};
140
141
using TimestampedVersionSharedPtr = std::shared_ptr<TimestampedVersion>;
142
143
/// TimestampedVersionPathContainer class is used to maintain a path timestamped version path
144
/// and record the max create time in a path version. Once a timestamped version is added, the max_create_time
145
/// will compare with the version timestamp and be refreshed.
146
class TimestampedVersionPathContainer {
147
public:
148
    /// TimestampedVersionPathContainer construction function, max_create_time is assigned to 0.
149
136
    TimestampedVersionPathContainer() : _max_create_time(0) {}
150
151
    /// Return the max create time in a path version.
152
112
    int64_t max_create_time() { return _max_create_time; }
153
154
    /// Add a timestamped version to timestamped_versions_container. Once a timestamped version is added,
155
    /// the max_create_time will compare with the version timestamp and be refreshed.
156
    void add_timestamped_version(TimestampedVersionSharedPtr version);
157
158
    /// Return the timestamped_versions_container as const type.
159
    std::vector<TimestampedVersionSharedPtr>& timestamped_versions();
160
161
private:
162
    std::vector<TimestampedVersionSharedPtr> _timestamped_versions_container;
163
    int64_t _max_create_time;
164
};
165
166
using PathVersionListSharedPtr = std::shared_ptr<TimestampedVersionPathContainer>;
167
168
/// TimestampedVersionTracker class is responsible to track all rowsets version links of a tablet.
169
/// This class not only records the graph of all versions, but also records the paths which will be removed
170
/// after the path is expired.
171
class TimestampedVersionTracker {
172
public:
173
    /// Construct rowsets version tracker by main path rowset meta.
174
    void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas);
175
176
    /// Construct rowsets version tracker by main path rowset meta and stale rowset meta.
177
    void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas,
178
                                     const std::vector<RowsetMetaSharedPtr>& stale_metas);
179
180
    /// Recover rowsets version tracker from stale version path map. When delete operation fails, the
181
    /// tracker can be recovered from deleted stale_version_path_map.
182
    void recover_versioned_tracker(
183
            const std::map<int64_t, PathVersionListSharedPtr>& stale_version_path_map);
184
185
    /// Add a version to tracker, this version is a new version rowset, not merged rowset.
186
    void add_version(const Version& version);
187
188
5
    void delete_version(const Version& version) {
189
5
        static_cast<void>(_version_graph.delete_version_from_graph(version));
190
5
    }
191
192
    /// Add a version path with stale_rs_metas, this versions in version path
193
    /// are merged rowsets.  These rowsets are tracked and removed after they are expired.
194
    /// TabletManager sweep these rowsets using tracker by timing.
195
    void add_stale_path_version(const std::vector<RowsetMetaSharedPtr>& stale_rs_metas);
196
197
    /// Given a spec_version, this method can find a version path which is the shortest path
198
    /// in the graph. The version paths are added to version_path as return info.
199
    /// If this version not in main version, version_path can be included expired rowset.
200
    Status capture_consistent_versions(const Version& spec_version,
201
                                       std::vector<Version>* version_path) const;
202
203
    Status capture_consistent_versions_prefer_cache(
204
            const Version& spec_version, std::vector<Version>& version_path,
205
            const std::function<bool(int64_t, int64_t)>& validator) const;
206
207
    // Given a start, this method can find a version path which satisfy the following conditions:
208
    // 1. all edges satisfy the conditions specified by `validator` in the graph.
209
    // 2. the destination version is as far as possible.
210
    // 3. the path is the shortest path.
211
    // The version paths are added to version_path as return info.
212
    // If this version not in main version, version_path can be included expired rowset.
213
    // NOTE: this method may return edges which is in stale path
214
    //
215
    // @param validator: Function that takes (start_version, end_version) representing a rowset
216
    //                   and returns true if the rowset should be included in the path, false to skip it
217
    Status capture_consistent_versions_with_validator(
218
            const Version& spec_version, std::vector<Version>& version_path,
219
            const std::function<bool(int64_t, int64_t)>& validator) const;
220
221
    // Capture consistent versions with validator for merge-on-write (MOW) tables.
222
    // Similar to capture_consistent_versions_with_validator but with special handling for MOW tables.
223
    // For MOW tables, newly generated delete bitmap marks will be on the rowsets which are in newest layout.
224
    // So we can only capture rowsets which are in newest data layout to ensure data correctness.
225
    //
226
    // @param validator: Function that takes (start_version, end_version) representing a rowset
227
    //                   and returns true if the rowset is warmed up, false if not warmed up
228
    Status capture_consistent_versions_with_validator_mow(
229
            const Version& spec_version, std::vector<Version>& version_path,
230
            const std::function<bool(int64_t, int64_t)>& validator) const;
231
232
    /// Capture all expired path version.
233
    /// When the last rowset create time of a path greater than expired time  which can be expressed
234
    /// "now() - tablet_rowset_stale_sweep_time_sec" , this path will be remained.
235
    /// Otherwise, this path will be added to path_version.
236
    void capture_expired_paths(int64_t stale_sweep_endtime,
237
                               std::vector<int64_t>* path_version) const;
238
239
    /// Fetch all versions with a path_version.
240
    PathVersionListSharedPtr fetch_path_version_by_id(int64_t path_id);
241
242
    /// Fetch all versions with a path_version, at the same time remove this path from the tracker.
243
    /// Next time, fetch this path, it will return empty.
244
    PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id);
245
246
    /// Print all expired version path in a tablet.
247
    std::string get_current_path_map_str();
248
249
    /// Get json document of _stale_version_path_map. Fill the path_id and version_path
250
    /// list in the document. The parameter path arr is used as return variable.
251
    void get_stale_version_path_json_doc(rapidjson::Document& path_arr);
252
253
    // Return proportion of orphan vertex in VersionGraph's _version_graph.
254
    // If a vertex is no longer the starting point of any edge, then this vertex is defined as orphan vertex
255
    double get_orphan_vertex_ratio();
256
257
    std::string debug_string() const;
258
259
private:
260
    /// Construct rowsets version tracker with main path rowset meta.
261
    void _construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas);
262
263
    /// init stale_version_path_map by main path rowset meta and stale rowset meta.
264
    void _init_stale_version_path_map(const std::vector<RowsetMetaSharedPtr>& rs_metas,
265
                                      const std::vector<RowsetMetaSharedPtr>& stale_metas);
266
267
    /// find a path in stale_map from first_version to second_version, stale_path is used as result.
268
    bool _find_path_from_stale_map(
269
            const std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>>&
270
                    stale_map,
271
            int64_t first_version, int64_t second_version,
272
            std::vector<RowsetMetaSharedPtr>* stale_path);
273
274
private:
275
    // This variable records the id of path version which will be dispatched to next path version,
276
    // it is not persisted.
277
    int64_t _next_path_id = 1;
278
279
    // path_version -> list of path version,
280
    // This variable is used to maintain the map from path version and it's all version.
281
    std::map<int64_t, PathVersionListSharedPtr> _stale_version_path_map;
282
283
    VersionGraph _version_graph;
284
};
285
286
} // namespace doris