Coverage Report

Created: 2024-11-21 17:33

/root/doris/be/src/olap/version_graph.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <rapidjson/document.h>
21
#include <stdint.h>
22
23
#include <map>
24
#include <memory>
25
#include <string>
26
#include <unordered_map>
27
#include <vector>
28
29
#include "common/status.h"
30
#include "olap/olap_common.h"
31
#include "olap/rowset/rowset_meta.h"
32
33
namespace doris {
34
/// VersionGraph class which is implemented to build and maintain total versions of rowsets.
35
/// This class use adjacency-matrix represent rowsets version and links. A vertex is a version
36
/// and a link is the _version object of a rowset (from start version to end version + 1).
37
/// Use this class, when given a spec_version, we can get a version path which is the shortest path
38
/// in the graph.
39
class VersionGraph {
40
public:
41
    /// Use rs_metas to construct the graph including vertex and edges, and return the
42
    /// max_version in metas.
43
    void construct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas,
44
                                 int64_t* max_version);
45
    /// Reconstruct the graph, begin construction the vertex vec and edges list will be cleared.
46
    void reconstruct_version_graph(const std::vector<RowsetMetaSharedPtr>& rs_metas,
47
                                   int64_t* max_version);
48
    /// Add a version to this graph, graph will add the version and edge in version.
49
    void add_version_to_graph(const Version& version);
50
    /// Delete a version from graph. Notice that this del operation only remove this edges and
51
    /// remain the vertex.
52
    Status delete_version_from_graph(const Version& version);
53
    /// Given a spec_version, this method can find a version path which is the shortest path
54
    /// in the graph. The version paths are added to version_path as return info.
55
    Status capture_consistent_versions(const Version& spec_version,
56
                                       std::vector<Version>* version_path) const;
57
58
    // See comment of TimestampedVersionTracker's get_orphan_vertex_ratio();
59
    double get_orphan_vertex_ratio();
60
61
private:
62
    /// Private method add a version to graph.
63
    void _add_vertex_to_graph(int64_t vertex_value);
64
65
    // OLAP version contains two parts, [start_version, end_version]. In order
66
    // to construct graph, the OLAP version has two corresponding vertex, one
67
    // vertex's value is version.start_version, the other is
68
    // version.end_version + 1.
69
    // Use adjacency list to describe version graph.
70
    // In order to speed up the version capture, vertex's edges are sorted by version in descending order.
71
    std::vector<Vertex> _version_graph;
72
73
    // vertex value --> vertex_index of _version_graph
74
    // It is easy to find vertex index according to vertex value.
75
    std::unordered_map<int64_t, int64_t> _vertex_index_map;
76
};
77
78
/// TimestampedVersion class which is implemented to maintain multi-version path of rowsets.
79
/// This compaction info of a rowset includes start version, end version and the create time.
80
class TimestampedVersion {
81
public:
82
    /// TimestampedVersion construction function. Use rowset version and create time to build a TimestampedVersion.
83
    TimestampedVersion(const Version& version, int64_t create_time)
84
94
            : _version(version.first, version.second), _create_time(create_time) {}
85
86
94
    ~TimestampedVersion() {}
87
88
    /// Return the rowset version of TimestampedVersion record.
89
348
    Version version() const { return _version; }
90
    /// Return the rowset create_time of TimestampedVersion record.
91
134
    int64_t get_create_time() { return _create_time; }
92
93
    /// Compare two version trackers.
94
0
    bool operator!=(const TimestampedVersion& rhs) const { return _version != rhs._version; }
95
96
    /// Compare two version trackers.
97
0
    bool operator==(const TimestampedVersion& rhs) const { return _version == rhs._version; }
98
99
    /// Judge if a tracker contains the other.
100
0
    bool contains(const TimestampedVersion& other) const {
101
0
        return _version.contains(other._version);
102
0
    }
103
104
private:
105
    Version _version;
106
    int64_t _create_time;
107
};
108
109
using TimestampedVersionSharedPtr = std::shared_ptr<TimestampedVersion>;
110
111
/// TimestampedVersionPathContainer class is used to maintain a path timestamped version path
112
/// and record the max create time in a path version. Once a timestamped version is added, the max_create_time
113
/// will compare with the version timestamp and be refreshed.
114
class TimestampedVersionPathContainer {
115
public:
116
    /// TimestampedVersionPathContainer construction function, max_create_time is assigned to 0.
117
41
    TimestampedVersionPathContainer() : _max_create_time(0) {}
118
119
    /// Return the max create time in a path version.
120
16
    int64_t max_create_time() { return _max_create_time; }
121
122
    /// Add a timestamped version to timestamped_versions_container. Once a timestamped version is added,
123
    /// the max_create_time will compare with the version timestamp and be refreshed.
124
    void add_timestamped_version(TimestampedVersionSharedPtr version);
125
126
    /// Return the timestamped_versions_container as const type.
127
    std::vector<TimestampedVersionSharedPtr>& timestamped_versions();
128
129
private:
130
    std::vector<TimestampedVersionSharedPtr> _timestamped_versions_container;
131
    int64_t _max_create_time;
132
};
133
134
using PathVersionListSharedPtr = std::shared_ptr<TimestampedVersionPathContainer>;
135
136
/// TimestampedVersionTracker class is responsible to track all rowsets version links of a tablet.
137
/// This class not only records the graph of all versions, but also records the paths which will be removed
138
/// after the path is expired.
139
class TimestampedVersionTracker {
140
public:
141
    /// Construct rowsets version tracker by main path rowset meta.
142
    void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas);
143
144
    /// Construct rowsets version tracker by main path rowset meta and stale rowset meta.
145
    void construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas,
146
                                     const std::vector<RowsetMetaSharedPtr>& stale_metas);
147
148
    /// Recover rowsets version tracker from stale version path map. When delete operation fails, the
149
    /// tracker can be recovered from deleted stale_version_path_map.
150
    void recover_versioned_tracker(
151
            const std::map<int64_t, PathVersionListSharedPtr>& stale_version_path_map);
152
153
    /// Add a version to tracker, this version is a new version rowset, not merged rowset.
154
    void add_version(const Version& version);
155
156
5
    void delete_version(const Version& version) {
157
5
        static_cast<void>(_version_graph.delete_version_from_graph(version));
158
5
    }
159
160
    /// Add a version path with stale_rs_metas, this versions in version path
161
    /// are merged rowsets.  These rowsets are tracked and removed after they are expired.
162
    /// TabletManager sweep these rowsets using tracker by timing.
163
    void add_stale_path_version(const std::vector<RowsetMetaSharedPtr>& stale_rs_metas);
164
165
    /// Given a spec_version, this method can find a version path which is the shortest path
166
    /// in the graph. The version paths are added to version_path as return info.
167
    /// If this version not in main version, version_path can be included expired rowset.
168
    Status capture_consistent_versions(const Version& spec_version,
169
                                       std::vector<Version>* version_path) const;
170
171
    /// Capture all expired path version.
172
    /// When the last rowset create time of a path greater than expired time  which can be expressed
173
    /// "now() - tablet_rowset_stale_sweep_time_sec" , this path will be remained.
174
    /// Otherwise, this path will be added to path_version.
175
    void capture_expired_paths(int64_t stale_sweep_endtime,
176
                               std::vector<int64_t>* path_version) const;
177
178
    /// Fetch all versions with a path_version.
179
    PathVersionListSharedPtr fetch_path_version_by_id(int64_t path_id);
180
181
    /// Fetch all versions with a path_version, at the same time remove this path from the tracker.
182
    /// Next time, fetch this path, it will return empty.
183
    PathVersionListSharedPtr fetch_and_delete_path_by_id(int64_t path_id);
184
185
    /// Print all expired version path in a tablet.
186
    std::string get_current_path_map_str();
187
188
    /// Get json document of _stale_version_path_map. Fill the path_id and version_path
189
    /// list in the document. The parameter path arr is used as return variable.
190
    void get_stale_version_path_json_doc(rapidjson::Document& path_arr);
191
192
    // Return proportion of orphan vertex in VersionGraph's _version_graph.
193
    // If a vertex is no longer the starting point of any edge, then this vertex is defined as orphan vertex
194
    double get_orphan_vertex_ratio();
195
196
private:
197
    /// Construct rowsets version tracker with main path rowset meta.
198
    void _construct_versioned_tracker(const std::vector<RowsetMetaSharedPtr>& rs_metas);
199
200
    /// init stale_version_path_map by main path rowset meta and stale rowset meta.
201
    void _init_stale_version_path_map(const std::vector<RowsetMetaSharedPtr>& rs_metas,
202
                                      const std::vector<RowsetMetaSharedPtr>& stale_metas);
203
204
    /// find a path in stale_map from first_version to second_version, stale_path is used as result.
205
    bool _find_path_from_stale_map(
206
            const std::unordered_map<int64_t, std::unordered_map<int64_t, RowsetMetaSharedPtr>>&
207
                    stale_map,
208
            int64_t first_version, int64_t second_version,
209
            std::vector<RowsetMetaSharedPtr>* stale_path);
210
211
private:
212
    // This variable records the id of path version which will be dispatched to next path version,
213
    // it is not persisted.
214
    int64_t _next_path_id = 1;
215
216
    // path_version -> list of path version,
217
    // This variable is used to maintain the map from path version and it's all version.
218
    std::map<int64_t, PathVersionListSharedPtr> _stale_version_path_map;
219
220
    VersionGraph _version_graph;
221
};
222
223
} // namespace doris