/root/doris/be/src/olap/id_manager.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <gen_cpp/BackendService_types.h> |
22 | | #include <gen_cpp/Types_types.h> |
23 | | #include <stddef.h> |
24 | | #include <stdint.h> |
25 | | |
26 | | #include <functional> |
27 | | #include <map> |
28 | | #include <memory> |
29 | | #include <mutex> |
30 | | #include <set> |
31 | | #include <shared_mutex> |
32 | | #include <string> |
33 | | #include <string_view> |
34 | | #include <unordered_map> |
35 | | #include <unordered_set> |
36 | | #include <utility> |
37 | | #include <vector> |
38 | | |
39 | | #include "common/status.h" |
40 | | #include "olap/olap_common.h" |
41 | | #include "olap/tablet.h" |
42 | | #include "olap/tablet_meta.h" |
43 | | |
44 | | namespace doris { |
45 | | |
// Discriminates which kind of file a FileMapping entry refers to.
enum class FileMappingType {
    // Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL,
    // File that belongs to an external table.
    EXTERNAL,
};
50 | | |
51 | | struct InternalFileMappingInfo { |
52 | | int64_t tablet_id; |
53 | | RowsetId rowset_id; |
54 | | uint32_t segment_id; |
55 | | |
56 | 2.92k | std::string to_string() const { |
57 | 2.92k | std::string value; |
58 | 2.92k | value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id)); |
59 | 2.92k | auto* ptr = value.data(); |
60 | | |
61 | 2.92k | memcpy(ptr, &tablet_id, sizeof(tablet_id)); |
62 | 2.92k | ptr += sizeof(tablet_id); |
63 | 2.92k | memcpy(ptr, &rowset_id, sizeof(rowset_id)); |
64 | 2.92k | ptr += sizeof(rowset_id); |
65 | 2.92k | memcpy(ptr, &segment_id, sizeof(segment_id)); |
66 | 2.92k | return value; |
67 | 2.92k | } |
68 | | }; |
69 | | |
70 | | struct ExternalFileMappingInfo { |
71 | | /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found |
72 | | * from QueryContext according to the plan_node_id. Because there are some important information in |
73 | | * TFileScanRangeParams (needed when creating hdfs/s3 reader): |
74 | | * 8: optional THdfsParams hdfs_params; |
75 | | * 9: optional map<string, string> properties; |
76 | | */ |
77 | | int plan_node_id; |
78 | | |
79 | | /* |
80 | | * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage: |
81 | | * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column |
82 | | * 2. path, file_type, modification_time,compress_type .... used to read the file |
83 | | * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon |
84 | | */ |
85 | | TFileRangeDesc scan_range_desc; |
86 | | bool enable_file_meta_cache; |
87 | | |
88 | | ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range, |
89 | | bool file_meta_cache) |
90 | | : plan_node_id(plan_node_id), |
91 | | scan_range_desc(scan_range), |
92 | 16 | enable_file_meta_cache(file_meta_cache) {} |
93 | | |
94 | 2 | std::string to_string() const { |
95 | 2 | std::string value; |
96 | 2 | value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) + |
97 | 2 | sizeof(scan_range_desc.start_offset)); |
98 | 2 | auto* ptr = value.data(); |
99 | | |
100 | 2 | memcpy(ptr, &plan_node_id, sizeof(plan_node_id)); |
101 | 2 | ptr += sizeof(plan_node_id); |
102 | 2 | memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset)); |
103 | 2 | ptr += sizeof(scan_range_desc.start_offset); |
104 | 2 | memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size()); |
105 | 2 | return value; |
106 | 2 | } |
107 | | }; |
108 | | |
109 | | struct FileMapping { |
110 | | ENABLE_FACTORY_CREATOR(FileMapping); |
111 | | |
112 | | FileMappingType type; |
113 | | std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value; |
114 | | |
115 | | FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id) |
116 | | : type(FileMappingType::INTERNAL), |
117 | 979 | value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) { |
118 | 979 | } |
119 | | |
120 | | FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache) |
121 | | : type(FileMappingType::EXTERNAL), |
122 | | value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range, |
123 | 1 | enable_file_meta_cache) {} |
124 | | |
125 | 0 | std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const { |
126 | 0 | DCHECK(type == FileMappingType::INTERNAL); |
127 | 0 | auto info = std::get<InternalFileMappingInfo>(value); |
128 | 0 | return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id); |
129 | 0 | } |
130 | | |
131 | 0 | ExternalFileMappingInfo& get_external_file_info() { |
132 | 0 | DCHECK(type == FileMappingType::EXTERNAL); |
133 | 0 | return std::get<ExternalFileMappingInfo>(value); |
134 | 0 | } |
135 | | |
136 | | static std::string file_mapping_info_to_string( |
137 | 2.93k | const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) { |
138 | 2.93k | return std::visit( |
139 | 2.93k | [](const auto& info) -> std::string { |
140 | 2.91k | using T = std::decay_t<decltype(info)>; |
141 | | |
142 | 2.91k | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { |
143 | 2 | return info.to_string(); |
144 | | |
145 | 2 | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { |
146 | 2 | return info.to_string(); |
147 | 2 | } |
148 | 2.91k | }, _ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_ Line | Count | Source | 139 | 2.90k | [](const auto& info) -> std::string { | 140 | 2.90k | using T = std::decay_t<decltype(info)>; | 141 | | | 142 | 2.91k | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { | 143 | 2.91k | return info.to_string(); | 144 | | | 145 | 2.91k | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { | 146 | 2.90k | return info.to_string(); | 147 | 2.90k | } | 148 | 2.90k | }, |
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_ Line | Count | Source | 139 | 2 | [](const auto& info) -> std::string { | 140 | 2 | using T = std::decay_t<decltype(info)>; | 141 | | | 142 | 2 | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { | 143 | 2 | return info.to_string(); | 144 | | | 145 | 2 | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { | 146 | 2 | return info.to_string(); | 147 | 2 | } | 148 | 2 | }, |
|
149 | 2.93k | info); |
150 | 2.93k | } |
151 | | |
152 | 2.89k | std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); } |
153 | | }; |
154 | | |
155 | | class IdFileMap { |
156 | | public: |
157 | 205 | IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {} |
158 | | |
159 | 1.00k | std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) { |
160 | 1.00k | std::shared_lock lock(_mtx); |
161 | 1.00k | auto it = _id_map.find(id); |
162 | 1.00k | if (it == _id_map.end()) { |
163 | 1 | return nullptr; |
164 | 1 | } |
165 | 1.00k | return it->second; |
166 | 1.00k | } |
167 | | |
168 | 987 | uint32 get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) { |
169 | 987 | DCHECK(mapping.get() != nullptr); |
170 | 987 | auto value = mapping->file_mapping_info_to_string(); |
171 | | |
172 | 987 | std::unique_lock lock(_mtx); |
173 | 987 | auto it = _mapping_to_id.find(value); |
174 | 987 | if (it != _mapping_to_id.end()) { |
175 | 0 | return it->second; |
176 | 0 | } |
177 | 987 | _id_map[_init_id++] = mapping; |
178 | 987 | _mapping_to_id[value] = _init_id - 1; |
179 | | |
180 | 987 | return _init_id - 1; |
181 | 987 | } |
182 | | |
183 | 0 | void add_temp_rowset(const RowsetSharedPtr& rowset) { |
184 | 0 | std::unique_lock lock(_mtx); |
185 | 0 | _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset; |
186 | 0 | } |
187 | | |
188 | 0 | RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) { |
189 | 0 | std::shared_lock lock(_mtx); |
190 | 0 | auto it = _temp_rowset_maps.find({tablet_id, rowset_id}); |
191 | 0 | if (it == _temp_rowset_maps.end()) { |
192 | 0 | return nullptr; |
193 | 0 | } |
194 | 0 | return it->second; |
195 | 0 | } |
196 | | |
197 | 0 | int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; } |
198 | | |
199 | | private: |
200 | | std::shared_mutex _mtx; |
201 | | uint32_t _init_id = 0; |
202 | | std::unordered_map<std::string, uint32_t> _mapping_to_id; |
203 | | std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map; |
204 | | |
205 | | // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction |
206 | | std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps; |
207 | | uint64_t delayed_expired_timestamp = 0; |
208 | | }; |
209 | | |
210 | | class IdManager { |
211 | | public: |
212 | | static constexpr uint8_t ID_VERSION = 0; |
213 | | |
214 | 2 | IdManager() = default; |
215 | | |
216 | 2 | ~IdManager() { |
217 | 2 | std::unique_lock lock(_query_to_id_file_map_mtx); |
218 | 2 | _query_to_id_file_map.clear(); |
219 | 2 | } |
220 | | |
221 | 204 | std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) { |
222 | 204 | std::unique_lock lock(_query_to_id_file_map_mtx); |
223 | 204 | auto it = _query_to_id_file_map.find(query_id); |
224 | 204 | if (it == _query_to_id_file_map.end()) { |
225 | 203 | auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout); |
226 | 203 | _query_to_id_file_map[query_id] = id_file_map; |
227 | 203 | return id_file_map; |
228 | 203 | } |
229 | 1 | return it->second; |
230 | 204 | } |
231 | | |
232 | 0 | void gc_expired_id_file_map(int64_t now) { |
233 | 0 | std::unique_lock lock(_query_to_id_file_map_mtx); |
234 | 0 | for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) { |
235 | 0 | if (it->second->get_delayed_expired_timestamp() <= now) { |
236 | 0 | it = _query_to_id_file_map.erase(it); |
237 | 0 | } else { |
238 | 0 | ++it; |
239 | 0 | } |
240 | 0 | } |
241 | 0 | } |
242 | | |
243 | 101 | void remove_id_file_map(const UniqueId& query_id) { |
244 | 101 | std::unique_lock lock(_query_to_id_file_map_mtx); |
245 | 101 | _query_to_id_file_map.erase(query_id); |
246 | 101 | } |
247 | | |
248 | 0 | std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) { |
249 | 0 | std::shared_lock lock(_query_to_id_file_map_mtx); |
250 | 0 | auto it = _query_to_id_file_map.find(query_id); |
251 | 0 | if (it == _query_to_id_file_map.end()) { |
252 | 0 | return nullptr; |
253 | 0 | } |
254 | 0 | return it->second; |
255 | 0 | } |
256 | | |
257 | | private: |
258 | | DISALLOW_COPY_AND_ASSIGN(IdManager); |
259 | | |
260 | | phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map; |
261 | | std::shared_mutex _query_to_id_file_map_mtx; |
262 | | }; |
263 | | |
264 | | } // namespace doris |