/root/doris/be/src/olap/id_manager.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <gen_cpp/BackendService_types.h> |
22 | | #include <gen_cpp/Types_types.h> |
23 | | #include <stddef.h> |
24 | | #include <stdint.h> |
25 | | |
26 | | #include <functional> |
27 | | #include <map> |
28 | | #include <memory> |
29 | | #include <mutex> |
30 | | #include <set> |
31 | | #include <shared_mutex> |
32 | | #include <string> |
33 | | #include <string_view> |
34 | | #include <unordered_map> |
35 | | #include <unordered_set> |
36 | | #include <utility> |
37 | | #include <vector> |
38 | | |
39 | | #include "common/status.h" |
40 | | #include "olap/olap_common.h" |
41 | | #include "olap/tablet.h" |
42 | | #include "olap/tablet_meta.h" |
43 | | #include "runtime/query_context.h" |
44 | | |
45 | | namespace doris { |
46 | | |
// Discriminates the two kinds of file a FileMapping can refer to.
enum class FileMappingType {
    // Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL,
    // File that belongs to an external table.
    EXTERNAL,
};
51 | | |
52 | | struct InternalFileMappingInfo { |
53 | | int64_t tablet_id; |
54 | | RowsetId rowset_id; |
55 | | uint32_t segment_id; |
56 | | |
57 | 2.92k | std::string to_string() const { |
58 | 2.92k | std::string value; |
59 | 2.92k | value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id)); |
60 | 2.92k | auto* ptr = value.data(); |
61 | | |
62 | 2.92k | memcpy(ptr, &tablet_id, sizeof(tablet_id)); |
63 | 2.92k | ptr += sizeof(tablet_id); |
64 | 2.92k | memcpy(ptr, &rowset_id, sizeof(rowset_id)); |
65 | 2.92k | ptr += sizeof(rowset_id); |
66 | 2.92k | memcpy(ptr, &segment_id, sizeof(segment_id)); |
67 | 2.92k | return value; |
68 | 2.92k | } |
69 | | }; |
70 | | |
71 | | struct ExternalFileMappingInfo { |
72 | | /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found |
73 | | * from QueryContext according to the plan_node_id. Because there are some important information in |
74 | | * TFileScanRangeParams (needed when creating hdfs/s3 reader): |
75 | | * 8: optional THdfsParams hdfs_params; |
76 | | * 9: optional map<string, string> properties; |
77 | | */ |
78 | | int plan_node_id; |
79 | | |
80 | | /* |
81 | | * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage: |
82 | | * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column |
83 | | * 2. path, file_type, modification_time,compress_type .... used to read the file |
84 | | * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon |
85 | | */ |
86 | | TFileRangeDesc scan_range_desc; |
87 | | bool enable_file_meta_cache; |
88 | | |
89 | | ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range, |
90 | | bool file_meta_cache) |
91 | | : plan_node_id(plan_node_id), |
92 | | scan_range_desc(scan_range), |
93 | 16 | enable_file_meta_cache(file_meta_cache) {} |
94 | | |
95 | 2 | std::string to_string() const { |
96 | 2 | std::string value; |
97 | 2 | value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) + |
98 | 2 | sizeof(scan_range_desc.start_offset)); |
99 | 2 | auto* ptr = value.data(); |
100 | | |
101 | 2 | memcpy(ptr, &plan_node_id, sizeof(plan_node_id)); |
102 | 2 | ptr += sizeof(plan_node_id); |
103 | 2 | memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset)); |
104 | 2 | ptr += sizeof(scan_range_desc.start_offset); |
105 | 2 | memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size()); |
106 | 2 | return value; |
107 | 2 | } |
108 | | }; |
109 | | |
110 | | struct FileMapping { |
111 | | ENABLE_FACTORY_CREATOR(FileMapping); |
112 | | |
113 | | FileMappingType type; |
114 | | std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value; |
115 | | |
116 | | FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id) |
117 | | : type(FileMappingType::INTERNAL), |
118 | 985 | value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) { |
119 | 985 | } |
120 | | |
121 | | FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache) |
122 | | : type(FileMappingType::EXTERNAL), |
123 | | value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range, |
124 | 1 | enable_file_meta_cache) {} |
125 | | |
126 | 0 | std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const { |
127 | 0 | DCHECK(type == FileMappingType::INTERNAL); |
128 | 0 | auto info = std::get<InternalFileMappingInfo>(value); |
129 | 0 | return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id); |
130 | 0 | } |
131 | | |
132 | 0 | ExternalFileMappingInfo& get_external_file_info() { |
133 | 0 | DCHECK(type == FileMappingType::EXTERNAL); |
134 | 0 | return std::get<ExternalFileMappingInfo>(value); |
135 | 0 | } |
136 | | |
137 | | static std::string file_mapping_info_to_string( |
138 | 2.94k | const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) { |
139 | 2.94k | return std::visit( |
140 | 2.94k | [](const auto& info) -> std::string { |
141 | 2.91k | using T = std::decay_t<decltype(info)>; |
142 | | |
143 | 2.91k | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { |
144 | 2 | return info.to_string(); |
145 | | |
146 | 2 | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { |
147 | 2 | return info.to_string(); |
148 | 2 | } |
149 | 2.91k | }, _ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_ Line | Count | Source | 140 | 2.91k | [](const auto& info) -> std::string { | 141 | 2.91k | using T = std::decay_t<decltype(info)>; | 142 | | | 143 | 2.91k | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { | 144 | 2.91k | return info.to_string(); | 145 | | | 146 | 2.91k | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { | 147 | 2.91k | return info.to_string(); | 148 | 2.91k | } | 149 | 2.91k | }, |
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_ Line | Count | Source | 140 | 2 | [](const auto& info) -> std::string { | 141 | 2 | using T = std::decay_t<decltype(info)>; | 142 | | | 143 | 2 | if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { | 144 | 2 | return info.to_string(); | 145 | | | 146 | 2 | } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { | 147 | 2 | return info.to_string(); | 148 | 2 | } | 149 | 2 | }, |
|
150 | 2.94k | info); |
151 | 2.94k | } |
152 | | |
153 | 2.92k | std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); } |
154 | | }; |
155 | | |
156 | | class IdFileMap { |
157 | | public: |
158 | 205 | IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {} |
159 | | |
160 | 1.00k | std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) { |
161 | 1.00k | std::shared_lock lock(_mtx); |
162 | 1.00k | auto it = _id_map.find(id); |
163 | 1.00k | if (it == _id_map.end()) { |
164 | 1 | return nullptr; |
165 | 1 | } |
166 | 1.00k | return it->second; |
167 | 1.00k | } |
168 | | |
169 | 991 | uint32 get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) { |
170 | 991 | DCHECK(mapping.get() != nullptr); |
171 | 991 | auto value = mapping->file_mapping_info_to_string(); |
172 | | |
173 | 991 | std::unique_lock lock(_mtx); |
174 | 991 | auto it = _mapping_to_id.find(value); |
175 | 991 | if (it != _mapping_to_id.end()) { |
176 | 0 | return it->second; |
177 | 0 | } |
178 | 991 | _id_map[_init_id++] = mapping; |
179 | 991 | _mapping_to_id[value] = _init_id - 1; |
180 | | |
181 | 991 | return _init_id - 1; |
182 | 991 | } |
183 | | |
184 | 0 | void add_temp_rowset(const RowsetSharedPtr& rowset) { |
185 | 0 | std::unique_lock lock(_mtx); |
186 | 0 | _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset; |
187 | 0 | } |
188 | | |
189 | 0 | RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) { |
190 | 0 | std::shared_lock lock(_mtx); |
191 | 0 | auto it = _temp_rowset_maps.find({tablet_id, rowset_id}); |
192 | 0 | if (it == _temp_rowset_maps.end()) { |
193 | 0 | return nullptr; |
194 | 0 | } |
195 | 0 | return it->second; |
196 | 0 | } |
197 | | |
198 | 0 | int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; } |
199 | | |
200 | 0 | void set_external_scan_params(QueryContext* query_ctx) { |
201 | 0 | std::call_once(once_flag_for_external, [&] { |
202 | 0 | DCHECK(query_ctx != nullptr); |
203 | 0 | _query_global = query_ctx->get_query_globals(); |
204 | 0 | _query_options = query_ctx->get_query_options(); |
205 | 0 | _file_scan_range_params_map = query_ctx->file_scan_range_params_map; |
206 | 0 | }); |
207 | 0 | } |
208 | | |
209 | 0 | const TQueryGlobals& get_query_globals() const { return _query_global; } |
210 | | |
211 | 0 | const TQueryOptions& get_query_options() const { return _query_options; } |
212 | | |
213 | 0 | const std::map<int, TFileScanRangeParams>& get_external_scan_params() const { |
214 | 0 | return _file_scan_range_params_map; |
215 | 0 | } |
216 | | |
217 | | private: |
218 | | std::shared_mutex _mtx; |
219 | | uint32_t _init_id = 0; |
220 | | std::unordered_map<std::string, uint32_t> _mapping_to_id; |
221 | | std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map; |
222 | | |
223 | | // use in scan external table |
224 | | TQueryGlobals _query_global; |
225 | | TQueryOptions _query_options; |
226 | | std::map<int, TFileScanRangeParams> _file_scan_range_params_map; |
227 | | std::once_flag once_flag_for_external; |
228 | | |
229 | | // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction |
230 | | std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps; |
231 | | uint64_t delayed_expired_timestamp = 0; |
232 | | }; |
233 | | |
234 | | class IdManager { |
235 | | public: |
236 | | static constexpr uint8_t ID_VERSION = 0; |
237 | | |
238 | 2 | IdManager() = default; |
239 | | |
240 | 2 | ~IdManager() { |
241 | 2 | std::unique_lock lock(_query_to_id_file_map_mtx); |
242 | 2 | _query_to_id_file_map.clear(); |
243 | 2 | } |
244 | | |
245 | 204 | std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) { |
246 | 204 | std::unique_lock lock(_query_to_id_file_map_mtx); |
247 | 204 | auto it = _query_to_id_file_map.find(query_id); |
248 | 204 | if (it == _query_to_id_file_map.end()) { |
249 | 203 | auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout); |
250 | 203 | _query_to_id_file_map[query_id] = id_file_map; |
251 | 203 | return id_file_map; |
252 | 203 | } |
253 | 1 | return it->second; |
254 | 204 | } |
255 | | |
256 | 0 | void gc_expired_id_file_map(int64_t now) { |
257 | 0 | std::unique_lock lock(_query_to_id_file_map_mtx); |
258 | 0 | for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) { |
259 | 0 | if (it->second->get_delayed_expired_timestamp() <= now) { |
260 | 0 | it = _query_to_id_file_map.erase(it); |
261 | 0 | } else { |
262 | 0 | ++it; |
263 | 0 | } |
264 | 0 | } |
265 | 0 | } |
266 | | |
267 | 101 | void remove_id_file_map(const UniqueId& query_id) { |
268 | 101 | std::unique_lock lock(_query_to_id_file_map_mtx); |
269 | 101 | _query_to_id_file_map.erase(query_id); |
270 | 101 | } |
271 | | |
272 | 0 | std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) { |
273 | 0 | std::shared_lock lock(_query_to_id_file_map_mtx); |
274 | 0 | auto it = _query_to_id_file_map.find(query_id); |
275 | 0 | if (it == _query_to_id_file_map.end()) { |
276 | 0 | return nullptr; |
277 | 0 | } |
278 | 0 | return it->second; |
279 | 0 | } |
280 | | |
281 | | private: |
282 | | DISALLOW_COPY_AND_ASSIGN(IdManager); |
283 | | |
284 | | phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map; |
285 | | std::shared_mutex _query_to_id_file_map_mtx; |
286 | | }; |
287 | | |
288 | | } // namespace doris |