/root/doris/be/src/olap/id_manager.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <butil/macros.h> | 
| 21 |  | #include <gen_cpp/BackendService_types.h> | 
| 22 |  | #include <gen_cpp/Types_types.h> | 
| 23 |  | #include <stddef.h> | 
| 24 |  | #include <stdint.h> | 
| 25 |  |  | 
| 26 |  | #include <functional> | 
| 27 |  | #include <map> | 
| 28 |  | #include <memory> | 
| 29 |  | #include <mutex> | 
| 30 |  | #include <set> | 
| 31 |  | #include <shared_mutex> | 
| 32 |  | #include <string> | 
| 33 |  | #include <string_view> | 
| 34 |  | #include <unordered_map> | 
| 35 |  | #include <unordered_set> | 
| 36 |  | #include <utility> | 
| 37 |  | #include <vector> | 
| 38 |  |  | 
| 39 |  | #include "common/status.h" | 
| 40 |  | #include "olap/olap_common.h" | 
| 41 |  | #include "olap/tablet.h" | 
| 42 |  | #include "olap/tablet_meta.h" | 
| 43 |  | #include "runtime/query_context.h" | 
| 44 |  |  | 
| 45 |  | namespace doris { | 
| 46 |  |  | 
// Tags which kind of file a FileMapping entry refers to.
enum class FileMappingType {
    // Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL,
    // File that belongs to an external table.
    EXTERNAL,
};
| 51 |  |  | 
| 52 |  | struct InternalFileMappingInfo { | 
| 53 |  |     int64_t tablet_id; | 
| 54 |  |     RowsetId rowset_id; | 
| 55 |  |     uint32_t segment_id; | 
| 56 |  |  | 
| 57 | 2.92k |     std::string to_string() const { | 
| 58 | 2.92k |         std::string value; | 
| 59 | 2.92k |         value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id)); | 
| 60 | 2.92k |         auto* ptr = value.data(); | 
| 61 |  |  | 
| 62 | 2.92k |         memcpy(ptr, &tablet_id, sizeof(tablet_id)); | 
| 63 | 2.92k |         ptr += sizeof(tablet_id); | 
| 64 | 2.92k |         memcpy(ptr, &rowset_id, sizeof(rowset_id)); | 
| 65 | 2.92k |         ptr += sizeof(rowset_id); | 
| 66 | 2.92k |         memcpy(ptr, &segment_id, sizeof(segment_id)); | 
| 67 | 2.92k |         return value; | 
| 68 | 2.92k |     } | 
| 69 |  | }; | 
| 70 |  |  | 
| 71 |  | struct ExternalFileMappingInfo { | 
| 72 |  |     /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found | 
| 73 |  |     * from QueryContext according to the plan_node_id. Because there are some important information in | 
| 74 |  |     * TFileScanRangeParams (needed when creating hdfs/s3 reader): | 
| 75 |  |     *      8: optional THdfsParams hdfs_params; | 
| 76 |  |     *      9: optional map<string, string> properties; | 
| 77 |  |     */ | 
| 78 |  |     int plan_node_id; | 
| 79 |  |  | 
| 80 |  |     /* | 
| 81 |  |      * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage: | 
| 82 |  |      * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column | 
| 83 |  |      * 2. path, file_type, modification_time,compress_type .... used to read the file | 
| 84 |  |      * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon | 
| 85 |  |      */ | 
| 86 |  |     TFileRangeDesc scan_range_desc; | 
| 87 |  |     bool enable_file_meta_cache; | 
| 88 |  |  | 
| 89 |  |     ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range, | 
| 90 |  |                             bool file_meta_cache) | 
| 91 | 16 |             : plan_node_id(plan_node_id), | 
| 92 | 16 |               scan_range_desc(scan_range), | 
| 93 | 16 |               enable_file_meta_cache(file_meta_cache) {} | 
| 94 |  |  | 
| 95 | 2 |     std::string to_string() const { | 
| 96 | 2 |         std::string value; | 
| 97 | 2 |         value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) + | 
| 98 | 2 |                      sizeof(scan_range_desc.start_offset)); | 
| 99 | 2 |         auto* ptr = value.data(); | 
| 100 |  |  | 
| 101 | 2 |         memcpy(ptr, &plan_node_id, sizeof(plan_node_id)); | 
| 102 | 2 |         ptr += sizeof(plan_node_id); | 
| 103 | 2 |         memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset)); | 
| 104 | 2 |         ptr += sizeof(scan_range_desc.start_offset); | 
| 105 | 2 |         memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size()); | 
| 106 | 2 |         return value; | 
| 107 | 2 |     } | 
| 108 |  | }; | 
| 109 |  |  | 
| 110 |  | struct FileMapping { | 
| 111 |  |     ENABLE_FACTORY_CREATOR(FileMapping); | 
| 112 |  |  | 
| 113 |  |     FileMappingType type; | 
| 114 |  |     std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value; | 
| 115 |  |  | 
| 116 |  |     FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id) | 
| 117 | 986 |             : type(FileMappingType::INTERNAL), | 
| 118 | 986 |               value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) { | 
| 119 | 986 |     } | 
| 120 |  |  | 
| 121 |  |     FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache) | 
| 122 | 1 |             : type(FileMappingType::EXTERNAL), | 
| 123 | 1 |               value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range, | 
| 124 | 1 |                     enable_file_meta_cache) {} | 
| 125 |  |  | 
| 126 | 0 |     std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const { | 
| 127 | 0 |         DCHECK(type == FileMappingType::INTERNAL); | 
| 128 | 0 |         auto info = std::get<InternalFileMappingInfo>(value); | 
| 129 | 0 |         return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id); | 
| 130 | 0 |     } | 
| 131 |  |  | 
| 132 | 0 |     ExternalFileMappingInfo& get_external_file_info() { | 
| 133 | 0 |         DCHECK(type == FileMappingType::EXTERNAL); | 
| 134 | 0 |         return std::get<ExternalFileMappingInfo>(value); | 
| 135 | 0 |     } | 
| 136 |  |  | 
| 137 |  |     static std::string file_mapping_info_to_string( | 
| 138 | 2.93k |             const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) { | 
| 139 | 2.93k |         return std::visit( | 
| 140 | 2.93k |                 [](const auto& info) -> std::string { | 
| 141 | 2.92k |                     using T = std::decay_t<decltype(info)>; | 
| 142 |  |  | 
| 143 | 2.92k |                     if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { | 
| 144 | 2.92k |                         return info.to_string(); | 
| 145 |  |  | 
| 146 | 2.92k |                     } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { | 
| 147 | 2 |                         return info.to_string(); | 
| 148 | 2 |                     } | 
| 149 | 2.92k |                 }, _ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_| Line | Count | Source |  | 140 | 2.92k |                 [](const auto& info) -> std::string { |  | 141 | 2.92k |                     using T = std::decay_t<decltype(info)>; |  | 142 |  |  |  | 143 | 2.92k |                     if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { |  | 144 | 2.92k |                         return info.to_string(); |  | 145 |  |  |  | 146 |  |                     } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { |  | 147 |  |                         return info.to_string(); |  | 148 |  |                     } |  | 149 | 2.92k |                 }, | 
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_| Line | Count | Source |  | 140 | 2 |                 [](const auto& info) -> std::string { |  | 141 | 2 |                     using T = std::decay_t<decltype(info)>; |  | 142 |  |  |  | 143 |  |                     if constexpr (std::is_same_v<T, InternalFileMappingInfo>) { |  | 144 |  |                         return info.to_string(); |  | 145 |  |  |  | 146 | 2 |                     } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) { |  | 147 | 2 |                         return info.to_string(); |  | 148 | 2 |                     } |  | 149 | 2 |                 }, | 
 | 
| 150 | 2.93k |                 info); | 
| 151 | 2.93k |     } | 
| 152 |  |  | 
| 153 | 2.91k |     std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); } | 
| 154 |  | }; | 
| 155 |  |  | 
| 156 |  | class IdFileMap { | 
| 157 |  | public: | 
| 158 | 205 |     IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {} | 
| 159 |  |  | 
| 160 | 1.00k |     std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) { | 
| 161 | 1.00k |         std::shared_lock lock(_mtx); | 
| 162 | 1.00k |         auto it = _id_map.find(id); | 
| 163 | 1.00k |         if (it == _id_map.end()) { | 
| 164 | 1 |             return nullptr; | 
| 165 | 1 |         } | 
| 166 | 1.00k |         return it->second; | 
| 167 | 1.00k |     } | 
| 168 |  |  | 
| 169 | 993 |     uint32_t get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) { | 
| 170 | 993 |         DCHECK(mapping.get() != nullptr); | 
| 171 | 993 |         auto value = mapping->file_mapping_info_to_string(); | 
| 172 |  |  | 
| 173 | 993 |         std::unique_lock lock(_mtx); | 
| 174 | 993 |         auto it = _mapping_to_id.find(value); | 
| 175 | 993 |         if (it != _mapping_to_id.end()) { | 
| 176 | 0 |             return it->second; | 
| 177 | 0 |         } | 
| 178 | 993 |         _id_map[_init_id++] = mapping; | 
| 179 | 993 |         _mapping_to_id[value] = _init_id - 1; | 
| 180 |  |  | 
| 181 | 993 |         return _init_id - 1; | 
| 182 | 993 |     } | 
| 183 |  |  | 
| 184 | 0 |     void add_temp_rowset(const RowsetSharedPtr& rowset) { | 
| 185 | 0 |         std::unique_lock lock(_mtx); | 
| 186 | 0 |         _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset; | 
| 187 | 0 |     } | 
| 188 |  |  | 
| 189 | 0 |     RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) { | 
| 190 | 0 |         std::shared_lock lock(_mtx); | 
| 191 | 0 |         auto it = _temp_rowset_maps.find({tablet_id, rowset_id}); | 
| 192 | 0 |         if (it == _temp_rowset_maps.end()) { | 
| 193 | 0 |             return nullptr; | 
| 194 | 0 |         } | 
| 195 | 0 |         return it->second; | 
| 196 | 0 |     } | 
| 197 |  |  | 
| 198 | 0 |     int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; } | 
| 199 |  |  | 
| 200 | 0 |     void set_external_scan_params(QueryContext* query_ctx, int max_file_scanners) { | 
| 201 | 0 |         std::call_once(once_flag_for_external, [&] { | 
| 202 | 0 |             DCHECK(query_ctx != nullptr); | 
| 203 | 0 |             _query_global = query_ctx->get_query_globals(); | 
| 204 | 0 |             _query_options = query_ctx->get_query_options(); | 
| 205 | 0 |             _file_scan_range_params_map = query_ctx->file_scan_range_params_map; | 
| 206 | 0 |             _max_file_scanners = max_file_scanners; | 
| 207 | 0 |         }); | 
| 208 | 0 |     } | 
| 209 |  |  | 
| 210 | 0 |     const TQueryGlobals& get_query_globals() const { return _query_global; } | 
| 211 |  |  | 
| 212 | 0 |     const TQueryOptions& get_query_options() const { return _query_options; } | 
| 213 |  |  | 
| 214 | 0 |     const std::map<int, TFileScanRangeParams>& get_external_scan_params() const { | 
| 215 | 0 |         return _file_scan_range_params_map; | 
| 216 | 0 |     } | 
| 217 |  |  | 
| 218 | 0 |     int get_max_file_scanners() const { return _max_file_scanners; } | 
| 219 |  |  | 
| 220 |  | private: | 
| 221 |  |     std::shared_mutex _mtx; | 
| 222 |  |     uint32_t _init_id = 0; | 
| 223 |  |     std::unordered_map<std::string, uint32_t> _mapping_to_id; | 
| 224 |  |     std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map; | 
| 225 |  |  | 
| 226 |  |     // use in scan external table | 
| 227 |  |     TQueryGlobals _query_global; | 
| 228 |  |     TQueryOptions _query_options; | 
| 229 |  |     std::map<int, TFileScanRangeParams> _file_scan_range_params_map; | 
| 230 |  |     std::once_flag once_flag_for_external; | 
| 231 |  |     int _max_file_scanners = 10; | 
| 232 |  |  | 
| 233 |  |     // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction | 
| 234 |  |     std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps; | 
| 235 |  |     uint64_t delayed_expired_timestamp = 0; | 
| 236 |  | }; | 
| 237 |  |  | 
| 238 |  | class IdManager { | 
| 239 |  | public: | 
| 240 |  |     static constexpr uint8_t ID_VERSION = 0; | 
| 241 |  |  | 
| 242 | 2 |     IdManager() = default; | 
| 243 |  |  | 
| 244 | 2 |     ~IdManager() { | 
| 245 | 2 |         std::unique_lock lock(_query_to_id_file_map_mtx); | 
| 246 | 2 |         _query_to_id_file_map.clear(); | 
| 247 | 2 |     } | 
| 248 |  |  | 
| 249 | 204 |     std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) { | 
| 250 | 204 |         std::unique_lock lock(_query_to_id_file_map_mtx); | 
| 251 | 204 |         auto it = _query_to_id_file_map.find(query_id); | 
| 252 | 204 |         if (it == _query_to_id_file_map.end()) { | 
| 253 | 203 |             auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout); | 
| 254 | 203 |             _query_to_id_file_map[query_id] = id_file_map; | 
| 255 | 203 |             return id_file_map; | 
| 256 | 203 |         } | 
| 257 | 1 |         return it->second; | 
| 258 | 204 |     } | 
| 259 |  |  | 
| 260 | 0 |     void gc_expired_id_file_map(int64_t now) { | 
| 261 | 0 |         std::unique_lock lock(_query_to_id_file_map_mtx); | 
| 262 | 0 |         for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) { | 
| 263 | 0 |             if (it->second->get_delayed_expired_timestamp() <= now) { | 
| 264 | 0 |                 it = _query_to_id_file_map.erase(it); | 
| 265 | 0 |             } else { | 
| 266 | 0 |                 ++it; | 
| 267 | 0 |             } | 
| 268 | 0 |         } | 
| 269 | 0 |     } | 
| 270 |  |  | 
| 271 | 101 |     void remove_id_file_map(const UniqueId& query_id) { | 
| 272 | 101 |         std::unique_lock lock(_query_to_id_file_map_mtx); | 
| 273 | 101 |         _query_to_id_file_map.erase(query_id); | 
| 274 | 101 |     } | 
| 275 |  |  | 
| 276 | 0 |     std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) { | 
| 277 | 0 |         std::shared_lock lock(_query_to_id_file_map_mtx); | 
| 278 | 0 |         auto it = _query_to_id_file_map.find(query_id); | 
| 279 | 0 |         if (it == _query_to_id_file_map.end()) { | 
| 280 | 0 |             return nullptr; | 
| 281 | 0 |         } | 
| 282 | 0 |         return it->second; | 
| 283 | 0 |     } | 
| 284 |  |  | 
| 285 |  | private: | 
| 286 |  |     DISALLOW_COPY_AND_ASSIGN(IdManager); | 
| 287 |  |  | 
| 288 |  |     phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map; | 
| 289 |  |     std::shared_mutex _query_to_id_file_map_mtx; | 
| 290 |  | }; | 
| 291 |  |  | 
| 292 |  | } // namespace doris |