Coverage Report

Created: 2026-06-25 12:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/id_manager.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
#include <gen_cpp/BackendService_types.h>
#include <gen_cpp/Types_types.h>
#include <stddef.h>
#include <stdint.h>

#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <shared_mutex>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

#include "common/status.h"
#include "runtime/query_context.h"
#include "storage/olap_common.h"
#include "storage/tablet/tablet.h"
#include "storage/tablet/tablet_meta.h"
44
45
namespace doris {
46
47
// Discriminates which kind of file a FileMapping entry describes.
enum class FileMappingType {
    // A Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL,
    // A file that belongs to an external table.
    EXTERNAL,
};
51
52
struct InternalFileMappingInfo {
53
    int64_t tablet_id;
54
    RowsetId rowset_id;
55
    uint32_t segment_id;
56
57
11.3k
    std::string to_string() const {
58
11.3k
        std::string value;
59
11.3k
        value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id));
60
11.3k
        auto* ptr = value.data();
61
62
11.3k
        memcpy(ptr, &tablet_id, sizeof(tablet_id));
63
11.3k
        ptr += sizeof(tablet_id);
64
11.3k
        memcpy(ptr, &rowset_id, sizeof(rowset_id));
65
11.3k
        ptr += sizeof(rowset_id);
66
11.3k
        memcpy(ptr, &segment_id, sizeof(segment_id));
67
11.3k
        return value;
68
11.3k
    }
69
};
70
71
struct ExternalFileMappingInfo {
72
    /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found
73
    * from QueryContext according to the plan_node_id. Because there are some important information in
74
    * TFileScanRangeParams (needed when creating hdfs/s3 reader):
75
    *      8: optional THdfsParams hdfs_params;
76
    *      9: optional map<string, string> properties;
77
     */
78
    int plan_node_id;
79
80
    /*
81
     * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage:
82
     * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column
83
     * 2. path, file_type, modification_time,compress_type .... used to read the file
84
     * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon
85
     */
86
    TFileRangeDesc scan_range_desc;
87
    bool enable_file_meta_cache;
88
    bool enable_file_meta_memory_cache;
89
90
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
91
                            bool file_meta_cache)
92
            : ExternalFileMappingInfo(plan_node_id, scan_range, file_meta_cache, file_meta_cache) {}
93
94
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
95
                            bool file_meta_cache, bool file_meta_memory_cache)
96
7.52k
            : plan_node_id(plan_node_id),
97
7.52k
              scan_range_desc(scan_range),
98
7.52k
              enable_file_meta_cache(file_meta_cache),
99
7.52k
              enable_file_meta_memory_cache(file_meta_memory_cache) {}
100
101
7.50k
    std::string to_string() const {
102
7.50k
        std::string value;
103
7.50k
        value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) +
104
7.50k
                     sizeof(scan_range_desc.start_offset));
105
7.50k
        auto* ptr = value.data();
106
107
7.50k
        memcpy(ptr, &plan_node_id, sizeof(plan_node_id));
108
7.50k
        ptr += sizeof(plan_node_id);
109
7.50k
        memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset));
110
7.50k
        ptr += sizeof(scan_range_desc.start_offset);
111
7.50k
        memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size());
112
7.50k
        return value;
113
7.50k
    }
114
};
115
116
struct FileMapping {
117
    ENABLE_FACTORY_CREATOR(FileMapping);
118
119
    FileMappingType type;
120
    std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value;
121
122
    FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id)
123
9.35k
            : type(FileMappingType::INTERNAL),
124
9.35k
              value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) {
125
9.35k
    }
126
127
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache)
128
            : FileMapping(plan_node_id, scan_range, enable_file_meta_cache,
129
                          enable_file_meta_cache) {}
130
131
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache,
132
                bool enable_file_meta_memory_cache)
133
7.50k
            : type(FileMappingType::EXTERNAL),
134
7.50k
              value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range,
135
7.50k
                    enable_file_meta_cache, enable_file_meta_memory_cache) {}
136
137
12.4k
    std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const {
138
12.4k
        DCHECK(type == FileMappingType::INTERNAL);
139
12.4k
        auto info = std::get<InternalFileMappingInfo>(value);
140
12.4k
        return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id);
141
12.4k
    }
142
143
18.6k
    ExternalFileMappingInfo& get_external_file_info() {
144
18.6k
        DCHECK(type == FileMappingType::EXTERNAL);
145
18.6k
        return std::get<ExternalFileMappingInfo>(value);
146
18.6k
    }
147
148
    static std::string file_mapping_info_to_string(
149
18.8k
            const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) {
150
18.8k
        return std::visit(
151
18.8k
                [](const auto& info) -> std::string {
152
18.8k
                    using T = std::decay_t<decltype(info)>;
153
154
18.8k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
155
11.3k
                        return info.to_string();
156
157
11.3k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
158
7.49k
                        return info.to_string();
159
7.49k
                    }
160
18.8k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
151
11.3k
                [](const auto& info) -> std::string {
152
11.3k
                    using T = std::decay_t<decltype(info)>;
153
154
11.3k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
155
11.3k
                        return info.to_string();
156
157
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
158
                        return info.to_string();
159
                    }
160
11.3k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
151
7.49k
                [](const auto& info) -> std::string {
152
7.49k
                    using T = std::decay_t<decltype(info)>;
153
154
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
155
                        return info.to_string();
156
157
7.49k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
158
7.49k
                        return info.to_string();
159
7.49k
                    }
160
7.49k
                },
161
18.8k
                info);
162
18.8k
    }
163
164
18.8k
    std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); }
165
};
166
167
class IdFileMap {
168
public:
169
1.85k
    IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {}
170
171
26.5k
    std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) {
172
26.5k
        std::shared_lock lock(_mtx);
173
26.5k
        auto it = _id_map.find(id);
174
26.5k
        if (it == _id_map.end()) {
175
1
            return nullptr;
176
1
        }
177
26.5k
        return it->second;
178
26.5k
    }
179
180
16.8k
    uint32_t get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) {
181
16.8k
        DCHECK(mapping.get() != nullptr);
182
16.8k
        auto value = mapping->file_mapping_info_to_string();
183
184
16.8k
        std::unique_lock lock(_mtx);
185
16.8k
        auto it = _mapping_to_id.find(value);
186
16.8k
        if (it != _mapping_to_id.end()) {
187
2.62k
            return it->second;
188
2.62k
        }
189
14.2k
        _id_map[_init_id++] = mapping;
190
14.2k
        _mapping_to_id[value] = _init_id - 1;
191
192
14.2k
        return _init_id - 1;
193
16.8k
    }
194
195
13.4k
    void add_temp_rowset(const RowsetSharedPtr& rowset) {
196
13.4k
        std::unique_lock lock(_mtx);
197
13.4k
        _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset;
198
13.4k
    }
199
200
3.25k
    RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) {
201
3.25k
        std::shared_lock lock(_mtx);
202
3.25k
        auto it = _temp_rowset_maps.find({tablet_id, rowset_id});
203
3.25k
        if (it == _temp_rowset_maps.end()) {
204
0
            return nullptr;
205
0
        }
206
3.25k
        return it->second;
207
3.25k
    }
208
209
25.1k
    int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; }
210
211
3.18k
    void set_external_scan_params(QueryContext* query_ctx, int max_file_scanners) {
212
3.18k
        std::call_once(once_flag_for_external, [&] {
213
942
            DCHECK(query_ctx != nullptr);
214
942
            _query_global = query_ctx->get_query_globals();
215
942
            _query_options = query_ctx->get_query_options();
216
942
            _file_scan_range_params_map = query_ctx->file_scan_range_params_map;
217
942
            _max_file_scanners = max_file_scanners;
218
942
        });
219
3.18k
    }
220
221
2.07k
    const TQueryGlobals& get_query_globals() const { return _query_global; }
222
223
2.07k
    const TQueryOptions& get_query_options() const { return _query_options; }
224
225
4.14k
    const std::map<int, TFileScanRangeParams>& get_external_scan_params() const {
226
4.14k
        return _file_scan_range_params_map;
227
4.14k
    }
228
229
2.07k
    int get_max_file_scanners() const { return _max_file_scanners; }
230
231
private:
232
    std::shared_mutex _mtx;
233
    uint32_t _init_id = 0;
234
    std::unordered_map<std::string, uint32_t> _mapping_to_id;
235
    std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map;
236
237
    // use in scan external table
238
    TQueryGlobals _query_global;
239
    TQueryOptions _query_options;
240
    std::map<int, TFileScanRangeParams> _file_scan_range_params_map;
241
    std::once_flag once_flag_for_external;
242
    int _max_file_scanners = 10;
243
244
    // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction
245
    std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps;
246
    uint64_t delayed_expired_timestamp = 0;
247
};
248
249
class IdManager {
250
public:
251
    static constexpr uint8_t ID_VERSION = 0;
252
253
9
    IdManager() = default;
254
255
5
    ~IdManager() {
256
5
        std::unique_lock lock(_query_to_id_file_map_mtx);
257
5
        _query_to_id_file_map.clear();
258
5
    }
259
260
5.09k
    std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) {
261
5.09k
        std::unique_lock lock(_query_to_id_file_map_mtx);
262
5.09k
        auto it = _query_to_id_file_map.find(query_id);
263
5.09k
        if (it == _query_to_id_file_map.end()) {
264
1.84k
            auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout);
265
1.84k
            _query_to_id_file_map[query_id] = id_file_map;
266
1.84k
            return id_file_map;
267
1.84k
        }
268
3.24k
        return it->second;
269
5.09k
    }
270
271
517
    void gc_expired_id_file_map(int64_t now) {
272
517
        std::unique_lock lock(_query_to_id_file_map_mtx);
273
25.6k
        for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) {
274
25.1k
            if (it->second->get_delayed_expired_timestamp() <= now) {
275
802
                it = _query_to_id_file_map.erase(it);
276
24.3k
            } else {
277
24.3k
                ++it;
278
24.3k
            }
279
25.1k
        }
280
517
    }
281
282
935
    void remove_id_file_map(const UniqueId& query_id) {
283
935
        std::unique_lock lock(_query_to_id_file_map_mtx);
284
935
        _query_to_id_file_map.erase(query_id);
285
935
    }
286
287
125k
    std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) {
288
125k
        std::shared_lock lock(_query_to_id_file_map_mtx);
289
125k
        auto it = _query_to_id_file_map.find(query_id);
290
125k
        if (it == _query_to_id_file_map.end()) {
291
121k
            return nullptr;
292
121k
        }
293
4.02k
        return it->second;
294
125k
    }
295
296
private:
297
    DISALLOW_COPY_AND_ASSIGN(IdManager);
298
299
    phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map;
300
    std::shared_mutex _query_to_id_file_map_mtx;
301
};
302
303
} // namespace doris