Coverage Report

Created: 2026-03-19 17:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/id_manager.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/BackendService_types.h>
22
#include <gen_cpp/Types_types.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <functional>
27
#include <map>
28
#include <memory>
29
#include <mutex>
30
#include <set>
31
#include <shared_mutex>
32
#include <string>
33
#include <string_view>
34
#include <unordered_map>
35
#include <unordered_set>
36
#include <utility>
37
#include <vector>
38
39
#include "common/status.h"
40
#include "runtime/query_context.h"
41
#include "storage/olap_common.h"
42
#include "storage/tablet/tablet.h"
43
#include "storage/tablet/tablet_meta.h"
44
45
namespace doris {
46
47
// Discriminates the two kinds of file a FileMapping can describe.
enum class FileMappingType {
    // A Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL,
    // A file that belongs to an external table.
    EXTERNAL,
};
51
52
struct InternalFileMappingInfo {
53
    int64_t tablet_id;
54
    RowsetId rowset_id;
55
    uint32_t segment_id;
56
57
10.9k
    std::string to_string() const {
58
10.9k
        std::string value;
59
10.9k
        value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id));
60
10.9k
        auto* ptr = value.data();
61
62
10.9k
        memcpy(ptr, &tablet_id, sizeof(tablet_id));
63
10.9k
        ptr += sizeof(tablet_id);
64
10.9k
        memcpy(ptr, &rowset_id, sizeof(rowset_id));
65
10.9k
        ptr += sizeof(rowset_id);
66
10.9k
        memcpy(ptr, &segment_id, sizeof(segment_id));
67
10.9k
        return value;
68
10.9k
    }
69
};
70
71
struct ExternalFileMappingInfo {
72
    /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found
73
    * from QueryContext according to the plan_node_id. Because there are some important information in
74
    * TFileScanRangeParams (needed when creating hdfs/s3 reader):
75
    *      8: optional THdfsParams hdfs_params;
76
    *      9: optional map<string, string> properties;
77
    */
78
    int plan_node_id;
79
80
    /*
81
     * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage:
82
     * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column
83
     * 2. path, file_type, modification_time,compress_type .... used to read the file
84
     * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon
85
     */
86
    TFileRangeDesc scan_range_desc;
87
    bool enable_file_meta_cache;
88
89
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
90
                            bool file_meta_cache)
91
11.9k
            : plan_node_id(plan_node_id),
92
11.9k
              scan_range_desc(scan_range),
93
11.9k
              enable_file_meta_cache(file_meta_cache) {}
94
95
11.8k
    std::string to_string() const {
96
11.8k
        std::string value;
97
11.8k
        value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) +
98
11.8k
                     sizeof(scan_range_desc.start_offset));
99
11.8k
        auto* ptr = value.data();
100
101
11.8k
        memcpy(ptr, &plan_node_id, sizeof(plan_node_id));
102
11.8k
        ptr += sizeof(plan_node_id);
103
11.8k
        memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset));
104
11.8k
        ptr += sizeof(scan_range_desc.start_offset);
105
11.8k
        memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size());
106
11.8k
        return value;
107
11.8k
    }
108
};
109
110
struct FileMapping {
111
    ENABLE_FACTORY_CREATOR(FileMapping);
112
113
    FileMappingType type;
114
    std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value;
115
116
    FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id)
117
8.96k
            : type(FileMappingType::INTERNAL),
118
8.96k
              value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) {
119
8.96k
    }
120
121
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache)
122
11.8k
            : type(FileMappingType::EXTERNAL),
123
11.8k
              value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range,
124
11.8k
                    enable_file_meta_cache) {}
125
126
6.82k
    std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const {
127
6.82k
        DCHECK(type == FileMappingType::INTERNAL);
128
6.82k
        auto info = std::get<InternalFileMappingInfo>(value);
129
6.82k
        return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id);
130
6.82k
    }
131
132
22.4k
    ExternalFileMappingInfo& get_external_file_info() {
133
22.4k
        DCHECK(type == FileMappingType::EXTERNAL);
134
22.4k
        return std::get<ExternalFileMappingInfo>(value);
135
22.4k
    }
136
137
    static std::string file_mapping_info_to_string(
138
22.7k
            const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) {
139
22.7k
        return std::visit(
140
22.7k
                [](const auto& info) -> std::string {
141
22.7k
                    using T = std::decay_t<decltype(info)>;
142
143
22.7k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
10.9k
                        return info.to_string();
145
146
11.8k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
11.8k
                        return info.to_string();
148
11.8k
                    }
149
22.7k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
10.9k
                [](const auto& info) -> std::string {
141
10.9k
                    using T = std::decay_t<decltype(info)>;
142
143
10.9k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
10.9k
                        return info.to_string();
145
146
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
                        return info.to_string();
148
                    }
149
10.9k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
11.8k
                [](const auto& info) -> std::string {
141
11.8k
                    using T = std::decay_t<decltype(info)>;
142
143
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
                        return info.to_string();
145
146
11.8k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
11.8k
                        return info.to_string();
148
11.8k
                    }
149
11.8k
                },
150
22.7k
                info);
151
22.7k
    }
152
153
22.7k
    std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); }
154
};
155
156
class IdFileMap {
157
public:
158
2.11k
    IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {}
159
160
26.7k
    std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) {
161
26.7k
        std::shared_lock lock(_mtx);
162
26.7k
        auto it = _id_map.find(id);
163
26.7k
        if (it == _id_map.end()) {
164
1
            return nullptr;
165
1
        }
166
26.7k
        return it->second;
167
26.7k
    }
168
169
20.8k
    uint32_t get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) {
170
20.8k
        DCHECK(mapping.get() != nullptr);
171
20.8k
        auto value = mapping->file_mapping_info_to_string();
172
173
20.8k
        std::unique_lock lock(_mtx);
174
20.8k
        auto it = _mapping_to_id.find(value);
175
20.8k
        if (it != _mapping_to_id.end()) {
176
2.79k
            return it->second;
177
2.79k
        }
178
18.0k
        _id_map[_init_id++] = mapping;
179
18.0k
        _mapping_to_id[value] = _init_id - 1;
180
181
18.0k
        return _init_id - 1;
182
20.8k
    }
183
184
13.4k
    void add_temp_rowset(const RowsetSharedPtr& rowset) {
185
13.4k
        std::unique_lock lock(_mtx);
186
13.4k
        _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset;
187
13.4k
    }
188
189
2.81k
    RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) {
190
2.81k
        std::shared_lock lock(_mtx);
191
2.81k
        auto it = _temp_rowset_maps.find({tablet_id, rowset_id});
192
2.81k
        if (it == _temp_rowset_maps.end()) {
193
0
            return nullptr;
194
0
        }
195
2.81k
        return it->second;
196
2.81k
    }
197
198
147
    int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; }
199
200
4.54k
    void set_external_scan_params(QueryContext* query_ctx, int max_file_scanners) {
201
4.54k
        std::call_once(once_flag_for_external, [&] {
202
1.24k
            DCHECK(query_ctx != nullptr);
203
1.24k
            _query_global = query_ctx->get_query_globals();
204
1.24k
            _query_options = query_ctx->get_query_options();
205
1.24k
            _file_scan_range_params_map = query_ctx->file_scan_range_params_map;
206
1.24k
            _max_file_scanners = max_file_scanners;
207
1.24k
        });
208
4.54k
    }
209
210
2.03k
    const TQueryGlobals& get_query_globals() const { return _query_global; }
211
212
2.03k
    const TQueryOptions& get_query_options() const { return _query_options; }
213
214
4.07k
    const std::map<int, TFileScanRangeParams>& get_external_scan_params() const {
215
4.07k
        return _file_scan_range_params_map;
216
4.07k
    }
217
218
2.03k
    int get_max_file_scanners() const { return _max_file_scanners; }
219
220
private:
221
    std::shared_mutex _mtx;
222
    uint32_t _init_id = 0;
223
    std::unordered_map<std::string, uint32_t> _mapping_to_id;
224
    std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map;
225
226
    // use in scan external table
227
    TQueryGlobals _query_global;
228
    TQueryOptions _query_options;
229
    std::map<int, TFileScanRangeParams> _file_scan_range_params_map;
230
    std::once_flag once_flag_for_external;
231
    int _max_file_scanners = 10;
232
233
    // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction
234
    std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps;
235
    uint64_t delayed_expired_timestamp = 0;
236
};
237
238
class IdManager {
239
public:
240
    static constexpr uint8_t ID_VERSION = 0;
241
242
9
    IdManager() = default;
243
244
5
    ~IdManager() {
245
5
        std::unique_lock lock(_query_to_id_file_map_mtx);
246
5
        _query_to_id_file_map.clear();
247
5
    }
248
249
6.63k
    std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) {
250
6.63k
        std::unique_lock lock(_query_to_id_file_map_mtx);
251
6.63k
        auto it = _query_to_id_file_map.find(query_id);
252
6.63k
        if (it == _query_to_id_file_map.end()) {
253
2.10k
            auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout);
254
2.10k
            _query_to_id_file_map[query_id] = id_file_map;
255
2.10k
            return id_file_map;
256
2.10k
        }
257
4.53k
        return it->second;
258
6.63k
    }
259
260
454
    void gc_expired_id_file_map(int64_t now) {
261
454
        std::unique_lock lock(_query_to_id_file_map_mtx);
262
601
        for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) {
263
147
            if (it->second->get_delayed_expired_timestamp() <= now) {
264
2
                it = _query_to_id_file_map.erase(it);
265
145
            } else {
266
145
                ++it;
267
145
            }
268
147
        }
269
454
    }
270
271
2.00k
    void remove_id_file_map(const UniqueId& query_id) {
272
2.00k
        std::unique_lock lock(_query_to_id_file_map_mtx);
273
2.00k
        _query_to_id_file_map.erase(query_id);
274
2.00k
    }
275
276
2.95k
    std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) {
277
2.95k
        std::shared_lock lock(_query_to_id_file_map_mtx);
278
2.95k
        auto it = _query_to_id_file_map.find(query_id);
279
2.95k
        if (it == _query_to_id_file_map.end()) {
280
0
            return nullptr;
281
0
        }
282
2.95k
        return it->second;
283
2.95k
    }
284
285
private:
286
    DISALLOW_COPY_AND_ASSIGN(IdManager);
287
288
    phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map;
289
    std::shared_mutex _query_to_id_file_map_mtx;
290
};
291
292
} // namespace doris