Coverage Report

Created: 2026-03-12 14:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/id_manager.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/BackendService_types.h>
22
#include <gen_cpp/Types_types.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <functional>
27
#include <map>
28
#include <memory>
29
#include <mutex>
30
#include <set>
31
#include <shared_mutex>
32
#include <string>
33
#include <string_view>
34
#include <unordered_map>
35
#include <unordered_set>
36
#include <utility>
37
#include <vector>
38
39
#include "common/status.h"
40
#include "runtime/query_context.h"
41
#include "storage/olap_common.h"
42
#include "storage/tablet/tablet.h"
43
#include "storage/tablet/tablet_meta.h"
44
45
namespace doris {
46
47
// Tags which alternative a FileMapping holds; the FileMapping constructors set it
// together with the matching variant alternative.
enum class FileMappingType {
48
    INTERNAL, // Doris-format file, identified by {tablet_id}{rowset_id}{segment_id}
49
    EXTERNAL, // file that belongs to an external table
50
};
51
52
// Identifies one segment of a Doris-format file by (tablet_id, rowset_id, segment_id).
struct InternalFileMappingInfo {
53
    int64_t tablet_id;
54
    RowsetId rowset_id;
55
    uint32_t segment_id;
56
57
11.6k
    // Builds a binary (not human-readable) key by concatenating the raw bytes of
    // tablet_id, rowset_id and segment_id, in that order. IdFileMap uses the result
    // of file_mapping_info_to_string() as the lookup key for already-assigned ids.
    // NOTE(review): rowset_id is copied via memcpy of its object representation —
    // presumably RowsetId is trivially copyable with no indeterminate padding; confirm.
    std::string to_string() const {
58
11.6k
        std::string value;
59
11.6k
        // Size the buffer up front so the memcpy calls below write into owned storage.
        value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id));
60
11.6k
        auto* ptr = value.data();
61
62
11.6k
        // Each field is copied, then ptr advances past it to the next slot.
        memcpy(ptr, &tablet_id, sizeof(tablet_id));
63
11.6k
        ptr += sizeof(tablet_id);
64
11.6k
        memcpy(ptr, &rowset_id, sizeof(rowset_id));
65
11.6k
        ptr += sizeof(rowset_id);
66
11.6k
        memcpy(ptr, &segment_id, sizeof(segment_id));
67
11.6k
        return value;
68
11.6k
    }
69
};
70
71
// Describes one scan range of an external-table file, plus the plan node it belongs to.
struct ExternalFileMappingInfo {
72
    /* By recording the plan_node_id in the file mapping, the TFileScanRangeParams used in the scan phase can be
73
    * looked up from QueryContext by plan_node_id. This matters because TFileScanRangeParams carries
74
    * information that is needed when creating the hdfs/s3 reader, e.g.:
75
    *      8: optional THdfsParams hdfs_params;
76
    *      9: optional map<string, string> properties;
77
    */
78
    int plan_node_id;
79
80
    /*
81
     * The TFileRangeDesc (external_scan_range_desc) is recorded in the file mapping because:
82
     * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column.
83
     * 2. path, file_type, modification_time, compress_type, ... are used to read the file.
84
     * 3. TFileFormatType distinguishes whether it is iceberg/hive/hudi/paimon.
85
     */
86
    TFileRangeDesc scan_range_desc;
87
    bool enable_file_meta_cache;
88
89
    // Stores the three arguments as-is; scan_range is copied into scan_range_desc.
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
90
                            bool file_meta_cache)
91
29
            : plan_node_id(plan_node_id),
92
29
              scan_range_desc(scan_range),
93
29
              enable_file_meta_cache(file_meta_cache) {}
94
95
15
    // Builds a binary key laid out as:
    //   [raw bytes of plan_node_id][raw bytes of start_offset][bytes of path]
    // Only these three values contribute to the key; enable_file_meta_cache and the
    // remaining fields of scan_range_desc are not part of it.
    std::string to_string() const {
96
15
        std::string value;
97
15
        value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) +
98
15
                     sizeof(scan_range_desc.start_offset));
99
15
        auto* ptr = value.data();
100
101
15
        // Fixed-size fields first, variable-length path last.
        memcpy(ptr, &plan_node_id, sizeof(plan_node_id));
102
15
        ptr += sizeof(plan_node_id);
103
15
        memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset));
104
15
        ptr += sizeof(scan_range_desc.start_offset);
105
15
        memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size());
106
15
        return value;
107
15
    }
108
};
109
110
// Tagged holder for either an InternalFileMappingInfo or an ExternalFileMappingInfo.
// NOTE(review): this struct uses std::variant/std::visit/std::tuple (and the info
// structs use memcpy), but the visible include list has no <variant>, <tuple> or
// <cstring> — presumably they arrive transitively; confirm.
struct FileMapping {
111
    // Project macro defined outside this file — presumably adds factory helpers; confirm.
    ENABLE_FACTORY_CREATOR(FileMapping);
112
113
    // `type` and `value` are always set together by the constructors below.
    FileMappingType type;
114
    std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value;
115
116
    // Creates an INTERNAL mapping for a Doris-format segment.
    FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id)
117
9.73k
            : type(FileMappingType::INTERNAL),
118
9.73k
              value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) {
119
9.73k
    }
120
121
    // Creates an EXTERNAL mapping for an external-table scan range.
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache)
122
14
            : type(FileMappingType::EXTERNAL),
123
14
              value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range,
124
14
                    enable_file_meta_cache) {}
125
126
6.83k
    // Returns (tablet_id, rowset_id, segment_id). Must only be called on an INTERNAL
    // mapping: the DCHECK asserts it, and std::get is applied to the internal alternative.
    std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const {
127
6.83k
        DCHECK(type == FileMappingType::INTERNAL);
128
6.83k
        // `auto` (not `auto&`) makes a local copy of the info before the tuple is built.
        auto info = std::get<InternalFileMappingInfo>(value);
129
6.83k
        return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id);
130
6.83k
    }
131
132
48
    // Returns a mutable reference to the stored external info. Must only be called
    // on an EXTERNAL mapping (asserted by the DCHECK).
    ExternalFileMappingInfo& get_external_file_info() {
133
48
        DCHECK(type == FileMappingType::EXTERNAL);
134
48
        return std::get<ExternalFileMappingInfo>(value);
135
48
    }
136
137
    // Returns the binary key of whichever alternative `info` holds by dispatching
    // to that alternative's to_string().
    static std::string file_mapping_info_to_string(
138
11.6k
            const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) {
139
11.6k
        return std::visit(
140
11.6k
                [](const auto& info) -> std::string {
141
11.6k
                    using T = std::decay_t<decltype(info)>;
142
143
11.6k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
11.6k
                        return info.to_string();
145
146
11.6k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
15
                        return info.to_string();
148
15
                    }
149
11.6k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
11.6k
                [](const auto& info) -> std::string {
141
11.6k
                    using T = std::decay_t<decltype(info)>;
142
143
11.6k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
11.6k
                        return info.to_string();
145
146
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
                        return info.to_string();
148
                    }
149
11.6k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
15
                [](const auto& info) -> std::string {
141
15
                    using T = std::decay_t<decltype(info)>;
142
143
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
                        return info.to_string();
145
146
15
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
15
                        return info.to_string();
148
15
                    }
149
15
                },
150
11.6k
                info);
151
11.6k
    }
152
153
11.6k
    // Convenience overload: key of this mapping's own `value`.
    std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); }
154
};
155
156
// Assigns uint32_t ids to FileMapping objects and resolves ids back to mappings.
// It also holds temp rowsets and a copy of the external-scan parameters.
// The id tables and the temp-rowset table are guarded by _mtx.
class IdFileMap {
157
public:
158
869
    // Stores expired_timestamp; it is later read through get_delayed_expired_timestamp().
    // NOTE(review): single-argument constructor is not `explicit`.
    IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {}
159
160
8.65k
    // Looks up the mapping registered under `id` (shared lock).
    // Returns nullptr when the id is unknown.
    std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) {
161
8.65k
        std::shared_lock lock(_mtx);
162
8.65k
        auto it = _id_map.find(id);
163
8.65k
        if (it == _id_map.end()) {
164
1
            return nullptr;
165
1
        }
166
8.65k
        return it->second;
167
8.65k
    }
168
169
9.68k
    // Returns the id for `mapping`, assigning a new one on first sight.
    // Two mappings with the same file_mapping_info_to_string() key share one id; the
    // mapping stored in _id_map is the first one registered for that key.
    // `mapping` must be non-null (DCHECK only).
    uint32_t get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) {
170
9.68k
        DCHECK(mapping.get() != nullptr);
171
9.68k
        // The key is built before the lock is taken.
        auto value = mapping->file_mapping_info_to_string();
172
173
9.68k
        std::unique_lock lock(_mtx);
174
9.68k
        auto it = _mapping_to_id.find(value);
175
9.68k
        if (it != _mapping_to_id.end()) {
176
3.55k
            return it->second;
177
3.55k
        }
178
6.12k
        // Post-increment: the mapping is stored under the current _init_id, so after
        // this line the id just assigned is `_init_id - 1`.
        _id_map[_init_id++] = mapping;
179
6.12k
        _mapping_to_id[value] = _init_id - 1;
180
181
6.12k
        return _init_id - 1;
182
9.68k
    }
183
184
12.8k
    // Registers `rowset` under (tablet_id, rowset_id), replacing any existing entry
    // with the same key (exclusive lock).
    void add_temp_rowset(const RowsetSharedPtr& rowset) {
185
12.8k
        std::unique_lock lock(_mtx);
186
12.8k
        _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset;
187
12.8k
    }
188
189
2.86k
    // Returns the temp rowset registered under (tablet_id, rowset_id), or nullptr
    // if there is none (shared lock).
    RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) {
190
2.86k
        std::shared_lock lock(_mtx);
191
2.86k
        auto it = _temp_rowset_maps.find({tablet_id, rowset_id});
192
2.86k
        if (it == _temp_rowset_maps.end()) {
193
0
            return nullptr;
194
0
        }
195
2.86k
        return it->second;
196
2.86k
    }
197
198
101
    // Returns the timestamp passed to the constructor.
    // NOTE(review): the member is uint64_t but the return type is int64_t, so the
    // value is implicitly converted.
    int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; }
199
200
13
    // Copies the query globals, query options and file_scan_range_params_map out of
    // query_ctx and records max_file_scanners. Guarded by std::call_once, so only
    // the first call has any effect; later calls are ignored.
    // NOTE(review): a null query_ctx is only guarded by DCHECK before being dereferenced.
    void set_external_scan_params(QueryContext* query_ctx, int max_file_scanners) {
201
13
        std::call_once(once_flag_for_external, [&] {
202
13
            DCHECK(query_ctx != nullptr);
203
13
            _query_global = query_ctx->get_query_globals();
204
13
            _query_options = query_ctx->get_query_options();
205
13
            _file_scan_range_params_map = query_ctx->file_scan_range_params_map;
206
13
            _max_file_scanners = max_file_scanners;
207
13
        });
208
13
    }
209
210
13
    // The four getters below return the values stored by set_external_scan_params().
    // They do not take _mtx. NOTE(review): presumably callers only read them after
    // set_external_scan_params() has completed — confirm.
    const TQueryGlobals& get_query_globals() const { return _query_global; }
211
212
13
    const TQueryOptions& get_query_options() const { return _query_options; }
213
214
26
    const std::map<int, TFileScanRangeParams>& get_external_scan_params() const {
215
26
        return _file_scan_range_params_map;
216
26
    }
217
218
13
    int get_max_file_scanners() const { return _max_file_scanners; }
219
220
private:
221
    // Guards _init_id, _mapping_to_id, _id_map and _temp_rowset_maps.
    std::shared_mutex _mtx;
222
    // Next id to hand out; ids start at 0.
    uint32_t _init_id = 0;
223
    // Binary key (file_mapping_info_to_string) -> assigned id.
    std::unordered_map<std::string, uint32_t> _mapping_to_id;
224
    // Assigned id -> mapping.
    std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map;
225
226
    // Used when scanning external tables; filled once by set_external_scan_params().
227
    TQueryGlobals _query_global;
228
    TQueryOptions _query_options;
229
    std::map<int, TFileScanRangeParams> _file_scan_range_params_map;
230
    std::once_flag once_flag_for_external;
231
    // Default used until set_external_scan_params() overrides it.
    int _max_file_scanners = 10;
232
233
    // Used for Doris-format files: holds temp rowsets so that they are not deleted by compaction.
    // NOTE(review): relies on a std::hash specialization for std::pair<int64_t, RowsetId>,
    // presumably provided elsewhere in the project — confirm.
234
    std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps;
235
    uint64_t delayed_expired_timestamp = 0;
236
};
237
238
// Registry of per-query IdFileMap objects, keyed by query id.
// All access to the registry goes through _query_to_id_file_map_mtx.
class IdManager {
239
public:
240
    // Not referenced anywhere in the code visible here.
    static constexpr uint8_t ID_VERSION = 0;
241
242
9
    IdManager() = default;
243
244
5
    // Drops every registered IdFileMap while holding the exclusive lock.
    ~IdManager() {
245
5
        std::unique_lock lock(_query_to_id_file_map_mtx);
246
5
        _query_to_id_file_map.clear();
247
5
    }
248
249
2.12k
    // Get-or-create: returns the IdFileMap registered for query_id, creating it if
    // absent. A new map gets the expiry timestamp `UnixSeconds() + timeout`; when an
    // entry already exists it is returned unchanged and `timeout` is ignored.
    // NOTE(review): timeout is presumably in seconds, matching UnixSeconds() — confirm.
    std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) {
250
2.12k
        std::unique_lock lock(_query_to_id_file_map_mtx);
251
2.12k
        auto it = _query_to_id_file_map.find(query_id);
252
2.12k
        if (it == _query_to_id_file_map.end()) {
253
867
            auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout);
254
867
            _query_to_id_file_map[query_id] = id_file_map;
255
867
            return id_file_map;
256
867
        }
257
1.25k
        return it->second;
258
2.12k
    }
259
260
198
    // Removes every entry whose delayed-expired timestamp is <= now.
    // NOTE(review): `now` is presumably Unix seconds, like the stored timestamps — confirm.
    void gc_expired_id_file_map(int64_t now) {
261
198
        std::unique_lock lock(_query_to_id_file_map_mtx);
262
299
        // erase() returns the next iterator, so the loop advances only in the else branch.
        for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) {
263
101
            if (it->second->get_delayed_expired_timestamp() <= now) {
264
0
                it = _query_to_id_file_map.erase(it);
265
101
            } else {
266
101
                ++it;
267
101
            }
268
101
        }
269
198
    }
270
271
763
    // Removes the entry for query_id, if any.
    void remove_id_file_map(const UniqueId& query_id) {
272
763
        std::unique_lock lock(_query_to_id_file_map_mtx);
273
763
        _query_to_id_file_map.erase(query_id);
274
763
    }
275
276
967
    // Returns the IdFileMap registered for query_id, or nullptr if there is none
    // (shared lock; never creates an entry).
    std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) {
277
967
        std::shared_lock lock(_query_to_id_file_map_mtx);
278
967
        auto it = _query_to_id_file_map.find(query_id);
279
967
        if (it == _query_to_id_file_map.end()) {
280
0
            return nullptr;
281
0
        }
282
967
        return it->second;
283
967
    }
284
285
private:
286
    DISALLOW_COPY_AND_ASSIGN(IdManager);
287
288
    // NOTE(review): no phmap header appears in the visible include list — presumably
    // it is included transitively; confirm.
    phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map;
289
    std::shared_mutex _query_to_id_file_map_mtx;
290
};
291
292
} // namespace doris