Coverage Report

Created: 2025-06-16 14:03

/root/doris/be/src/olap/id_manager.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/BackendService_types.h>
22
#include <gen_cpp/Types_types.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <functional>
27
#include <map>
28
#include <memory>
29
#include <mutex>
30
#include <set>
31
#include <shared_mutex>
32
#include <string>
33
#include <string_view>
34
#include <unordered_map>
35
#include <unordered_set>
36
#include <utility>
37
#include <vector>
38
39
#include "common/status.h"
40
#include "olap/olap_common.h"
41
#include "olap/tablet.h"
42
#include "olap/tablet_meta.h"
43
#include "runtime/query_context.h"
44
45
namespace doris {
46
47
// Discriminates which kind of file a FileMapping entry describes.
enum class FileMappingType {
    // A Doris-format file, addressed by {tablet_id}{rowset_id}{segment_id}.
    INTERNAL = 0,
    // A file that belongs to an external table.
    EXTERNAL = 1,
};
51
52
struct InternalFileMappingInfo {
53
    int64_t tablet_id;
54
    RowsetId rowset_id;
55
    uint32_t segment_id;
56
57
2.92k
    std::string to_string() const {
58
2.92k
        std::string value;
59
2.92k
        value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id));
60
2.92k
        auto* ptr = value.data();
61
62
2.92k
        memcpy(ptr, &tablet_id, sizeof(tablet_id));
63
2.92k
        ptr += sizeof(tablet_id);
64
2.92k
        memcpy(ptr, &rowset_id, sizeof(rowset_id));
65
2.92k
        ptr += sizeof(rowset_id);
66
2.92k
        memcpy(ptr, &segment_id, sizeof(segment_id));
67
2.92k
        return value;
68
2.92k
    }
69
};
70
71
struct ExternalFileMappingInfo {
72
    /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found
73
    * from QueryContext according to the plan_node_id. Because there are some important information in
74
    * TFileScanRangeParams (needed when creating hdfs/s3 reader):
75
    *      8: optional THdfsParams hdfs_params;
76
    *      9: optional map<string, string> properties;
77
    */
78
    int plan_node_id;
79
80
    /*
81
     * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage:
82
     * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column
83
     * 2. path, file_type, modification_time,compress_type .... used to read the file
84
     * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon
85
     */
86
    TFileRangeDesc scan_range_desc;
87
    bool enable_file_meta_cache;
88
89
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
90
                            bool file_meta_cache)
91
            : plan_node_id(plan_node_id),
92
              scan_range_desc(scan_range),
93
16
              enable_file_meta_cache(file_meta_cache) {}
94
95
2
    std::string to_string() const {
96
2
        std::string value;
97
2
        value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) +
98
2
                     sizeof(scan_range_desc.start_offset));
99
2
        auto* ptr = value.data();
100
101
2
        memcpy(ptr, &plan_node_id, sizeof(plan_node_id));
102
2
        ptr += sizeof(plan_node_id);
103
2
        memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset));
104
2
        ptr += sizeof(scan_range_desc.start_offset);
105
2
        memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size());
106
2
        return value;
107
2
    }
108
};
109
110
struct FileMapping {
111
    ENABLE_FACTORY_CREATOR(FileMapping);
112
113
    FileMappingType type;
114
    std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value;
115
116
    FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id)
117
            : type(FileMappingType::INTERNAL),
118
985
              value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) {
119
985
    }
120
121
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache)
122
            : type(FileMappingType::EXTERNAL),
123
              value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range,
124
1
                    enable_file_meta_cache) {}
125
126
0
    std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const {
127
0
        DCHECK(type == FileMappingType::INTERNAL);
128
0
        auto info = std::get<InternalFileMappingInfo>(value);
129
0
        return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id);
130
0
    }
131
132
0
    ExternalFileMappingInfo& get_external_file_info() {
133
0
        DCHECK(type == FileMappingType::EXTERNAL);
134
0
        return std::get<ExternalFileMappingInfo>(value);
135
0
    }
136
137
    static std::string file_mapping_info_to_string(
138
2.94k
            const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) {
139
2.94k
        return std::visit(
140
2.94k
                [](const auto& info) -> std::string {
141
2.91k
                    using T = std::decay_t<decltype(info)>;
142
143
2.91k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
2
                        return info.to_string();
145
146
2
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
2
                        return info.to_string();
148
2
                    }
149
2.91k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
2.91k
                [](const auto& info) -> std::string {
141
2.91k
                    using T = std::decay_t<decltype(info)>;
142
143
2.91k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
2.91k
                        return info.to_string();
145
146
2.91k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
2.91k
                        return info.to_string();
148
2.91k
                    }
149
2.91k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
140
2
                [](const auto& info) -> std::string {
141
2
                    using T = std::decay_t<decltype(info)>;
142
143
2
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
144
2
                        return info.to_string();
145
146
2
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
147
2
                        return info.to_string();
148
2
                    }
149
2
                },
150
2.94k
                info);
151
2.94k
    }
152
153
2.92k
    std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); }
154
};
155
156
class IdFileMap {
157
public:
158
205
    IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {}
159
160
1.00k
    std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) {
161
1.00k
        std::shared_lock lock(_mtx);
162
1.00k
        auto it = _id_map.find(id);
163
1.00k
        if (it == _id_map.end()) {
164
1
            return nullptr;
165
1
        }
166
1.00k
        return it->second;
167
1.00k
    }
168
169
991
    uint32 get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) {
170
991
        DCHECK(mapping.get() != nullptr);
171
991
        auto value = mapping->file_mapping_info_to_string();
172
173
991
        std::unique_lock lock(_mtx);
174
991
        auto it = _mapping_to_id.find(value);
175
991
        if (it != _mapping_to_id.end()) {
176
0
            return it->second;
177
0
        }
178
991
        _id_map[_init_id++] = mapping;
179
991
        _mapping_to_id[value] = _init_id - 1;
180
181
991
        return _init_id - 1;
182
991
    }
183
184
0
    void add_temp_rowset(const RowsetSharedPtr& rowset) {
185
0
        std::unique_lock lock(_mtx);
186
0
        _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset;
187
0
    }
188
189
0
    RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) {
190
0
        std::shared_lock lock(_mtx);
191
0
        auto it = _temp_rowset_maps.find({tablet_id, rowset_id});
192
0
        if (it == _temp_rowset_maps.end()) {
193
0
            return nullptr;
194
0
        }
195
0
        return it->second;
196
0
    }
197
198
0
    int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; }
199
200
0
    void set_external_scan_params(QueryContext* query_ctx) {
201
0
        std::call_once(once_flag_for_external, [&] {
202
0
            DCHECK(query_ctx != nullptr);
203
0
            _query_global = query_ctx->get_query_globals();
204
0
            _query_options = query_ctx->get_query_options();
205
0
            _file_scan_range_params_map = query_ctx->file_scan_range_params_map;
206
0
        });
207
0
    }
208
209
0
    const TQueryGlobals& get_query_globals() const { return _query_global; }
210
211
0
    const TQueryOptions& get_query_options() const { return _query_options; }
212
213
0
    const std::map<int, TFileScanRangeParams>& get_external_scan_params() const {
214
0
        return _file_scan_range_params_map;
215
0
    }
216
217
private:
218
    std::shared_mutex _mtx;
219
    uint32_t _init_id = 0;
220
    std::unordered_map<std::string, uint32_t> _mapping_to_id;
221
    std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map;
222
223
    // use in scan external table
224
    TQueryGlobals _query_global;
225
    TQueryOptions _query_options;
226
    std::map<int, TFileScanRangeParams> _file_scan_range_params_map;
227
    std::once_flag once_flag_for_external;
228
229
    // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction
230
    std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps;
231
    uint64_t delayed_expired_timestamp = 0;
232
};
233
234
class IdManager {
235
public:
236
    static constexpr uint8_t ID_VERSION = 0;
237
238
2
    IdManager() = default;
239
240
2
    ~IdManager() {
241
2
        std::unique_lock lock(_query_to_id_file_map_mtx);
242
2
        _query_to_id_file_map.clear();
243
2
    }
244
245
204
    std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) {
246
204
        std::unique_lock lock(_query_to_id_file_map_mtx);
247
204
        auto it = _query_to_id_file_map.find(query_id);
248
204
        if (it == _query_to_id_file_map.end()) {
249
203
            auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout);
250
203
            _query_to_id_file_map[query_id] = id_file_map;
251
203
            return id_file_map;
252
203
        }
253
1
        return it->second;
254
204
    }
255
256
0
    void gc_expired_id_file_map(int64_t now) {
257
0
        std::unique_lock lock(_query_to_id_file_map_mtx);
258
0
        for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) {
259
0
            if (it->second->get_delayed_expired_timestamp() <= now) {
260
0
                it = _query_to_id_file_map.erase(it);
261
0
            } else {
262
0
                ++it;
263
0
            }
264
0
        }
265
0
    }
266
267
101
    void remove_id_file_map(const UniqueId& query_id) {
268
101
        std::unique_lock lock(_query_to_id_file_map_mtx);
269
101
        _query_to_id_file_map.erase(query_id);
270
101
    }
271
272
0
    std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) {
273
0
        std::shared_lock lock(_query_to_id_file_map_mtx);
274
0
        auto it = _query_to_id_file_map.find(query_id);
275
0
        if (it == _query_to_id_file_map.end()) {
276
0
            return nullptr;
277
0
        }
278
0
        return it->second;
279
0
    }
280
281
private:
282
    DISALLOW_COPY_AND_ASSIGN(IdManager);
283
284
    phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map;
285
    std::shared_mutex _query_to_id_file_map_mtx;
286
};
287
288
} // namespace doris