Coverage Report

Created: 2025-05-19 15:53

/root/doris/be/src/olap/id_manager.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/BackendService_types.h>
22
#include <gen_cpp/Types_types.h>
23
#include <stddef.h>
24
#include <stdint.h>
25
26
#include <functional>
27
#include <map>
28
#include <memory>
29
#include <mutex>
30
#include <set>
31
#include <shared_mutex>
32
#include <string>
33
#include <string_view>
34
#include <unordered_map>
35
#include <unordered_set>
36
#include <utility>
37
#include <vector>
38
39
#include "common/status.h"
40
#include "olap/olap_common.h"
41
#include "olap/tablet.h"
42
#include "olap/tablet_meta.h"
43
44
namespace doris {
45
46
/// Kind of file a FileMapping describes; selects which alternative of
/// FileMapping::value is populated.
enum class FileMappingType {
    // for doris format file {tablet_id}{rowset_id}{segment_id}
    INTERNAL,
    // for external table.
    EXTERNAL,
};
50
51
struct InternalFileMappingInfo {
52
    int64_t tablet_id;
53
    RowsetId rowset_id;
54
    uint32_t segment_id;
55
56
2.92k
    std::string to_string() const {
57
2.92k
        std::string value;
58
2.92k
        value.resize(sizeof(tablet_id) + sizeof(rowset_id) + sizeof(segment_id));
59
2.92k
        auto* ptr = value.data();
60
61
2.92k
        memcpy(ptr, &tablet_id, sizeof(tablet_id));
62
2.92k
        ptr += sizeof(tablet_id);
63
2.92k
        memcpy(ptr, &rowset_id, sizeof(rowset_id));
64
2.92k
        ptr += sizeof(rowset_id);
65
2.92k
        memcpy(ptr, &segment_id, sizeof(segment_id));
66
2.92k
        return value;
67
2.92k
    }
68
};
69
70
struct ExternalFileMappingInfo {
71
    /* By recording the plan_node_id in fileMapping, the TFileScanRangeParams used in the scan phase can be found
72
    * from QueryContext according to the plan_node_id. Because there are some important information in
73
    * TFileScanRangeParams (needed when creating hdfs/s3 reader):
74
    *      8: optional THdfsParams hdfs_params;
75
    *      9: optional map<string, string> properties;
76
    */
77
    int plan_node_id;
78
79
    /*
80
     * Record TFileRangeDesc external_scan_range_desc in fileMapping, usage:
81
     * 1. If the file belongs to a partition, columns_from_path_keys and columns_from_path in TFileRangeDesc are needed when materializing the partition column
82
     * 2. path, file_type, modification_time,compress_type .... used to read the file
83
     * 3. TFileFormatType can distinguish whether it is iceberg/hive/hudi/paimon
84
     */
85
    TFileRangeDesc scan_range_desc;
86
    bool enable_file_meta_cache;
87
88
    ExternalFileMappingInfo(int plan_node_id, const TFileRangeDesc& scan_range,
89
                            bool file_meta_cache)
90
            : plan_node_id(plan_node_id),
91
              scan_range_desc(scan_range),
92
16
              enable_file_meta_cache(file_meta_cache) {}
93
94
2
    std::string to_string() const {
95
2
        std::string value;
96
2
        value.resize(scan_range_desc.path.size() + sizeof(plan_node_id) +
97
2
                     sizeof(scan_range_desc.start_offset));
98
2
        auto* ptr = value.data();
99
100
2
        memcpy(ptr, &plan_node_id, sizeof(plan_node_id));
101
2
        ptr += sizeof(plan_node_id);
102
2
        memcpy(ptr, &scan_range_desc.start_offset, sizeof(scan_range_desc.start_offset));
103
2
        ptr += sizeof(scan_range_desc.start_offset);
104
2
        memcpy(ptr, scan_range_desc.path.data(), scan_range_desc.path.size());
105
2
        return value;
106
2
    }
107
};
108
109
struct FileMapping {
110
    ENABLE_FACTORY_CREATOR(FileMapping);
111
112
    FileMappingType type;
113
    std::variant<InternalFileMappingInfo, ExternalFileMappingInfo> value;
114
115
    FileMapping(int64_t tablet_id, RowsetId rowset_id, uint32_t segment_id)
116
            : type(FileMappingType::INTERNAL),
117
979
              value(std::in_place_type<InternalFileMappingInfo>, tablet_id, rowset_id, segment_id) {
118
979
    }
119
120
    FileMapping(int plan_node_id, const TFileRangeDesc& scan_range, bool enable_file_meta_cache)
121
            : type(FileMappingType::EXTERNAL),
122
              value(std::in_place_type<ExternalFileMappingInfo>, plan_node_id, scan_range,
123
1
                    enable_file_meta_cache) {}
124
125
0
    std::tuple<int64_t, RowsetId, uint32_t> get_doris_format_info() const {
126
0
        DCHECK(type == FileMappingType::INTERNAL);
127
0
        auto info = std::get<InternalFileMappingInfo>(value);
128
0
        return std::make_tuple(info.tablet_id, info.rowset_id, info.segment_id);
129
0
    }
130
131
0
    ExternalFileMappingInfo& get_external_file_info() {
132
0
        DCHECK(type == FileMappingType::EXTERNAL);
133
0
        return std::get<ExternalFileMappingInfo>(value);
134
0
    }
135
136
    static std::string file_mapping_info_to_string(
137
2.93k
            const std::variant<InternalFileMappingInfo, ExternalFileMappingInfo>& info) {
138
2.93k
        return std::visit(
139
2.93k
                [](const auto& info) -> std::string {
140
2.91k
                    using T = std::decay_t<decltype(info)>;
141
142
2.91k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
143
2
                        return info.to_string();
144
145
2
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
146
2
                        return info.to_string();
147
2
                    }
148
2.91k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS2_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
139
2.90k
                [](const auto& info) -> std::string {
140
2.90k
                    using T = std::decay_t<decltype(info)>;
141
142
2.91k
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
143
2.91k
                        return info.to_string();
144
145
2.91k
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
146
2.90k
                        return info.to_string();
147
2.90k
                    }
148
2.90k
                },
_ZZN5doris11FileMapping27file_mapping_info_to_stringB5cxx11ERKSt7variantIJNS_23InternalFileMappingInfoENS_23ExternalFileMappingInfoEEEENKUlRKT_E_clIS3_EENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES9_
Line
Count
Source
139
2
                [](const auto& info) -> std::string {
140
2
                    using T = std::decay_t<decltype(info)>;
141
142
2
                    if constexpr (std::is_same_v<T, InternalFileMappingInfo>) {
143
2
                        return info.to_string();
144
145
2
                    } else if constexpr (std::is_same_v<T, ExternalFileMappingInfo>) {
146
2
                        return info.to_string();
147
2
                    }
148
2
                },
149
2.93k
                info);
150
2.93k
    }
151
152
2.89k
    std::string file_mapping_info_to_string() { return file_mapping_info_to_string(value); }
153
};
154
155
class IdFileMap {
156
public:
157
205
    IdFileMap(uint64_t expired_timestamp) : delayed_expired_timestamp(expired_timestamp) {}
158
159
1.00k
    std::shared_ptr<FileMapping> get_file_mapping(uint32_t id) {
160
1.00k
        std::shared_lock lock(_mtx);
161
1.00k
        auto it = _id_map.find(id);
162
1.00k
        if (it == _id_map.end()) {
163
1
            return nullptr;
164
1
        }
165
1.00k
        return it->second;
166
1.00k
    }
167
168
987
    uint32 get_file_mapping_id(const std::shared_ptr<FileMapping>& mapping) {
169
987
        DCHECK(mapping.get() != nullptr);
170
987
        auto value = mapping->file_mapping_info_to_string();
171
172
987
        std::unique_lock lock(_mtx);
173
987
        auto it = _mapping_to_id.find(value);
174
987
        if (it != _mapping_to_id.end()) {
175
0
            return it->second;
176
0
        }
177
987
        _id_map[_init_id++] = mapping;
178
987
        _mapping_to_id[value] = _init_id - 1;
179
180
987
        return _init_id - 1;
181
987
    }
182
183
0
    void add_temp_rowset(const RowsetSharedPtr& rowset) {
184
0
        std::unique_lock lock(_mtx);
185
0
        _temp_rowset_maps[{rowset->rowset_meta()->tablet_id(), rowset->rowset_id()}] = rowset;
186
0
    }
187
188
0
    RowsetSharedPtr get_temp_rowset(const int64_t tablet_id, const RowsetId& rowset_id) {
189
0
        std::shared_lock lock(_mtx);
190
0
        auto it = _temp_rowset_maps.find({tablet_id, rowset_id});
191
0
        if (it == _temp_rowset_maps.end()) {
192
0
            return nullptr;
193
0
        }
194
0
        return it->second;
195
0
    }
196
197
0
    int64_t get_delayed_expired_timestamp() { return delayed_expired_timestamp; }
198
199
private:
200
    std::shared_mutex _mtx;
201
    uint32_t _init_id = 0;
202
    std::unordered_map<std::string, uint32_t> _mapping_to_id;
203
    std::unordered_map<uint32_t, std::shared_ptr<FileMapping>> _id_map;
204
205
    // use in Doris Format to keep temp rowsets, preventing them from being deleted by compaction
206
    std::unordered_map<std::pair<int64_t, RowsetId>, RowsetSharedPtr> _temp_rowset_maps;
207
    uint64_t delayed_expired_timestamp = 0;
208
};
209
210
class IdManager {
211
public:
212
    static constexpr uint8_t ID_VERSION = 0;
213
214
2
    IdManager() = default;
215
216
2
    ~IdManager() {
217
2
        std::unique_lock lock(_query_to_id_file_map_mtx);
218
2
        _query_to_id_file_map.clear();
219
2
    }
220
221
204
    std::shared_ptr<IdFileMap> add_id_file_map(const UniqueId& query_id, int timeout) {
222
204
        std::unique_lock lock(_query_to_id_file_map_mtx);
223
204
        auto it = _query_to_id_file_map.find(query_id);
224
204
        if (it == _query_to_id_file_map.end()) {
225
203
            auto id_file_map = std::make_shared<IdFileMap>(UnixSeconds() + timeout);
226
203
            _query_to_id_file_map[query_id] = id_file_map;
227
203
            return id_file_map;
228
203
        }
229
1
        return it->second;
230
204
    }
231
232
0
    void gc_expired_id_file_map(int64_t now) {
233
0
        std::unique_lock lock(_query_to_id_file_map_mtx);
234
0
        for (auto it = _query_to_id_file_map.begin(); it != _query_to_id_file_map.end();) {
235
0
            if (it->second->get_delayed_expired_timestamp() <= now) {
236
0
                it = _query_to_id_file_map.erase(it);
237
0
            } else {
238
0
                ++it;
239
0
            }
240
0
        }
241
0
    }
242
243
101
    void remove_id_file_map(const UniqueId& query_id) {
244
101
        std::unique_lock lock(_query_to_id_file_map_mtx);
245
101
        _query_to_id_file_map.erase(query_id);
246
101
    }
247
248
0
    std::shared_ptr<IdFileMap> get_id_file_map(const UniqueId& query_id) {
249
0
        std::shared_lock lock(_query_to_id_file_map_mtx);
250
0
        auto it = _query_to_id_file_map.find(query_id);
251
0
        if (it == _query_to_id_file_map.end()) {
252
0
            return nullptr;
253
0
        }
254
0
        return it->second;
255
0
    }
256
257
private:
258
    DISALLOW_COPY_AND_ASSIGN(IdManager);
259
260
    phmap::flat_hash_map<UniqueId, std::shared_ptr<IdFileMap>> _query_to_id_file_map;
261
    std::shared_mutex _query_to_id_file_map_mtx;
262
};
263
264
} // namespace doris