Coverage Report

Created: 2026-06-02 23:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/cache/file_cache_common.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h
19
// and modified by Doris
20
21
#pragma once
22
#include <cstdint>
23
#include <vector>
24
25
#include "core/uint128.h"
26
#include "io/io_common.h"
27
28
namespace doris::io {
29
30
// Default element count for the remote-FS objects cache: 100 * 1024 entries.
inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 * 1024;

// Largest size of a single file-cache block, in bytes (1 MiB).
inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1024 * 1024;

// Default per-queue percentages; they are the default arguments of
// get_file_cache_settings(). The four values add up to 100.
inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
inline static constexpr size_t DEFAULT_INDEX_PERCENT = 5;
inline static constexpr size_t DEFAULT_TTL_PERCENT = 50;
36
37
// Local alias for the project's UInt128 type (core/uint128.h); it is the raw
// value type stored inside UInt128Wrapper.
using uint128_t = UInt128;
38
39
// Category of a cached block. CacheContext picks one of these per request.
// The numeric values are spelled out explicitly and must stay as they are;
// NOTE(review): presumably they are relied on outside this header - confirm
// before renumbering.
enum FileCacheType {
    DISPOSABLE = 0,
    NORMAL = 1,
    INDEX = 2,
    TTL = 3,
};
45
std::string cache_type_to_surfix(FileCacheType type);
46
FileCacheType surfix_to_cache_type(const std::string& str);
47
48
FileCacheType string_to_cache_type(const std::string& str);
49
std::string cache_type_to_string(FileCacheType type);
50
51
struct UInt128Wrapper {
52
    uint128_t value_;
53
    [[nodiscard]] std::string to_string() const;
54
55
    UInt128Wrapper() = default;
56
2.16M
    explicit UInt128Wrapper(const uint128_t& value) : value_(value) {}
57
58
68.6M
    bool operator==(const UInt128Wrapper& other) const { return value_ == other.value_; }
59
60
443k
    uint64_t high() const { return static_cast<uint64_t>(value_ >> 64); }
61
443k
    uint64_t low() const { return static_cast<uint64_t>(value_); }
62
63
    friend std::ostream& operator<<(std::ostream& os, const UInt128Wrapper& wrapper) {
64
        os << "UInt128Wrapper(" << wrapper.high() << ", " << wrapper.low() << ")";
65
        return os;
66
    }
67
};
68
69
// Counters and timers collected for a cached read. CacheContext holds a
// non-owning pointer to one of these.
// NOTE(review): the units of the *_timer fields are not visible in this
// header - confirm at the update sites.
struct ReadStatistics {
    // Outcome flags. Note that hit_cache starts out as true.
    bool hit_cache {true};
    bool from_peer_cache {false};
    bool skip_cache {false};

    // Byte counters.
    int64_t bytes_read {0};
    int64_t bytes_read_from_local {0};
    int64_t bytes_read_from_remote {0};
    int64_t bytes_read_from_peer {0};
    int64_t bytes_write_into_file_cache {0};

    // Timers.
    int64_t remote_read_timer {0};
    int64_t peer_read_timer {0};
    int64_t remote_wait_timer {0}; // wait for other downloader
    int64_t local_read_timer {0};
    int64_t local_write_timer {0};
    int64_t read_cache_file_directly_timer {0};
    int64_t cache_get_or_set_timer {0};
    int64_t lock_wait_timer {0};
    int64_t get_timer {0};
    int64_t set_timer {0};
};
89
90
// Forward declarations only; the full definitions are not needed by this header.
class BlockFileCache;
struct FileBlocksHolder;

// Owning handle to a FileBlocksHolder, as returned by
// FileCacheAllocatorBuilder::allocate_cache_holder().
using FileBlocksHolderPtr = std::unique_ptr<FileBlocksHolder>;
93
94
struct FileCacheAllocatorBuilder {
95
    bool _is_cold_data;
96
    uint64_t _expiration_time;
97
    UInt128Wrapper _cache_hash;
98
    BlockFileCache* _cache; // Only one ref, the lifetime is owned by FileCache
99
    FileBlocksHolderPtr allocate_cache_holder(size_t offset, size_t size, int64_t tablet_id) const;
100
};
101
102
struct KeyHash {
103
72.2M
    std::size_t operator()(const UInt128Wrapper& w) const {
104
72.2M
        return util_hash::HashLen16(w.value_.low(), w.value_.high());
105
72.2M
    }
106
};
107
108
using AccessKeyAndOffset = std::pair<UInt128Wrapper, size_t>;
109
struct KeyAndOffsetHash {
110
24.0M
    std::size_t operator()(const AccessKeyAndOffset& key) const {
111
24.0M
        return KeyHash()(key.first) ^ std::hash<uint64_t>()(key.second);
112
24.0M
    }
113
};
114
115
struct KeyMeta {
116
    uint64_t expiration_time; // absolute time
117
    FileCacheType type;
118
    int64_t tablet_id {0};
119
};
120
121
struct FileCacheKey {
122
    UInt128Wrapper hash;
123
    size_t offset;
124
    KeyMeta meta;
125
};
126
127
// Sizing configuration for one file cache instance. Every numeric field
// defaults to zero and `storage` defaults to an empty string.
struct FileCacheSettings {
    // Overall capacity.
    size_t capacity {0};

    // Per-queue limits: a size and an element count for each queue.
    size_t disposable_queue_size {0};
    size_t disposable_queue_elements {0};
    size_t index_queue_size {0};
    size_t index_queue_elements {0};
    size_t query_queue_size {0};
    size_t query_queue_elements {0};
    size_t ttl_queue_size {0};
    size_t ttl_queue_elements {0};

    size_t max_file_block_size {0};
    size_t max_query_cache_size {0};

    // Storage backend name; get_file_cache_settings() defaults it to "disk".
    std::string storage;

    // Text rendering of the settings; defined out of line.
    std::string to_string() const;
};
144
145
// Builds a FileCacheSettings for a cache of `capacity` with the given query
// cache limit. The four percentage parameters default to the DEFAULT_*_PERCENT
// constants and `storage` defaults to "disk". Defined out of line.
// NOTE(review): how the percentages are applied to `capacity` is not visible
// here - confirm against the implementation.
FileCacheSettings get_file_cache_settings(
        size_t capacity, size_t max_query_cache_size,
        size_t normal_percent = DEFAULT_NORMAL_PERCENT,
        size_t disposable_percent = DEFAULT_DISPOSABLE_PERCENT,
        size_t index_percent = DEFAULT_INDEX_PERCENT,
        size_t ttl_percent = DEFAULT_TTL_PERCENT,
        const std::string& storage = "disk");
151
152
struct CacheContext {
153
1.05M
    CacheContext(const IOContext* io_context) {
154
1.05M
        if (io_context->expiration_time != 0) {
155
2
            cache_type = FileCacheType::TTL;
156
2
            expiration_time = io_context->expiration_time;
157
1.05M
        } else if (io_context->is_index_data) {
158
285k
            cache_type = FileCacheType::INDEX;
159
767k
        } else if (io_context->is_disposable) {
160
6.01k
            cache_type = FileCacheType::DISPOSABLE;
161
761k
        } else {
162
761k
            cache_type = FileCacheType::NORMAL;
163
761k
        }
164
1.05M
        query_id = io_context->query_id ? *io_context->query_id : TUniqueId();
165
1.05M
        is_warmup = io_context->is_warmup;
166
1.05M
    }
167
63.1k
    CacheContext() = default;
168
    bool operator==(const CacheContext& rhs) const {
169
        return query_id == rhs.query_id && cache_type == rhs.cache_type &&
170
               expiration_time == rhs.expiration_time && is_cold_data == rhs.is_cold_data;
171
    }
172
    TUniqueId query_id;
173
    FileCacheType cache_type;
174
    int64_t expiration_time {0};
175
    bool is_cold_data {false};
176
    ReadStatistics* stats;
177
    bool is_warmup {false};
178
    int64_t tablet_id {0};
179
};
180
181
template <class Lock>
182
concept IsXLock = std::same_as<Lock, std::lock_guard<std::mutex>> ||
183
                  std::same_as<Lock, std::unique_lock<std::mutex>>;
184
185
class LRUQueue {
186
public:
187
1.43k
    LRUQueue() = default;
188
    LRUQueue(size_t max_size, size_t max_element_size, int64_t hot_data_interval)
189
729
            : max_size(max_size),
190
729
              max_element_size(max_element_size),
191
729
              hot_data_interval(hot_data_interval) {}
192
193
    struct HashFileKeyAndOffset {
194
2.66M
        std::size_t operator()(const std::pair<UInt128Wrapper, size_t>& pair) const {
195
2.66M
            return KeyHash()(pair.first) + pair.second;
196
2.66M
        }
197
    };
198
199
    struct FileKeyAndOffset {
200
        UInt128Wrapper hash;
201
        size_t offset;
202
        size_t size;
203
204
        FileKeyAndOffset(const UInt128Wrapper& hash, size_t offset, size_t size)
205
668k
                : hash(hash), offset(offset), size(size) {}
206
    };
207
208
    using Iterator = typename std::list<FileKeyAndOffset>::iterator;
209
210
80.9k
    size_t get_max_size() const { return max_size; }
211
1.28k
    size_t get_max_element_size() const { return max_element_size; }
212
213
    template <class T>
214
        requires IsXLock<T>
215
109k
    size_t get_capacity(T& /* cache_lock */) const {
216
109k
        return cache_size;
217
109k
    }
218
219
689
    size_t get_capacity_unsafe() const { return cache_size; }
220
221
716
    size_t get_elements_num_unsafe() const { return queue.size(); }
222
223
18.0k
    size_t get_elements_num(std::lock_guard<std::mutex>& /* cache_lock */) const {
224
18.0k
        return queue.size();
225
18.0k
    }
226
227
    Iterator add(const UInt128Wrapper& hash, size_t offset, size_t size,
228
                 std::lock_guard<std::mutex>& cache_lock);
229
    template <class T>
230
        requires IsXLock<T>
231
645k
    void remove(Iterator queue_it, T& /* cache_lock */) {
232
645k
        cache_size -= queue_it->size;
233
645k
        map.erase(std::make_pair(queue_it->hash, queue_it->offset));
234
645k
        queue.erase(queue_it);
235
645k
    }
236
237
    void move_to_end(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock);
238
239
    void resize(Iterator queue_it, size_t new_size, std::lock_guard<std::mutex>& cache_lock);
240
241
    std::string to_string(std::lock_guard<std::mutex>& cache_lock) const;
242
243
    bool contains(const UInt128Wrapper& hash, size_t offset,
244
                  std::lock_guard<std::mutex>& cache_lock) const;
245
246
887k
    Iterator begin() { return queue.begin(); }
247
248
888k
    Iterator end() { return queue.end(); }
249
250
    void remove_all(std::lock_guard<std::mutex>& cache_lock);
251
252
    Iterator get(const UInt128Wrapper& hash, size_t offset,
253
                 std::lock_guard<std::mutex>& /* cache_lock */) const;
254
255
135k
    int64_t get_hot_data_interval() const { return hot_data_interval; }
256
257
140
    void clear(std::lock_guard<std::mutex>& cache_lock) {
258
140
        queue.clear();
259
140
        map.clear();
260
140
        cache_size = 0;
261
140
    }
262
263
    size_t levenshtein_distance_from(LRUQueue& base, std::lock_guard<std::mutex>& cache_lock);
264
265
    size_t max_size;
266
    size_t max_element_size;
267
    std::list<FileKeyAndOffset> queue;
268
    std::unordered_map<std::pair<UInt128Wrapper, size_t>, Iterator, HashFileKeyAndOffset> map;
269
    size_t cache_size = 0;
270
    int64_t hot_data_interval {0};
271
};
272
struct FileCacheInfo {
273
    UInt128Wrapper hash {0};
274
    uint64_t expiration_time {0};
275
    uint64_t size {0};
276
    size_t offset {0};
277
    bool is_tmp {false};
278
    FileCacheType cache_type {NORMAL};
279
280
    std::string to_string() const;
281
};
282
283
// Bit set of inconsistencies detected for one block cache. Each enumerator is
// a distinct bit, so several can be OR-ed together in a single value.
class InconsistencyType {
    uint32_t type;

public:
    enum : uint32_t {
        // No anomaly
        NONE = 0,
        // Missing a block cache metadata in _files
        NOT_LOADED = 1 << 0,
        // A block cache is missing in storage
        MISSING_IN_STORAGE = 1 << 1,
        // Size of a block cache recorded in _files is inconsistent with the storage
        SIZE_INCONSISTENT = 1 << 2,
        // Cache type of a block cache recorded in _files is inconsistent with the storage
        CACHE_TYPE_INCONSISTENT = 1 << 3,
        // Expiration time of a block cache recorded in _files is inconsistent with the storage
        EXPIRATION_TIME_INCONSISTENT = 1 << 4,
        // File in storage has a _tmp suffix, but the state of block cache in _files is not set to downloading
        TMP_FILE_EXPECT_DOWNLOADING_STATE = 1 << 5
    };

    // Intentionally implicit: a raw flag combination converts to the class.
    InconsistencyType(uint32_t t = NONE) : type(t) {}

    // Exposes the stored bits by mutable reference so that callers can apply
    // operators such as |= directly to an InconsistencyType object.
    operator uint32_t&() { return type; }

    // Text rendering of the flags that are set; defined out of line.
    std::string to_string() const;
};
308
309
struct InconsistencyContext {
310
    // The infos in _files of BlockFileCache.
311
    std::vector<FileCacheInfo> infos_in_manager;
312
    std::vector<FileCacheInfo> infos_in_storage;
313
    std::vector<InconsistencyType> types;
314
};
315
316
// Returns the tablet id for `file_path`, or std::nullopt when there is none.
// Defined out of line. NOTE(review): presumably the id is parsed out of the
// path - confirm the expected path format against the implementation.
std::optional<int64_t> get_tablet_id(std::string file_path);
317
318
} // namespace doris::io