Coverage Report

Created: 2026-06-02 17:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/cache/file_cache_common.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h
19
// and modified by Doris
20
21
#pragma once
22
#include <cstdint>
23
#include <string>
24
#include <vector>
25
26
#include "core/uint128.h"
27
#include "io/io_common.h"
28
29
namespace doris::io {
30
31
// Compile-time sizing defaults for the file cache.
// 100 * 1024 = 102400; per the name, a default element count (its users are outside this header).
inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 * 1024;
32
// 1 * 1024 * 1024 = 1048576; presumably bytes (1 MiB) -- TODO confirm the unit with the users.
inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 1024;
33
// The four values below are the default *_percent arguments of get_file_cache_settings();
// together they add up to 100 (40 + 5 + 5 + 50).
inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
34
inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
35
inline static constexpr size_t DEFAULT_INDEX_PERCENT = 5;
36
inline static constexpr size_t DEFAULT_TTL_PERCENT = 50;
37
38
// Local spelling for the project's 128-bit unsigned type (UInt128 is not declared in this
// header; presumably it comes from "core/uint128.h").
using uint128_t = UInt128;
39
40
// Category of a cached entry. The numeric values are assigned explicitly and the
// enumerators are intentionally NOT listed in numeric order.
// NOTE(review): presumably the values are relied on outside this header (e.g. stored or
// used as indices) -- confirm before renumbering.
enum FileCacheType {
41
    INDEX = 2,
42
    NORMAL = 1,
43
    DISPOSABLE = 0,
44
    TTL = 3,
45
};
46
// Conversions between FileCacheType and text; all four are defined outside this header.
// "surfix" is a misspelling of "suffix" that is part of the public function names, so it
// cannot be corrected without touching every caller.
std::string cache_type_to_surfix(FileCacheType type);
47
FileCacheType surfix_to_cache_type(const std::string& str);
48
49
// Same direction pair as above, but for the plain string form rather than the suffix form.
// NOTE(review): the exact strings and the handling of unknown input are not visible here.
FileCacheType string_to_cache_type(const std::string& str);
50
std::string cache_type_to_string(FileCacheType type);
51
52
// Value wrapper around a 128-bit integer. It is the key type hashed by KeyHash and the
// first element of the (hash, offset) pairs declared in this header.
struct UInt128Wrapper {
53
    // The wrapped value. Public: KeyHash reads it directly.
    uint128_t value_;
54
    // Textual form of the value; defined outside this header.
    [[nodiscard]] std::string to_string() const;
55
56
    // Defaulted: value_ gets whatever uint128_t's own default construction yields.
    UInt128Wrapper() = default;
57
103k
    // Explicit, so a bare uint128_t never converts to a wrapper implicitly.
    explicit UInt128Wrapper(const uint128_t& value) : value_(value) {}
58
59
6.54M
    // Two wrappers are equal exactly when their wrapped values compare equal.
    bool operator==(const UInt128Wrapper& other) const { return value_ == other.value_; }
60
61
61.7k
    // value_ shifted right by 64 bits, converted to uint64_t (the upper half).
    uint64_t high() const { return static_cast<uint64_t>(value_ >> 64); }
62
61.7k
    // value_ converted to uint64_t (the lower half, assuming the conversion truncates).
    uint64_t low() const { return static_cast<uint64_t>(value_); }
63
64
    // Streams the wrapper as "UInt128Wrapper(<high>, <low>)" using the two accessors above.
    // NOTE(review): std::ostream is used without <ostream>/<iosfwd> being included directly.
    friend std::ostream& operator<<(std::ostream& os, const UInt128Wrapper& wrapper) {
65
        os << "UInt128Wrapper(" << wrapper.high() << ", " << wrapper.low() << ")";
66
        return os;
67
    }
68
};
69
70
// Plain accumulator of flags, byte counters and timers describing a cached read.
// Every counter/timer starts at 0. CacheContext carries a pointer to one of these.
// NOTE(review): the time unit of the *_timer fields is not visible in this header.
struct ReadStatistics {
71
    // Starts as true (unlike the other flags); presumably cleared on a miss -- confirm.
    bool hit_cache = true;
72
    bool from_peer_cache = false;
73
    bool skip_cache = false;
74
    // Byte counters, split by where the data came from / went to.
    int64_t bytes_read = 0;
75
    int64_t bytes_read_from_local = 0;
76
    int64_t bytes_read_from_remote = 0;
77
    int64_t bytes_read_from_peer = 0;
78
    int64_t bytes_write_into_file_cache = 0;
79
    // Timers, one per phase named by the field.
    int64_t remote_read_timer = 0;
80
    int64_t peer_read_timer = 0;
81
    int64_t remote_wait_timer = 0; // wait for other downloader
82
    int64_t local_read_timer = 0;
83
    int64_t local_write_timer = 0;
84
    int64_t read_cache_file_directly_timer = 0;
85
    int64_t cache_get_or_set_timer = 0;
86
    int64_t lock_wait_timer = 0;
87
    int64_t get_timer = 0;
88
    int64_t set_timer = 0;
89
};
90
91
// Forward declarations: this header only needs pointers to these types.
class BlockFileCache;
92
struct FileBlocksHolder;
93
// Owning handle to a FileBlocksHolder (incomplete type here; must be complete where the
// pointer is destroyed).
// NOTE(review): std::unique_ptr is used without <memory> being included directly by this
// header; presumably it arrives through one of the project includes.
using FileBlocksHolderPtr = std::unique_ptr<FileBlocksHolder>;
94
95
// Bundles the parameters needed to allocate a FileBlocksHolder from a BlockFileCache.
// None of the members has a default initializer, so the creator must set all of them.
struct FileCacheAllocatorBuilder {
96
    bool _is_cold_data;
97
    // NOTE(review): presumably the same absolute-time convention as KeyMeta::expiration_time.
    uint64_t _expiration_time;
98
    UInt128Wrapper _cache_hash;
99
    BlockFileCache* _cache; // Only one ref, the lifetime is owned by FileCache
100
    // Returns an owning holder for the range [offset, offset + size) -- presumably of the
    // file identified by _cache_hash; defined outside this header.
    FileBlocksHolderPtr allocate_cache_holder(size_t offset, size_t size, int64_t tablet_id) const;
101
};
102
103
// Hash functor for UInt128Wrapper, usable as the Hash parameter of unordered containers.
struct KeyHash {
104
6.33M
    std::size_t operator()(const UInt128Wrapper& w) const {
105
6.33M
        // Mixes the two 64-bit halves via util_hash::HashLen16. Note this calls low()/high()
        // on the wrapped uint128_t itself, not the wrapper's own low()/high() accessors.
        return util_hash::HashLen16(w.value_.low(), w.value_.high());
106
6.33M
    }
107
};
108
109
// (hash, offset) pair; presumably the file's hash and an offset inside it.
using AccessKeyAndOffset = std::pair<UInt128Wrapper, size_t>;
110
// Hash functor for AccessKeyAndOffset.
struct KeyAndOffsetHash {
111
449k
    std::size_t operator()(const AccessKeyAndOffset& key) const {
112
449k
        // XOR of the key hash and the std::hash of the offset. (LRUQueue's nested
        // HashFileKeyAndOffset combines the same two inputs with '+' instead.)
        return KeyHash()(key.first) ^ std::hash<uint64_t>()(key.second);
113
449k
    }
114
};
115
116
// Metadata attached to a cache key (see FileCacheKey::meta).
// expiration_time and type have no default initializer; the remaining members do.
struct KeyMeta {
117
    uint64_t expiration_time; // absolute time
118
    FileCacheType type;
119
    int64_t tablet_id {0};
120
    std::string table_name;
121
    std::string partition_name;
122
    uint64_t context_id {0};
123
};
124
125
// Full key of a cached block: a 128-bit hash, an offset and the associated metadata.
// NOTE(review): presumably `hash` identifies the file and `offset` the block start -- confirm.
struct FileCacheKey {
126
    UInt128Wrapper hash;
127
    // No default initializer; must be set by whoever builds the key.
    size_t offset;
128
    KeyMeta meta;
129
};
130
131
// Sizing configuration for one file cache instance, as returned by get_file_cache_settings().
// Every numeric field defaults to 0; each queue has a *_size and an *_elements limit.
// NOTE(review): presumably *_size is a byte budget and *_elements an entry-count budget.
struct FileCacheSettings {
132
    size_t capacity {0};
133
    size_t disposable_queue_size {0};
134
    size_t disposable_queue_elements {0};
135
    size_t index_queue_size {0};
136
    size_t index_queue_elements {0};
137
    // NOTE(review): "query" here presumably corresponds to FileCacheType::NORMAL -- confirm.
    size_t query_queue_size {0};
138
    size_t query_queue_elements {0};
139
    size_t ttl_queue_size {0};
140
    size_t ttl_queue_elements {0};
141
    size_t max_file_block_size {0};
142
    size_t max_query_cache_size {0};
143
    // Storage backend name; get_file_cache_settings() defaults its argument to "disk".
    std::string storage;
144
145
    // to string
146
    std::string to_string() const;
147
};
148
149
// Builds a FileCacheSettings from a total capacity and per-queue percentages.
// The four percentages default to the DEFAULT_*_PERCENT constants (which sum to 100) and
// `storage` defaults to "disk". Defined outside this header.
// NOTE(review): how the percentages are applied, and whether they are validated to sum to
// 100, is not visible here.
FileCacheSettings get_file_cache_settings(size_t capacity, size_t max_query_cache_size,
150
                                          size_t normal_percent = DEFAULT_NORMAL_PERCENT,
151
                                          size_t disposable_percent = DEFAULT_DISPOSABLE_PERCENT,
152
                                          size_t index_percent = DEFAULT_INDEX_PERCENT,
153
                                          size_t ttl_percent = DEFAULT_TTL_PERCENT,
154
                                          const std::string& storage = "disk");
155
156
// Per-request cache parameters derived from an IOContext.
struct CacheContext {
157
740k
    // Derives the cache type from io_context, first match wins:
    //   expiration_time != 0 -> TTL (and the expiration time is copied),
    //   is_index_data        -> INDEX,
    //   is_disposable        -> DISPOSABLE,
    //   otherwise            -> NORMAL.
    // io_context is dereferenced unconditionally and must not be null.
    // Intentionally left non-explicit to keep existing implicit conversions compiling.
    // is_cold_data, stats, tablet_id and context_id are NOT taken from io_context; they keep
    // their default member initializers.
    CacheContext(const IOContext* io_context) {
158
740k
        if (io_context->expiration_time != 0) {
159
2
            cache_type = FileCacheType::TTL;
160
2
            expiration_time = io_context->expiration_time;
161
740k
        } else if (io_context->is_index_data) {
162
9.38k
            cache_type = FileCacheType::INDEX;
163
730k
        } else if (io_context->is_disposable) {
164
775
            cache_type = FileCacheType::DISPOSABLE;
165
729k
        } else {
166
729k
            cache_type = FileCacheType::NORMAL;
167
729k
        }
168
740k
        // io_context->query_id is a nullable pointer: fall back to a default TUniqueId.
        query_id = io_context->query_id ? *io_context->query_id : TUniqueId();
169
740k
        is_warmup = io_context->is_warmup;
170
740k
        table_name = io_context->table_name;
171
740k
        partition_name = io_context->partition_name;
172
740k
    }
173
15.1k
    // NOTE(review): cache_type has no default member initializer, so a default-initialized
    // context must have it assigned before it is read.
    CacheContext() = default;
174
    // Compares only query_id, cache_type, expiration_time and is_cold_data; every other
    // member (stats, is_warmup, tablet_id, names, context_id) is ignored.
    bool operator==(const CacheContext& rhs) const {
175
        return query_id == rhs.query_id && cache_type == rhs.cache_type &&
176
               expiration_time == rhs.expiration_time && is_cold_data == rhs.is_cold_data;
177
    }
178
    TUniqueId query_id;
179
    FileCacheType cache_type;
180
    int64_t expiration_time {0};
181
    bool is_cold_data {false};
182
    // Non-owning. Neither constructor sets it, so it defaults to nullptr instead of being
    // left as an indeterminate pointer.
    ReadStatistics* stats {nullptr};
183
    bool is_warmup {false};
184
    int64_t tablet_id {0};
185
    std::string table_name;
186
    std::string partition_name;
187
    uint64_t context_id {0};
188
};
189
190
// Satisfied only by std::lock_guard<std::mutex> and std::unique_lock<std::mutex>.
// Used to constrain "lock witness" parameters so that either lock type can be passed.
// NOTE(review): std::same_as and std::mutex are used without <concepts>/<mutex> being
// included directly by this header.
template <class Lock>
191
concept IsXLock = std::same_as<Lock, std::lock_guard<std::mutex>> ||
192
                  std::same_as<Lock, std::unique_lock<std::mutex>>;
193
194
// Queue of cached (hash, offset, size) entries backed by a std::list, plus an index from
// (hash, offset) to the entry's list iterator.
// Most methods take a lock object that the inline bodies never use; it serves as a
// call-site witness that a lock is held (presumably the owning cache's mutex). The
// *_unsafe variants skip that witness.
class LRUQueue {
195
public:
196
1.46k
    // Leaves the queue empty; both limits default to 0 through their member initializers.
    LRUQueue() = default;
197
    LRUQueue(size_t max_size, size_t max_element_size, int64_t hot_data_interval)
198
725
            : max_size(max_size),
199
725
              max_element_size(max_element_size),
200
725
              hot_data_interval(hot_data_interval) {}
201
202
    // Hash for the index: key hash plus the offset.
    struct HashFileKeyAndOffset {
203
662k
        std::size_t operator()(const std::pair<UInt128Wrapper, size_t>& pair) const {
204
662k
            return KeyHash()(pair.first) + pair.second;
205
662k
        }
206
    };
207
208
    // One queue entry.
    struct FileKeyAndOffset {
209
        UInt128Wrapper hash;
210
        size_t offset;
211
        size_t size;
212
213
        FileKeyAndOffset(const UInt128Wrapper& hash, size_t offset, size_t size)
214
66.2k
                : hash(hash), offset(offset), size(size) {}
215
    };
216
217
    // std::list iterators stay valid until their own element is erased, which is what
    // allows the index to store them.
    using Iterator = typename std::list<FileKeyAndOffset>::iterator;
218
219
22.9k
    size_t get_max_size() const { return max_size; }
220
688
    size_t get_max_element_size() const { return max_element_size; }
221
222
    // Despite the name, returns the CURRENT accumulated size (cache_size), not max_size.
    template <class T>
223
        requires IsXLock<T>
224
31.1k
    size_t get_capacity(T& /* cache_lock */) const {
225
31.1k
        return cache_size;
226
31.1k
    }
227
228
689
    size_t get_capacity_unsafe() const { return cache_size; }
229
230
716
    size_t get_elements_num_unsafe() const { return queue.size(); }
231
232
5.24k
    size_t get_elements_num(std::lock_guard<std::mutex>& /* cache_lock */) const {
233
5.24k
        return queue.size();
234
5.24k
    }
235
236
    // Defined outside this header.
    Iterator add(const UInt128Wrapper& hash, size_t offset, size_t size,
237
                 std::lock_guard<std::mutex>& cache_lock);
238
    // Removes one entry and its index record, and subtracts its size from cache_size.
    // queue_it must be a valid iterator into this queue; it is invalid afterwards.
    template <class T>
239
        requires IsXLock<T>
240
48.2k
    void remove(Iterator queue_it, T& /* cache_lock */) {
241
48.2k
        // Read everything needed from *queue_it before the list erase invalidates it.
        cache_size -= queue_it->size;
242
48.2k
        map.erase(std::make_pair(queue_it->hash, queue_it->offset));
243
48.2k
        queue.erase(queue_it);
244
48.2k
    }
245
246
    // The following members are defined outside this header.
    void move_to_end(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock);
247
248
    void resize(Iterator queue_it, size_t new_size, std::lock_guard<std::mutex>& cache_lock);
249
250
    std::string to_string(std::lock_guard<std::mutex>& cache_lock) const;
251
252
    bool contains(const UInt128Wrapper& hash, size_t offset,
253
                  std::lock_guard<std::mutex>& cache_lock) const;
254
255
133k
    // Iteration over the underlying list; no lock witness is required here.
    Iterator begin() { return queue.begin(); }
256
257
133k
    Iterator end() { return queue.end(); }
258
259
    void remove_all(std::lock_guard<std::mutex>& cache_lock);
260
261
    Iterator get(const UInt128Wrapper& hash, size_t offset,
262
                 std::lock_guard<std::mutex>& /* cache_lock */) const;
263
264
38.2k
    int64_t get_hot_data_interval() const { return hot_data_interval; }
265
266
108
    // Drops every entry and the index, and resets the accumulated size to 0.
    // The limits and hot_data_interval are left unchanged.
    void clear(std::lock_guard<std::mutex>& cache_lock) {
267
108
        queue.clear();
268
108
        map.clear();
269
108
        cache_size = 0;
270
108
    }
271
272
    // Defined outside this header; per the name, an edit distance between this queue and
    // `base` -- NOTE(review): confirm what is compared.
    size_t levenshtein_distance_from(LRUQueue& base, std::lock_guard<std::mutex>& cache_lock);
273
274
    // Limits. Zero-initialized so a default-constructed queue never exposes indeterminate
    // values through get_max_size() / get_max_element_size().
    size_t max_size {0};
275
    size_t max_element_size {0};
276
    std::list<FileKeyAndOffset> queue;
277
    // (hash, offset) -> position of that entry in `queue`.
    std::unordered_map<std::pair<UInt128Wrapper, size_t>, Iterator, HashFileKeyAndOffset> map;
278
    // Running total maintained by remove()/clear() here and by the out-of-line members.
    size_t cache_size = 0;
279
    int64_t hot_data_interval {0};
280
};
281
// Snapshot describing one cached block; InconsistencyContext stores vectors of these.
// Every member has a default: zero values, is_tmp = false, cache_type = NORMAL.
struct FileCacheInfo {
282
    // Brace-initialized through UInt128Wrapper's explicit constructor with the value 0.
    UInt128Wrapper hash {0};
283
    uint64_t expiration_time {0};
284
    uint64_t size {0};
285
    size_t offset {0};
286
    // NOTE(review): presumably mirrors the "_tmp" file suffix mentioned in
    // InconsistencyType::TMP_FILE_EXPECT_DOWNLOADING_STATE -- confirm.
    bool is_tmp {false};
287
    FileCacheType cache_type {NORMAL};
288
289
    // Defined outside this header.
    std::string to_string() const;
290
};
291
292
// Bit set of inconsistency flags stored in a uint32_t. Each enumerator except NONE is a
// distinct single bit (1 << 0 .. 1 << 5), so several can be OR-ed together.
class InconsistencyType {
293
    // The combined flag bits.
    uint32_t type;
294
295
public:
296
    enum : uint32_t {
297
        // No anomaly
298
        NONE = 0,
299
        // Missing a block cache metadata in _files
300
        NOT_LOADED = 1 << 0,
301
        // A block cache is missing in storage
302
        MISSING_IN_STORAGE = 1 << 1,
303
        // Size of a block cache recorded in _files is inconsistent with the storage
304
        SIZE_INCONSISTENT = 1 << 2,
305
        // Cache type of a block cache recorded in _files is inconsistent with the storage
306
        CACHE_TYPE_INCONSISTENT = 1 << 3,
307
        // Expiration time of a block cache recorded in _files is inconsistent with the storage
308
        EXPIRATION_TIME_INCONSISTENT = 1 << 4,
309
        // File in storage has a _tmp suffix, but the state of block cache in _files is not set to downloading
310
        TMP_FILE_EXPECT_DOWNLOADING_STATE = 1 << 5
311
    };
312
0
    // Implicit on purpose: a raw flag value (default 0, i.e. NONE) converts to the class.
    InconsistencyType(uint32_t t = 0) : type(t) {}
313
0
    // Exposes the stored bits as a mutable reference, so built-in operators such as |=
    // work on the object directly. Non-const only: a const InconsistencyType cannot be
    // converted to uint32_t through this operator.
    operator uint32_t&() { return type; }
314
315
    // Defined outside this header.
    std::string to_string() const;
316
};
317
318
// Collected results of a consistency check between the cache manager and storage.
// NOTE(review): the three vectors are presumably parallel (same index = same block) --
// confirm against the code that fills them.
struct InconsistencyContext {
319
    // The infos in _files of BlockFileCache.
320
    std::vector<FileCacheInfo> infos_in_manager;
321
    // The corresponding infos as found in storage.
    std::vector<FileCacheInfo> infos_in_storage;
322
    // The inconsistency flags detected.
    std::vector<InconsistencyType> types;
323
};
324
325
// Returns a tablet id for `file_path`, or std::nullopt; defined outside this header.
// NOTE(review): presumably the id is parsed out of the path -- the accepted path layout is
// not visible here. The path is taken by value, and std::optional is used without
// <optional> being included directly by this header.
std::optional<int64_t> get_tablet_id(std::string file_path);
326
327
} // namespace doris::io