Coverage Report

Created: 2026-06-01 18:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/cache/file_cache_common.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h
19
// and modified by Doris
20
21
#pragma once
22
#include <cstdint>
23
#include <string>
24
#include <vector>
25
26
#include "core/uint128.h"
27
#include "io/io_common.h"
28
29
namespace doris::io {
30
31
// Compile-time defaults shared by the file cache code.

// 100 * 1024 = 102400 elements.
inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 1024 * 100;
// 1024 * 1024 = 1048576; presumably a byte count (1 MiB) -- confirm at the use sites.
inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1024 * 1024;

// Default percentages, used as the default arguments of get_file_cache_settings().
// The four values add up to 100 (40 + 5 + 5 + 50).
inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
inline static constexpr size_t DEFAULT_INDEX_PERCENT = 5;
inline static constexpr size_t DEFAULT_TTL_PERCENT = 50;
37
38
// Local spelling for the project's UInt128 type; the rest of this header uses uint128_t.
using uint128_t = UInt128;
39
40
// Kind of a file cache entry. Every enumerator carries an explicit numeric
// value; they are listed here in ascending value order.
// Unscoped on purpose: other code in this header names the enumerators
// both qualified (FileCacheType::TTL) and unqualified (NORMAL).
enum FileCacheType {
    DISPOSABLE = 0,
    NORMAL = 1,
    INDEX = 2,
    TTL = 3,
};
46
// Declaration only; the definition is not in this header.
// Maps a FileCacheType to a string. NOTE(review): "surfix" looks like a
// misspelling of "suffix" (presumably a file-name suffix) -- confirm against the definition.
std::string cache_type_to_surfix(FileCacheType type);
47
// Declaration only; the definition is not in this header.
// Maps a string to a FileCacheType; by its name presumably the inverse of
// cache_type_to_surfix(). Handling of unrecognized input is not visible here -- confirm.
FileCacheType surfix_to_cache_type(const std::string& str);
48
49
// Declaration only; the definition is not in this header.
// Maps a string to a FileCacheType. The accepted spellings and the handling
// of unrecognized input are not visible here -- confirm against the definition.
FileCacheType string_to_cache_type(const std::string& str);
50
// Declaration only; the definition is not in this header.
// Maps a FileCacheType to a string; by its name presumably the inverse of
// string_to_cache_type() -- confirm against the definition.
std::string cache_type_to_string(FileCacheType type);
51
52
struct UInt128Wrapper {
53
    uint128_t value_;
54
    [[nodiscard]] std::string to_string() const;
55
56
    UInt128Wrapper() = default;
57
103k
    explicit UInt128Wrapper(const uint128_t& value) : value_(value) {}
58
59
9.44M
    bool operator==(const UInt128Wrapper& other) const { return value_ == other.value_; }
60
61
81.9k
    uint64_t high() const { return static_cast<uint64_t>(value_ >> 64); }
62
81.9k
    uint64_t low() const { return static_cast<uint64_t>(value_); }
63
64
    friend std::ostream& operator<<(std::ostream& os, const UInt128Wrapper& wrapper) {
65
        os << "UInt128Wrapper(" << wrapper.high() << ", " << wrapper.low() << ")";
66
        return os;
67
    }
68
};
69
70
// Plain aggregate of per-read bookkeeping: three flags, two byte counters and
// a set of timers. Every counter and timer starts at zero.
// The timer units are not stated in this header -- confirm with the code that fills them in.
struct ReadStatistics {
    // Flags. Note that hit_cache defaults to true, unlike the other two.
    bool hit_cache {true};
    bool from_peer_cache {false};
    bool skip_cache {false};

    // Byte counters.
    int64_t bytes_read {0};
    int64_t bytes_write_into_file_cache {0};

    // Timers.
    int64_t remote_read_timer {0};
    int64_t peer_read_timer {0};
    int64_t remote_wait_timer {0}; // wait for other downloader
    int64_t local_read_timer {0};
    int64_t local_write_timer {0};
    int64_t read_cache_file_directly_timer {0};
    int64_t cache_get_or_set_timer {0};
    int64_t lock_wait_timer {0};
    int64_t get_timer {0};
    int64_t set_timer {0};
};
87
88
class BlockFileCache;
89
struct FileBlocksHolder;
90
// Owning handle to a FileBlocksHolder (only forward-declared in this header).
// NOTE(review): std::unique_ptr comes from <memory>, which is not among the
// includes visible at the top of this header -- presumably pulled in transitively; confirm.
using FileBlocksHolderPtr = std::unique_ptr<FileBlocksHolder>;
91
92
struct FileCacheAllocatorBuilder {
93
    bool _is_cold_data;
94
    uint64_t _expiration_time;
95
    UInt128Wrapper _cache_hash;
96
    BlockFileCache* _cache; // Only one ref, the lifetime is owned by FileCache
97
    FileBlocksHolderPtr allocate_cache_holder(size_t offset, size_t size, int64_t tablet_id) const;
98
};
99
100
struct KeyHash {
101
9.27M
    std::size_t operator()(const UInt128Wrapper& w) const {
102
9.27M
        return util_hash::HashLen16(w.value_.low(), w.value_.high());
103
9.27M
    }
104
};
105
106
// (hash, size_t) pair; by its name the second member is an offset. Hashed by KeyAndOffsetHash.
using AccessKeyAndOffset = std::pair<UInt128Wrapper, size_t>;
107
struct KeyAndOffsetHash {
108
1.31M
    std::size_t operator()(const AccessKeyAndOffset& key) const {
109
1.31M
        return KeyHash()(key.first) ^ std::hash<uint64_t>()(key.second);
110
1.31M
    }
111
};
112
113
struct KeyMeta {
114
    uint64_t expiration_time; // absolute time
115
    FileCacheType type;
116
    int64_t tablet_id {0};
117
    std::string table_name;
118
    std::string partition_name;
119
    uint64_t context_id {0};
120
};
121
122
struct FileCacheKey {
123
    UInt128Wrapper hash;
124
    size_t offset;
125
    KeyMeta meta;
126
};
127
128
// Sizing configuration for one file cache. Each of the four queues
// (disposable, index, query, ttl) has a "size" and an "elements" setting;
// all numeric settings default to zero and `storage` defaults to an empty string.
// Instances are produced by get_file_cache_settings(), declared in this header.
struct FileCacheSettings {
    size_t capacity = 0;

    size_t disposable_queue_size = 0;
    size_t disposable_queue_elements = 0;

    size_t index_queue_size = 0;
    size_t index_queue_elements = 0;

    size_t query_queue_size = 0;
    size_t query_queue_elements = 0;

    size_t ttl_queue_size = 0;
    size_t ttl_queue_elements = 0;

    size_t max_file_block_size = 0;
    size_t max_query_cache_size = 0;

    std::string storage;

    // Renders the settings as text; defined out of line.
    std::string to_string() const;
};
145
146
// Builds a FileCacheSettings from a capacity, a max query cache size, four
// percentage arguments and a storage name. Declaration only; the definition
// is not in this header.
// The percentage arguments default to the DEFAULT_*_PERCENT constants and
// `storage` defaults to "disk".
// NOTE(review): presumably the percentages split `capacity` among the four
// queues -- confirm against the definition.
FileCacheSettings get_file_cache_settings(
        size_t capacity,
        size_t max_query_cache_size,
        size_t normal_percent = DEFAULT_NORMAL_PERCENT,
        size_t disposable_percent = DEFAULT_DISPOSABLE_PERCENT,
        size_t index_percent = DEFAULT_INDEX_PERCENT,
        size_t ttl_percent = DEFAULT_TTL_PERCENT,
        const std::string& storage = "disk");
152
153
struct CacheContext {
154
743k
    CacheContext(const IOContext* io_context) {
155
743k
        if (io_context->expiration_time != 0) {
156
2
            cache_type = FileCacheType::TTL;
157
2
            expiration_time = io_context->expiration_time;
158
743k
        } else if (io_context->is_index_data) {
159
15.7k
            cache_type = FileCacheType::INDEX;
160
728k
        } else if (io_context->is_disposable) {
161
404
            cache_type = FileCacheType::DISPOSABLE;
162
727k
        } else {
163
727k
            cache_type = FileCacheType::NORMAL;
164
727k
        }
165
743k
        query_id = io_context->query_id ? *io_context->query_id : TUniqueId();
166
743k
        is_warmup = io_context->is_warmup;
167
743k
        table_name = io_context->table_name;
168
743k
        partition_name = io_context->partition_name;
169
743k
    }
170
15.4k
    CacheContext() = default;
171
    bool operator==(const CacheContext& rhs) const {
172
        return query_id == rhs.query_id && cache_type == rhs.cache_type &&
173
               expiration_time == rhs.expiration_time && is_cold_data == rhs.is_cold_data;
174
    }
175
    TUniqueId query_id;
176
    FileCacheType cache_type;
177
    int64_t expiration_time {0};
178
    bool is_cold_data {false};
179
    ReadStatistics* stats;
180
    bool is_warmup {false};
181
    int64_t tablet_id {0};
182
    std::string table_name;
183
    std::string partition_name;
184
    uint64_t context_id {0};
185
};
186
187
template <class Lock>
188
concept IsXLock = std::same_as<Lock, std::lock_guard<std::mutex>> ||
189
                  std::same_as<Lock, std::unique_lock<std::mutex>>;
190
191
class LRUQueue {
192
public:
193
1.44k
    LRUQueue() = default;
194
    LRUQueue(size_t max_size, size_t max_element_size, int64_t hot_data_interval)
195
713
            : max_size(max_size),
196
713
              max_element_size(max_element_size),
197
713
              hot_data_interval(hot_data_interval) {}
198
199
    struct HashFileKeyAndOffset {
200
695k
        std::size_t operator()(const std::pair<UInt128Wrapper, size_t>& pair) const {
201
695k
            return KeyHash()(pair.first) + pair.second;
202
695k
        }
203
    };
204
205
    struct FileKeyAndOffset {
206
        UInt128Wrapper hash;
207
        size_t offset;
208
        size_t size;
209
210
        FileKeyAndOffset(const UInt128Wrapper& hash, size_t offset, size_t size)
211
77.2k
                : hash(hash), offset(offset), size(size) {}
212
    };
213
214
    using Iterator = typename std::list<FileKeyAndOffset>::iterator;
215
216
23.2k
    size_t get_max_size() const { return max_size; }
217
688
    size_t get_max_element_size() const { return max_element_size; }
218
219
    template <class T>
220
        requires IsXLock<T>
221
31.3k
    size_t get_capacity(T& /* cache_lock */) const {
222
31.3k
        return cache_size;
223
31.3k
    }
224
225
689
    size_t get_capacity_unsafe() const { return cache_size; }
226
227
716
    size_t get_elements_num_unsafe() const { return queue.size(); }
228
229
5.21k
    size_t get_elements_num(std::lock_guard<std::mutex>& /* cache_lock */) const {
230
5.21k
        return queue.size();
231
5.21k
    }
232
233
    Iterator add(const UInt128Wrapper& hash, size_t offset, size_t size,
234
                 std::lock_guard<std::mutex>& cache_lock);
235
    template <class T>
236
        requires IsXLock<T>
237
58.2k
    void remove(Iterator queue_it, T& /* cache_lock */) {
238
58.2k
        cache_size -= queue_it->size;
239
58.2k
        map.erase(std::make_pair(queue_it->hash, queue_it->offset));
240
58.2k
        queue.erase(queue_it);
241
58.2k
    }
242
243
    void move_to_end(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock);
244
245
    void resize(Iterator queue_it, size_t new_size, std::lock_guard<std::mutex>& cache_lock);
246
247
    std::string to_string(std::lock_guard<std::mutex>& cache_lock) const;
248
249
    bool contains(const UInt128Wrapper& hash, size_t offset,
250
                  std::lock_guard<std::mutex>& cache_lock) const;
251
252
143k
    Iterator begin() { return queue.begin(); }
253
254
143k
    Iterator end() { return queue.end(); }
255
256
    void remove_all(std::lock_guard<std::mutex>& cache_lock);
257
258
    Iterator get(const UInt128Wrapper& hash, size_t offset,
259
                 std::lock_guard<std::mutex>& /* cache_lock */) const;
260
261
36.6k
    int64_t get_hot_data_interval() const { return hot_data_interval; }
262
263
108
    void clear(std::lock_guard<std::mutex>& cache_lock) {
264
108
        queue.clear();
265
108
        map.clear();
266
108
        cache_size = 0;
267
108
    }
268
269
    size_t levenshtein_distance_from(LRUQueue& base, std::lock_guard<std::mutex>& cache_lock);
270
271
    size_t max_size;
272
    size_t max_element_size;
273
    std::list<FileKeyAndOffset> queue;
274
    std::unordered_map<std::pair<UInt128Wrapper, size_t>, Iterator, HashFileKeyAndOffset> map;
275
    size_t cache_size = 0;
276
    int64_t hot_data_interval {0};
277
};
278
struct FileCacheInfo {
279
    UInt128Wrapper hash {0};
280
    uint64_t expiration_time {0};
281
    uint64_t size {0};
282
    size_t offset {0};
283
    bool is_tmp {false};
284
    FileCacheType cache_type {NORMAL};
285
286
    std::string to_string() const;
287
};
288
289
// Bit set of inconsistency flags stored in a uint32_t.
// The constructor and the conversion operator are deliberately implicit, so
// an instance can be created from, and used as, a plain uint32_t lvalue.
class InconsistencyType {
public:
    // Each flag occupies one bit; NONE is the empty set.
    enum : uint32_t {
        // No anomaly
        NONE = 0,
        // Missing a block cache metadata in _files
        NOT_LOADED = 1 << 0,
        // A block cache is missing in storage
        MISSING_IN_STORAGE = 1 << 1,
        // Size of a block cache recorded in _files is inconsistent with the storage
        SIZE_INCONSISTENT = 1 << 2,
        // Cache type of a block cache recorded in _files is inconsistent with the storage
        CACHE_TYPE_INCONSISTENT = 1 << 3,
        // Expiration time of a block cache recorded in _files is inconsistent with the storage
        EXPIRATION_TIME_INCONSISTENT = 1 << 4,
        // File in storage has a _tmp suffix, but the state of block cache in _files is not set to downloading
        TMP_FILE_EXPECT_DOWNLOADING_STATE = 1 << 5
    };

    // Starts from the given bits; the default is the empty set.
    InconsistencyType(uint32_t t = 0) : type {t} {}

    // Exposes the stored bits as a mutable reference.
    // Non-const only: a const InconsistencyType cannot be converted.
    operator uint32_t&() { return type; }

    // Defined out of line.
    std::string to_string() const;

private:
    uint32_t type;
};
314
315
struct InconsistencyContext {
316
    // The infos in _files of BlockFileCache.
317
    std::vector<FileCacheInfo> infos_in_manager;
318
    std::vector<FileCacheInfo> infos_in_storage;
319
    std::vector<InconsistencyType> types;
320
};
321
322
// Declaration only; the definition is not in this header.
// Takes a file path by value and returns an optional tablet id.
// NOTE(review): presumably std::nullopt when no tablet id can be derived from
// the path -- confirm against the definition.
std::optional<int64_t> get_tablet_id(std::string file_path);
323
324
} // namespace doris::io