Coverage Report

Created: 2026-03-12 14:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/cache/file_cache_common.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h
19
// and modified by Doris
20
21
#pragma once
22
#include <cstdint>
23
#include <vector>
24
25
#include "core/uint128.h"
26
#include "io/io_common.h"
27
28
namespace doris::io {
29
30
// Default sizing constants for the file cache.
//
// These are `inline constexpr` without `static`: `static` would give each
// translation unit that includes this header its own internal-linkage copy,
// whereas an inline variable is a single entity shared program-wide.

// Default element count for the remote-FS objects cache: 100 * 1024.
// NOTE(review): meaning is taken from the name; usage lives outside this header.
inline constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 * 1024;
// Maximum size of one file block: 1 MiB.
inline constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 1024;
// Default percentage arguments of get_file_cache_settings(); the four sum to 100.
inline constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
inline constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
inline constexpr size_t DEFAULT_INDEX_PERCENT = 5;
inline constexpr size_t DEFAULT_TTL_PERCENT = 50;
36
37
// Local alias for the project's 128-bit unsigned type (declared in "core/uint128.h").
using uint128_t = UInt128;
38
39
// Category of a cache entry. Every enumerator has an explicit numeric value,
// so the listing order here is purely cosmetic (shown in ascending value).
enum FileCacheType {
    DISPOSABLE = 0,
    NORMAL = 1,
    INDEX = 2,
    TTL = 3,
};
45
// Returns the string suffix associated with `type`. Defined out of line.
// NOTE(review): "surfix" is presumably a misspelling of "suffix"; the concrete
// strings are not visible from this header.
std::string cache_type_to_surfix(FileCacheType type);
46
// Maps a suffix string to a FileCacheType. Defined out of line.
// NOTE(review): presumably the inverse of cache_type_to_surfix(); behavior for
// unrecognized input is not visible here - confirm in the definition.
FileCacheType surfix_to_cache_type(const std::string& str);
47
48
// Parses a textual cache-type name into a FileCacheType. Defined out of line.
// NOTE(review): accepted spellings and handling of unknown names are not
// visible from this header.
FileCacheType string_to_cache_type(const std::string& str);
49
// Returns a textual name for `type`. Defined out of line; the exact strings
// are not visible from this header.
std::string cache_type_to_string(FileCacheType type);
50
51
struct UInt128Wrapper {
52
    uint128_t value_;
53
    [[nodiscard]] std::string to_string() const;
54
55
    UInt128Wrapper() = default;
56
1.98M
    explicit UInt128Wrapper(const uint128_t& value) : value_(value) {}
57
58
9.79M
    bool operator==(const UInt128Wrapper& other) const { return value_ == other.value_; }
59
60
553k
    uint64_t high() const { return static_cast<uint64_t>(value_ >> 64); }
61
553k
    uint64_t low() const { return static_cast<uint64_t>(value_); }
62
63
    friend std::ostream& operator<<(std::ostream& os, const UInt128Wrapper& wrapper) {
64
        os << "UInt128Wrapper(" << wrapper.high() << ", " << wrapper.low() << ")";
65
        return os;
66
    }
67
};
68
69
// Per-read counters and timers. Every field has a default: `hit_cache` starts
// true, the other flags start false, and all numeric fields start at zero.
// NOTE(review): the time unit of the *_timer fields is not stated in this header.
struct ReadStatistics {
    // Outcome flags.
    bool hit_cache {true};
    bool from_peer_cache {false};
    bool skip_cache {false};

    // Byte counters.
    int64_t bytes_read {0};
    int64_t bytes_write_into_file_cache {0};

    // Timers.
    int64_t remote_read_timer {0};
    int64_t peer_read_timer {0};
    int64_t remote_wait_timer {0}; // wait for other downloader
    int64_t local_read_timer {0};
    int64_t local_write_timer {0};
    int64_t read_cache_file_directly_timer {0};
    int64_t cache_get_or_set_timer {0};
    int64_t lock_wait_timer {0};
    int64_t get_timer {0};
    int64_t set_timer {0};
};
86
87
class BlockFileCache;
88
struct FileBlocksHolder;
89
// Uniquely-owning handle to a FileBlocksHolder (only forward-declared in this header).
using FileBlocksHolderPtr = std::unique_ptr<FileBlocksHolder>;
90
91
struct FileCacheAllocatorBuilder {
92
    bool _is_cold_data;
93
    uint64_t _expiration_time;
94
    UInt128Wrapper _cache_hash;
95
    BlockFileCache* _cache; // Only one ref, the lifetime is owned by FileCache
96
    FileBlocksHolderPtr allocate_cache_holder(size_t offset, size_t size, int64_t tablet_id) const;
97
};
98
99
struct KeyHash {
100
13.1M
    std::size_t operator()(const UInt128Wrapper& w) const {
101
13.1M
        return util_hash::HashLen16(w.value_.low(), w.value_.high());
102
13.1M
    }
103
};
104
105
using AccessKeyAndOffset = std::pair<UInt128Wrapper, size_t>;
106
struct KeyAndOffsetHash {
107
2.40M
    std::size_t operator()(const AccessKeyAndOffset& key) const {
108
2.40M
        return KeyHash()(key.first) ^ std::hash<uint64_t>()(key.second);
109
2.40M
    }
110
};
111
112
struct KeyMeta {
113
    uint64_t expiration_time; // absolute time
114
    FileCacheType type;
115
    int64_t tablet_id {0};
116
};
117
118
struct FileCacheKey {
119
    UInt128Wrapper hash;
120
    size_t offset;
121
    KeyMeta meta;
122
};
123
124
// Sizing configuration for a file cache instance. All numeric fields default
// to 0 and `storage` defaults to an empty string.
// NOTE(review): units are not stated here; the *_size fields are presumably
// bytes and the *_elements fields entry counts - confirm against callers.
struct FileCacheSettings {
    size_t capacity = 0;

    // Per-queue limits.
    size_t disposable_queue_size = 0;
    size_t disposable_queue_elements = 0;
    size_t index_queue_size = 0;
    size_t index_queue_elements = 0;
    size_t query_queue_size = 0;
    size_t query_queue_elements = 0;
    size_t ttl_queue_size = 0;
    size_t ttl_queue_elements = 0;

    size_t max_file_block_size = 0;
    size_t max_query_cache_size = 0;
    std::string storage;

    // Textual rendering of the settings; defined out of line.
    std::string to_string() const;
};
141
142
// Builds a FileCacheSettings from an overall capacity and per-queue
// percentages. Defined out of line.
//
// The percentage parameters default to DEFAULT_NORMAL_PERCENT,
// DEFAULT_DISPOSABLE_PERCENT, DEFAULT_INDEX_PERCENT and DEFAULT_TTL_PERCENT;
// `storage` defaults to "disk".
// NOTE(review): how the percentages are applied to `capacity` is not visible
// from this header.
FileCacheSettings get_file_cache_settings(
        size_t capacity,
        size_t max_query_cache_size,
        size_t normal_percent = DEFAULT_NORMAL_PERCENT,
        size_t disposable_percent = DEFAULT_DISPOSABLE_PERCENT,
        size_t index_percent = DEFAULT_INDEX_PERCENT,
        size_t ttl_percent = DEFAULT_TTL_PERCENT,
        const std::string& storage = "disk");
148
149
struct CacheContext {
150
2.94M
    CacheContext(const IOContext* io_context) {
151
2.94M
        if (io_context->expiration_time != 0) {
152
2
            cache_type = FileCacheType::TTL;
153
2
            expiration_time = io_context->expiration_time;
154
2.94M
        } else if (io_context->is_index_data) {
155
857k
            cache_type = FileCacheType::INDEX;
156
2.08M
        } else if (io_context->is_disposable) {
157
945k
            cache_type = FileCacheType::DISPOSABLE;
158
1.13M
        } else {
159
1.13M
            cache_type = FileCacheType::NORMAL;
160
1.13M
        }
161
2.94M
        query_id = io_context->query_id ? *io_context->query_id : TUniqueId();
162
2.94M
        is_warmup = io_context->is_warmup;
163
2.94M
    }
164
67.2k
    CacheContext() = default;
165
    bool operator==(const CacheContext& rhs) const {
166
        return query_id == rhs.query_id && cache_type == rhs.cache_type &&
167
               expiration_time == rhs.expiration_time && is_cold_data == rhs.is_cold_data;
168
    }
169
    TUniqueId query_id;
170
    FileCacheType cache_type;
171
    int64_t expiration_time {0};
172
    bool is_cold_data {false};
173
    ReadStatistics* stats;
174
    bool is_warmup {false};
175
    int64_t tablet_id {0};
176
};
177
178
template <class Lock>
179
concept IsXLock = std::same_as<Lock, std::lock_guard<std::mutex>> ||
180
                  std::same_as<Lock, std::unique_lock<std::mutex>>;
181
182
class LRUQueue {
183
public:
184
1.34k
    LRUQueue() = default;
185
    LRUQueue(size_t max_size, size_t max_element_size, int64_t hot_data_interval)
186
661
            : max_size(max_size),
187
661
              max_element_size(max_element_size),
188
661
              hot_data_interval(hot_data_interval) {}
189
190
    struct HashFileKeyAndOffset {
191
3.87M
        std::size_t operator()(const std::pair<UInt128Wrapper, size_t>& pair) const {
192
3.87M
            return KeyHash()(pair.first) + pair.second;
193
3.87M
        }
194
    };
195
196
    struct FileKeyAndOffset {
197
        UInt128Wrapper hash;
198
        size_t offset;
199
        size_t size;
200
201
        FileKeyAndOffset(const UInt128Wrapper& hash, size_t offset, size_t size)
202
709k
                : hash(hash), offset(offset), size(size) {}
203
    };
204
205
    using Iterator = typename std::list<FileKeyAndOffset>::iterator;
206
207
22.5k
    size_t get_max_size() const { return max_size; }
208
692
    size_t get_max_element_size() const { return max_element_size; }
209
210
    template <class T>
211
        requires IsXLock<T>
212
39.0k
    size_t get_capacity(T& /* cache_lock */) const {
213
39.0k
        return cache_size;
214
39.0k
    }
215
216
689
    size_t get_capacity_unsafe() const { return cache_size; }
217
218
716
    size_t get_elements_num_unsafe() const { return queue.size(); }
219
220
8.32k
    size_t get_elements_num(std::lock_guard<std::mutex>& /* cache_lock */) const {
221
8.32k
        return queue.size();
222
8.32k
    }
223
224
    Iterator add(const UInt128Wrapper& hash, size_t offset, size_t size,
225
                 std::lock_guard<std::mutex>& cache_lock);
226
    template <class T>
227
        requires IsXLock<T>
228
699k
    void remove(Iterator queue_it, T& /* cache_lock */) {
229
699k
        cache_size -= queue_it->size;
230
699k
        map.erase(std::make_pair(queue_it->hash, queue_it->offset));
231
699k
        queue.erase(queue_it);
232
699k
    }
233
234
    void move_to_end(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock);
235
236
    void resize(Iterator queue_it, size_t new_size, std::lock_guard<std::mutex>& cache_lock);
237
238
    std::string to_string(std::lock_guard<std::mutex>& cache_lock) const;
239
240
    bool contains(const UInt128Wrapper& hash, size_t offset,
241
                  std::lock_guard<std::mutex>& cache_lock) const;
242
243
738k
    Iterator begin() { return queue.begin(); }
244
245
738k
    Iterator end() { return queue.end(); }
246
247
    void remove_all(std::lock_guard<std::mutex>& cache_lock);
248
249
    Iterator get(const UInt128Wrapper& hash, size_t offset,
250
                 std::lock_guard<std::mutex>& /* cache_lock */) const;
251
252
14.9k
    int64_t get_hot_data_interval() const { return hot_data_interval; }
253
254
100
    void clear(std::lock_guard<std::mutex>& cache_lock) {
255
100
        queue.clear();
256
100
        map.clear();
257
100
        cache_size = 0;
258
100
    }
259
260
    size_t levenshtein_distance_from(LRUQueue& base, std::lock_guard<std::mutex>& cache_lock);
261
262
    size_t max_size;
263
    size_t max_element_size;
264
    std::list<FileKeyAndOffset> queue;
265
    std::unordered_map<std::pair<UInt128Wrapper, size_t>, Iterator, HashFileKeyAndOffset> map;
266
    size_t cache_size = 0;
267
    int64_t hot_data_interval {0};
268
};
269
struct FileCacheInfo {
270
    UInt128Wrapper hash {0};
271
    uint64_t expiration_time {0};
272
    uint64_t size {0};
273
    size_t offset {0};
274
    bool is_tmp {false};
275
    FileCacheType cache_type {NORMAL};
276
277
    std::string to_string() const;
278
};
279
280
// Bit set of inconsistency flags. Each flag below is a distinct bit, so several
// can be combined in one value through the uint32_t& conversion.
class InconsistencyType {
public:
    enum : uint32_t {
        // No anomaly
        NONE = 0x00,
        // Missing a block cache metadata in _files
        NOT_LOADED = 0x01,
        // A block cache is missing in storage
        MISSING_IN_STORAGE = 0x02,
        // Size of a block cache recorded in _files is inconsistent with the storage
        SIZE_INCONSISTENT = 0x04,
        // Cache type of a block cache recorded in _files is inconsistent with the storage
        CACHE_TYPE_INCONSISTENT = 0x08,
        // Expiration time of a block cache recorded in _files is inconsistent with the storage
        EXPIRATION_TIME_INCONSISTENT = 0x10,
        // File in storage has a _tmp suffix, but the state of block cache in _files is not set to downloading
        TMP_FILE_EXPECT_DOWNLOADING_STATE = 0x20
    };

    // Implicitly constructible from a raw flag word; defaults to no flags set.
    InconsistencyType(uint32_t t = 0) : type(t) {}

    // Exposes the underlying flag word by mutable reference.
    operator uint32_t&() { return type; }

    // Textual rendering of the flags; defined out of line.
    std::string to_string() const;

private:
    uint32_t type;
};
305
306
struct InconsistencyContext {
307
    // The infos in _files of BlockFileCache.
308
    std::vector<FileCacheInfo> infos_in_manager;
309
    std::vector<FileCacheInfo> infos_in_storage;
310
    std::vector<InconsistencyType> types;
311
};
312
313
// Derives a tablet id from `file_path`; an empty optional means no id was
// obtained. Defined out of line.
// NOTE(review): the expected path layout and parsing rules are not visible
// from this header.
std::optional<int64_t> get_tablet_id(std::string file_path);
314
315
} // namespace doris::io