Coverage Report

Created: 2025-03-12 11:55

/root/doris/be/src/olap/lru_cache.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5
#pragma once
6
7
#include <butil/macros.h>
8
#include <bvar/bvar.h>
9
#include <glog/logging.h>
10
#include <gtest/gtest_prod.h>
11
12
#include <atomic>
13
#include <cassert>
14
#include <cstdint>
15
#include <cstdlib>
16
#include <cstring>
17
#include <functional>
18
#include <memory>
19
#include <set>
20
#include <string>
21
#include <utility>
22
23
#include "runtime/memory/lru_cache_value_base.h"
24
#include "util/doris_metrics.h"
25
#include "util/metrics.h"
26
27
namespace doris {
28
#include "common/compile_check_begin.h"
29
30
class Cache;
31
class LRUCachePolicy;
32
struct LRUHandle;
33
34
enum LRUCacheType {
35
    SIZE, // The capacity of cache is based on the memory size of cache entry, memory size = handle size + charge.
36
    NUMBER // The capacity of cache is based on the number of cache entry, number = charge, the weight of an entry.
37
};
38
39
static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
40
static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
41
static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
42
static constexpr bool DEFAULT_LRU_CACHE_IS_LRU_K = false;
43
44
class CacheKey {
45
public:
46
0
    CacheKey() : _size(0) {}
47
    // Create a slice that refers to d[0,n-1].
48
1.14M
    CacheKey(const char* d, size_t n) : _data(d), _size(n) {}
49
50
    // Create a slice that refers to the contents of "s"
51
26.7k
    CacheKey(const std::string& s) : _data(s.data()), _size(s.size()) {}
52
53
    // Create a slice that refers to s[0,strlen(s)-1]
54
50
    CacheKey(const char* s) : _data(s), _size(strlen(s)) {}
55
56
    ~CacheKey() = default;
57
58
    // Return a pointer to the beginning of the referenced data
59
1.73M
    const char* data() const { return _data; }
60
61
    // Return the length (in bytes) of the referenced data
62
2.88M
    size_t size() const { return _size; }
63
64
    // Return true if the length of the referenced data is zero
65
0
    bool empty() const { return _size == 0; }
66
67
    // Return the ith byte in the referenced data.
68
    // REQUIRES: n < size()
69
0
    char operator[](size_t n) const {
70
0
        assert(n < size());
71
0
        return _data[n];
72
0
    }
73
74
    // Change this slice to refer to an empty array
75
0
    void clear() {
76
0
        _data = nullptr;
77
0
        _size = 0;
78
0
    }
79
80
    // Drop the first "n" bytes from this slice.
81
0
    void remove_prefix(size_t n) {
82
0
        assert(n <= size());
83
0
        _data += n;
84
0
        _size -= n;
85
0
    }
86
87
    // Return a string that contains the copy of the referenced data.
88
0
    std::string to_string() const { return {_data, _size}; }
89
90
285k
    bool operator==(const CacheKey& other) const {
91
285k
        return ((size() == other.size()) && (memcmp(data(), other.data(), size()) == 0));
92
285k
    }
93
94
285k
    bool operator!=(const CacheKey& other) const { return !(*this == other); }
95
96
0
    int compare(const CacheKey& b) const {
97
0
        const size_t min_len = (_size < b._size) ? _size : b._size;
98
0
        int r = memcmp(_data, b._data, min_len);
99
0
        if (r == 0) {
100
0
            if (_size < b._size) {
101
0
                r = -1;
102
0
            } else if (_size > b._size) {
103
0
                r = +1;
104
0
            }
105
0
        }
106
0
        return r;
107
0
    }
108
109
    uint32_t hash(const char* data, size_t n, uint32_t seed) const;
110
111
    // Return true if "x" is a prefix of "*this"
112
0
    bool starts_with(const CacheKey& x) const {
113
0
        return ((_size >= x._size) && (memcmp(_data, x._data, x._size) == 0));
114
0
    }
115
116
private:
117
863k
    uint32_t _decode_fixed32(const char* ptr) const {
118
        // Load the raw bytes
119
863k
        uint32_t result;
120
863k
        memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
121
863k
        return result;
122
863k
    }
123
124
    const char* _data = nullptr;
125
    size_t _size;
126
};
127
128
// The entry with smaller CachePriority will evict firstly
129
enum class CachePriority { NORMAL = 0, DURABLE = 1 };
130
131
using CachePrunePredicate = std::function<bool(const LRUHandle*)>;
132
// CacheValueTimeExtractor can extract timestamp
133
// in cache value through the specified function,
134
// such as last_visit_time in InvertedIndexSearcherCache::CacheValue
135
using CacheValueTimeExtractor = std::function<int64_t(const void*)>;
136
struct PrunedInfo {
137
    int64_t pruned_count = 0;
138
    int64_t pruned_size = 0;
139
};
140
141
class Cache {
142
public:
143
483
    Cache() = default;
144
145
    // Destroys all existing entries by calling the "deleter"
146
    // function that was passed to the constructor.
147
476
    virtual ~Cache() = default;
148
149
    // Opaque handle to an entry stored in the cache.
150
    struct Handle {};
151
152
    // Insert a mapping from key->value into the cache and assign it
153
    // the specified charge against the total cache capacity.
154
    //
155
    // Returns a handle that corresponds to the mapping.  The caller
156
    // must call this->release(handle) when the returned mapping is no
157
    // longer needed.
158
    //
159
    // When the inserted entry is no longer needed, the key and
160
    // value will be passed to "deleter".
161
    //
162
    // if cache is lru k and cache is full, first insert of key will not succeed.
163
    //
164
    // Note: if is ShardedLRUCache, cache capacity = ShardedLRUCache_capacity / num_shards.
165
    virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
166
                           CachePriority priority = CachePriority::NORMAL) = 0;
167
168
    // If the cache has no mapping for "key", returns nullptr.
169
    //
170
    // Else return a handle that corresponds to the mapping.  The caller
171
    // must call this->release(handle) when the returned mapping is no
172
    // longer needed.
173
    virtual Handle* lookup(const CacheKey& key) = 0;
174
175
    // Release a mapping returned by a previous Lookup().
176
    // REQUIRES: handle must not have been released yet.
177
    // REQUIRES: handle must have been returned by a method on *this.
178
    virtual void release(Handle* handle) = 0;
179
180
    // Return the value encapsulated in a handle returned by a
181
    // successful lookup().
182
    // REQUIRES: handle must not have been released yet.
183
    // REQUIRES: handle must have been returned by a method on *this.
184
    virtual void* value(Handle* handle) = 0;
185
186
    // If the cache contains entry for key, erase it.  Note that the
187
    // underlying entry will be kept around until all existing handles
188
    // to it have been released.
189
    virtual void erase(const CacheKey& key) = 0;
190
191
    // Return a new numeric id.  May be used by multiple clients who are
192
    // sharing the same cache to partition the key space.  Typically the
193
    // client will allocate a new id at startup and prepend the id to
194
    // its cache keys.
195
    virtual uint64_t new_id() = 0;
196
197
    // Remove all cache entries that are not actively in use.  Memory-constrained
198
    // applications may wish to call this method to reduce memory usage.
199
    // Default implementation of Prune() does nothing.  Subclasses are strongly
200
    // encouraged to override the default implementation.  A future release of
201
    // leveldb may change prune() to a pure abstract method.
202
    // return num of entries being pruned.
203
0
    virtual PrunedInfo prune() { return {0, 0}; }
204
205
    // Same as prune(), but the entry will only be pruned if the predicate matched.
206
    // NOTICE: the predicate should be simple enough, or the prune_if() function
207
    // may hold lock for a long time to execute predicate.
208
0
    virtual PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) { return {0, 0}; }
209
210
    virtual int64_t get_usage() = 0;
211
212
    virtual PrunedInfo set_capacity(size_t capacity) = 0;
213
    virtual size_t get_capacity() = 0;
214
215
    virtual size_t get_element_count() = 0;
216
217
private:
218
    DISALLOW_COPY_AND_ASSIGN(Cache);
219
};
220
221
// An entry is a variable length heap-allocated structure.  Entries
222
// are kept in a circular doubly linked list ordered by access time.
223
// Note: member variables can only be POD types and raw pointer,
224
// cannot be class objects or smart pointers, because LRUHandle will be created using malloc.
225
struct LRUHandle {
226
    void* value = nullptr;
227
    struct LRUHandle* next_hash = nullptr; // next entry in hash table
228
    struct LRUHandle* next = nullptr;      // next entry in lru list
229
    struct LRUHandle* prev = nullptr;      // previous entry in lru list
230
    size_t charge;
231
    size_t key_length;
232
    size_t total_size; // Entry charge, used to limit cache capacity, LRUCacheType::SIZE including key length.
233
    bool in_cache; // Whether entry is in the cache.
234
    uint32_t refs;
235
    uint32_t hash; // Hash of key(); used for fast sharding and comparisons
236
    CachePriority priority = CachePriority::NORMAL;
237
    LRUCacheType type;
238
    int64_t last_visit_time; // Save the last visit time of this cache entry.
239
    char key_data[1];        // Beginning of key
240
    // Note! key_data must be at the end.
241
242
573k
    CacheKey key() const {
243
        // For cheaper lookups, we allow a temporary Handle object
244
        // to store a pointer to a key in "value".
245
573k
        if (next == this) {
246
0
            return *(reinterpret_cast<CacheKey*>(value));
247
573k
        } else {
248
573k
            return {key_data, key_length};
249
573k
        }
250
573k
    }
251
252
283k
    void free() {
253
283k
        if (value != nullptr) { // value allows null pointer.
254
283k
            delete (LRUCacheValueBase*)value;
255
283k
        }
256
283k
        ::free(this);
257
283k
    }
258
};
259
260
// We provide our own simple hash table since it removes a whole bunch
261
// of porting hacks and is also faster than some of the built-in hash
262
// table implementations in some of the compiler/runtime combinations
263
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
264
// 4.4.3's builtin hashtable.
265
266
class HandleTable {
267
public:
268
7.40k
    HandleTable() { _resize(); }
269
270
    ~HandleTable();
271
272
    LRUHandle* lookup(const CacheKey& key, uint32_t hash);
273
274
    LRUHandle* insert(LRUHandle* h);
275
276
    // Remove element from hash table by "key" and "hash".
277
    LRUHandle* remove(const CacheKey& key, uint32_t hash);
278
279
    // Remove element from hash table by "h", it would be faster
280
    // than the function above.
281
    // Return whether h is found and removed.
282
    bool remove(const LRUHandle* h);
283
284
    uint32_t element_count() const;
285
286
private:
287
    FRIEND_TEST(CacheTest, HandleTableTest);
288
289
    // The table consists of an array of buckets where each bucket is
290
    // a linked list of cache entries that hash into the bucket.
291
    uint32_t _length {};
292
    uint32_t _elems {};
293
    LRUHandle** _list = nullptr;
294
295
    // Return a pointer to slot that points to a cache entry that
296
    // matches key/hash.  If there is no such cache entry, return a
297
    // pointer to the trailing slot in the corresponding linked list.
298
    LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
299
300
    void _resize();
301
};
302
303
// The pair's first element is a timestamp; <timestamp, LRUHandle*> pairs are kept in an ascending set,
304
// so when space needs to be freed, entries at the beginning of the set can be evicted first,
305
// because the first element's timestamp is the oldest.
306
using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
307
308
// A single shard of sharded cache.
309
class LRUCache {
310
public:
311
    LRUCache(LRUCacheType type, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K);
312
    ~LRUCache();
313
314
    // visits_lru_cache_key is the hash value of CacheKey.
315
    // If there is a hash conflict, a cache entry may be inserted early
316
    // and another cache entry with the same key hash may be inserted later.
317
    // Otherwise, this does not affect the correctness of the cache.
318
    using visits_lru_cache_key = uint32_t;
319
    using visits_lru_cache_pair = std::pair<visits_lru_cache_key, size_t>;
320
321
    // Separate from constructor so caller can easily make an array of LRUCache
322
    PrunedInfo set_capacity(size_t capacity);
323
7.39k
    void set_element_count_capacity(uint32_t element_count_capacity) {
324
7.39k
        _element_count_capacity = element_count_capacity;
325
7.39k
    }
326
327
    // Like Cache methods, but with an extra "hash" parameter.
328
    // Must call release on the returned handle pointer.
329
    Cache::Handle* insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
330
                          CachePriority priority = CachePriority::NORMAL);
331
    Cache::Handle* lookup(const CacheKey& key, uint32_t hash);
332
    void release(Cache::Handle* handle);
333
    void erase(const CacheKey& key, uint32_t hash);
334
    PrunedInfo prune();
335
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false);
336
337
    void set_cache_value_time_extractor(CacheValueTimeExtractor cache_value_time_extractor);
338
    void set_cache_value_check_timestamp(bool cache_value_check_timestamp);
339
340
    uint64_t get_lookup_count();
341
    uint64_t get_hit_count();
342
    uint64_t get_miss_count();
343
    uint64_t get_stampede_count();
344
345
    size_t get_usage();
346
    size_t get_capacity();
347
    size_t get_element_count();
348
349
private:
350
    void _lru_remove(LRUHandle* e);
351
    void _lru_append(LRUHandle* list, LRUHandle* e);
352
    bool _unref(LRUHandle* e);
353
    void _evict_from_lru(size_t total_size, LRUHandle** to_remove_head);
354
    void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head);
355
    void _evict_one_entry(LRUHandle* e);
356
    bool _check_element_count_limit();
357
    bool _lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key);
358
359
private:
360
    LRUCacheType _type;
361
362
    // Initialized before use.
363
    size_t _capacity = 0;
364
365
    // _mutex protects the following state.
366
    std::mutex _mutex;
367
    size_t _usage = 0;
368
369
    // Dummy head of LRU list.
370
    // Entries have refs==1 and in_cache==true.
371
    // _lru_normal.prev is newest entry, _lru_normal.next is oldest entry.
372
    LRUHandle _lru_normal;
373
    // _lru_durable.prev is newest entry, _lru_durable.next is oldest entry.
374
    LRUHandle _lru_durable;
375
376
    HandleTable _table;
377
378
    uint64_t _lookup_count = 0; // number of cache lookups
379
    uint64_t _hit_count = 0;    // number of cache hits
380
    uint64_t _miss_count = 0;   // number of cache misses
381
    uint64_t _stampede_count = 0;
382
383
    CacheValueTimeExtractor _cache_value_time_extractor;
384
    bool _cache_value_check_timestamp = false;
385
    LRUHandleSortedSet _sorted_normal_entries_with_timestamp;
386
    LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
387
388
    uint32_t _element_count_capacity = 0;
389
390
    bool _is_lru_k = false; // LRU-K algorithm, K=2
391
    std::list<visits_lru_cache_pair> _visits_lru_cache_list;
392
    std::unordered_map<visits_lru_cache_key, std::list<visits_lru_cache_pair>::iterator>
393
            _visits_lru_cache_map;
394
    size_t _visits_lru_cache_usage = 0;
395
};
396
397
class ShardedLRUCache : public Cache {
398
public:
399
    ~ShardedLRUCache() override;
400
    Handle* insert(const CacheKey& key, void* value, size_t charge,
401
                   CachePriority priority = CachePriority::NORMAL) override;
402
    Handle* lookup(const CacheKey& key) override;
403
    void release(Handle* handle) override;
404
    void erase(const CacheKey& key) override;
405
    void* value(Handle* handle) override;
406
    uint64_t new_id() override;
407
    PrunedInfo prune() override;
408
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override;
409
    int64_t get_usage() override;
410
    size_t get_element_count() override;
411
    PrunedInfo set_capacity(size_t capacity) override;
412
    size_t get_capacity() override;
413
414
private:
415
    // LRUCache can only be created and managed with LRUCachePolicy.
416
    friend class LRUCachePolicy;
417
418
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
419
                             uint32_t num_shards, uint32_t element_count_capacity, bool is_lru_k);
420
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
421
                             uint32_t num_shards,
422
                             CacheValueTimeExtractor cache_value_time_extractor,
423
                             bool cache_value_check_timestamp, uint32_t element_count_capacity,
424
                             bool is_lru_k);
425
426
    void update_cache_metrics() const;
427
428
private:
429
    static uint32_t _hash_slice(const CacheKey& s);
430
1.15M
    uint32_t _shard(uint32_t hash) const {
431
1.15M
        return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0;
432
1.15M
    }
433
434
    std::string _name;
435
    const int _num_shard_bits;
436
    const uint32_t _num_shards;
437
    LRUCache** _shards = nullptr;
438
    std::atomic<uint64_t> _last_id;
439
    std::mutex _mutex;
440
    size_t _capacity {0};
441
442
    std::shared_ptr<MetricEntity> _entity;
443
    IntGauge* cache_capacity = nullptr;
444
    IntGauge* cache_usage = nullptr;
445
    IntGauge* cache_element_count = nullptr;
446
    DoubleGauge* cache_usage_ratio = nullptr;
447
    IntCounter* cache_lookup_count = nullptr;
448
    IntCounter* cache_hit_count = nullptr;
449
    IntCounter* cache_miss_count = nullptr;
450
    IntCounter* cache_stampede_count = nullptr;
451
    DoubleGauge* cache_hit_ratio = nullptr;
452
    // bvars
453
    std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar;
454
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second;
455
    std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar;
456
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second;
457
};
458
459
// Compatible with ShardedLRUCache usage, but will not actually cache.
460
class DummyLRUCache : public Cache {
461
public:
462
    // Must call release on the returned handle pointer.
463
    Handle* insert(const CacheKey& key, void* value, size_t charge,
464
                   CachePriority priority = CachePriority::NORMAL) override;
465
185
    Handle* lookup(const CacheKey& key) override { return nullptr; };
466
    void release(Handle* handle) override;
467
146
    void erase(const CacheKey& key) override {};
468
    void* value(Handle* handle) override;
469
0
    uint64_t new_id() override { return 0; };
470
0
    PrunedInfo prune() override { return {0, 0}; };
471
0
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override {
472
0
        return {0, 0};
473
0
    };
474
0
    int64_t get_usage() override { return 0; };
475
0
    PrunedInfo set_capacity(size_t capacity) override { return {0, 0}; };
476
0
    size_t get_capacity() override { return 0; };
477
0
    size_t get_element_count() override { return 0; };
478
};
479
480
} // namespace doris
481
#include "common/compile_check_end.h"