Coverage Report

Created: 2026-02-06 15:21

/root/doris/be/src/olap/lru_cache.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style license that can be
3
// found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5
#pragma once
6
7
#include <assert.h>
8
#include <butil/macros.h>
9
#include <bvar/bvar.h>
10
#include <glog/logging.h>
11
#include <gtest/gtest_prod.h>
12
#include <stdint.h>
13
#include <stdlib.h>
14
#include <string.h>
15
16
#include <atomic>
17
#include <functional>
18
#include <memory>
19
#include <set>
20
#include <string>
21
#include <utility>
22
23
#include "runtime/memory/lru_cache_value_base.h"
24
#include "util/doris_metrics.h"
25
#include "util/metrics.h"
26
27
namespace doris {
28
29
#define OLAP_CACHE_STRING_TO_BUF(cur, str, r_len)                  \
30
    do {                                                           \
31
        if (r_len > str.size()) {                                  \
32
            memcpy(cur, str.c_str(), str.size());                  \
33
            r_len -= str.size();                                   \
34
            cur += str.size();                                     \
35
        } else {                                                   \
36
            LOG(WARNING) << "construct cache key buf not enough."; \
37
            return CacheKey(nullptr, 0);                           \
38
        }                                                          \
39
    } while (0)
40
41
#define OLAP_CACHE_NUMERIC_TO_BUF(cur, numeric, r_len)             \
42
    do {                                                           \
43
        if (r_len > sizeof(numeric)) {                             \
44
            memcpy(cur, &numeric, sizeof(numeric));                \
45
            r_len -= sizeof(numeric);                              \
46
            cur += sizeof(numeric);                                \
47
        } else {                                                   \
48
            LOG(WARNING) << "construct cache key buf not enough."; \
49
            return CacheKey(nullptr, 0);                           \
50
        }                                                          \
51
    } while (0)
52
53
class Cache;
54
class LRUCachePolicy;
55
struct LRUHandle;
56
57
enum LRUCacheType {
58
    SIZE, // The capacity of cache is based on the memory size of cache entry, memory size = handle size + charge.
59
    NUMBER // The capacity of cache is based on the number of cache entry, number = charge, the weight of an entry.
60
};
61
62
static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
63
static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
64
static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
65
66
class CacheKey {
67
public:
68
0
    CacheKey() : _data(nullptr), _size(0) {}
69
    // Create a slice that refers to d[0,n-1].
70
864k
    CacheKey(const char* d, size_t n) : _data(d), _size(n) {}
71
72
    // Create a slice that refers to the contents of "s"
73
23.3k
    CacheKey(const std::string& s) : _data(s.data()), _size(s.size()) {}
74
75
    // Create a slice that refers to s[0,strlen(s)-1]
76
42
    CacheKey(const char* s) : _data(s), _size(strlen(s)) {}
77
78
887k
    ~CacheKey() {}
79
80
    // Return a pointer to the beginning of the referenced data
81
1.31M
    const char* data() const { return _data; }
82
83
    // Return the length (in bytes) of the referenced data
84
2.16M
    size_t size() const { return _size; }
85
86
    // Return true if the length of the referenced data is zero
87
0
    bool empty() const { return _size == 0; }
88
89
    // Return the ith byte in the referenced data.
90
    // REQUIRES: n < size()
91
0
    char operator[](size_t n) const {
92
0
        assert(n < size());
93
0
        return _data[n];
94
0
    }
95
96
    // Change this slice to refer to an empty array
97
0
    void clear() {
98
0
        _data = nullptr;
99
0
        _size = 0;
100
0
    }
101
102
    // Drop the first "n" bytes from this slice.
103
0
    void remove_prefix(size_t n) {
104
0
        assert(n <= size());
105
0
        _data += n;
106
0
        _size -= n;
107
0
    }
108
109
    // Return a string that contains the copy of the referenced data.
110
0
    std::string to_string() const { return std::string(_data, _size); }
111
112
228k
    bool operator==(const CacheKey& other) const {
113
228k
        return ((size() == other.size()) && (memcmp(data(), other.data(), size()) == 0));
114
228k
    }
115
116
228k
    bool operator!=(const CacheKey& other) const { return !(*this == other); }
117
118
0
    int compare(const CacheKey& b) const {
119
0
        const size_t min_len = (_size < b._size) ? _size : b._size;
120
0
        int r = memcmp(_data, b._data, min_len);
121
0
        if (r == 0) {
122
0
            if (_size < b._size) {
123
0
                r = -1;
124
0
            } else if (_size > b._size) {
125
0
                r = +1;
126
0
            }
127
0
        }
128
0
        return r;
129
0
    }
130
131
    uint32_t hash(const char* data, size_t n, uint32_t seed) const;
132
133
    // Return true if "x" is a prefix of "*this"
134
0
    bool starts_with(const CacheKey& x) const {
135
0
        return ((_size >= x._size) && (memcmp(_data, x._data, x._size) == 0));
136
0
    }
137
138
private:
139
692k
    uint32_t _decode_fixed32(const char* ptr) const {
140
        // Load the raw bytes
141
692k
        uint32_t result;
142
692k
        memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
143
692k
        return result;
144
692k
    }
145
146
    const char* _data = nullptr;
147
    size_t _size;
148
};
149
150
// The entry with smaller CachePriority will be evicted first
151
enum class CachePriority { NORMAL = 0, DURABLE = 1 };
152
153
using CachePrunePredicate = std::function<bool(const LRUHandle*)>;
154
// CacheValueTimeExtractor can extract timestamp
155
// in cache value through the specified function,
156
// such as last_visit_time in InvertedIndexSearcherCache::CacheValue
157
using CacheValueTimeExtractor = std::function<int64_t(const void*)>;
158
struct PrunedInfo {
159
    int64_t pruned_count = 0;
160
    int64_t pruned_size = 0;
161
};
162
163
class Cache {
164
public:
165
330
    Cache() {}
166
167
    // Destroys all existing entries by calling the "deleter"
168
    // function that was passed to the constructor.
169
    virtual ~Cache();
170
171
    // Opaque handle to an entry stored in the cache.
172
    struct Handle {};
173
174
    // Insert a mapping from key->value into the cache and assign it
175
    // the specified charge against the total cache capacity.
176
    //
177
    // Returns a handle that corresponds to the mapping.  The caller
178
    // must call this->release(handle) when the returned mapping is no
179
    // longer needed.
180
    //
181
    // When the inserted entry is no longer needed, the key and
182
    // value will be passed to "deleter".
183
    virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
184
                           CachePriority priority = CachePriority::NORMAL) = 0;
185
186
    // If the cache has no mapping for "key", returns nullptr.
187
    //
188
    // Else return a handle that corresponds to the mapping.  The caller
189
    // must call this->release(handle) when the returned mapping is no
190
    // longer needed.
191
    virtual Handle* lookup(const CacheKey& key) = 0;
192
193
    // Release a mapping returned by a previous Lookup().
194
    // REQUIRES: handle must not have been released yet.
195
    // REQUIRES: handle must have been returned by a method on *this.
196
    virtual void release(Handle* handle) = 0;
197
198
    // Return the value encapsulated in a handle returned by a
199
    // successful lookup().
200
    // REQUIRES: handle must not have been released yet.
201
    // REQUIRES: handle must have been returned by a method on *this.
202
    virtual void* value(Handle* handle) = 0;
203
204
    // If the cache contains entry for key, erase it.  Note that the
205
    // underlying entry will be kept around until all existing handles
206
    // to it have been released.
207
    virtual void erase(const CacheKey& key) = 0;
208
209
    // Return a new numeric id.  May be used by multiple clients who are
210
    // sharing the same cache to partition the key space.  Typically the
211
    // client will allocate a new id at startup and prepend the id to
212
    // its cache keys.
213
    virtual uint64_t new_id() = 0;
214
215
    // Remove all cache entries that are not actively in use.  Memory-constrained
216
    // applications may wish to call this method to reduce memory usage.
217
    // Default implementation of Prune() does nothing.  Subclasses are strongly
218
    // encouraged to override the default implementation.  A future release of
219
    // leveldb may change prune() to a pure abstract method.
220
    // return num of entries being pruned.
221
0
    virtual PrunedInfo prune() { return {0, 0}; }
222
223
    // Same as prune(), but the entry will only be pruned if the predicate matched.
224
    // NOTICE: the predicate should be simple enough, or the prune_if() function
225
    // may hold lock for a long time to execute predicate.
226
0
    virtual PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) { return {0, 0}; }
227
228
    virtual int64_t get_usage() = 0;
229
230
    virtual size_t get_total_capacity() = 0;
231
232
private:
233
    DISALLOW_COPY_AND_ASSIGN(Cache);
234
};
235
236
// An entry is a variable length heap-allocated structure.  Entries
237
// are kept in a circular doubly linked list ordered by access time.
238
// Note: member variables can only be POD types and raw pointer,
239
// cannot be class objects or smart pointers, because LRUHandle will be created using malloc.
240
struct LRUHandle {
241
    void* value = nullptr;
242
    struct LRUHandle* next_hash = nullptr; // next entry in hash table
243
    struct LRUHandle* next = nullptr;      // next entry in lru list
244
    struct LRUHandle* prev = nullptr;      // previous entry in lru list
245
    size_t charge;
246
    size_t key_length;
247
    size_t total_size; // Entry charge, used to limit cache capacity, LRUCacheType::SIZE including key length.
248
    bool in_cache; // Whether entry is in the cache.
249
    uint32_t refs;
250
    uint32_t hash; // Hash of key(); used for fast sharding and comparisons
251
    CachePriority priority = CachePriority::NORMAL;
252
    LRUCacheType type;
253
    int64_t last_visit_time; // Save the last visit time of this cache entry.
254
    char key_data[1];        // Beginning of key
255
    // Note! key_data must be at the end.
256
257
436k
    CacheKey key() const {
258
        // For cheaper lookups, we allow a temporary Handle object
259
        // to store a pointer to a key in "value".
260
436k
        if (next == this) {
261
0
            return *(reinterpret_cast<CacheKey*>(value));
262
436k
        } else {
263
436k
            return CacheKey(key_data, key_length);
264
436k
        }
265
436k
    }
266
267
203k
    void free() {
268
203k
        if (value != nullptr) { // value allows null pointer.
269
203k
            delete (LRUCacheValueBase*)value;
270
203k
        }
271
203k
        ::free(this);
272
203k
    }
273
};
274
275
// We provide our own simple hash table since it removes a whole bunch
276
// of porting hacks and is also faster than some of the built-in hash
277
// table implementations in some of the compiler/runtime combinations
278
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
279
// 4.4.3's builtin hashtable.
280
281
class HandleTable {
282
public:
283
10.1k
    HandleTable() : _length(0), _elems(0), _list(nullptr) { _resize(); }
284
285
    ~HandleTable();
286
287
    LRUHandle* lookup(const CacheKey& key, uint32_t hash);
288
289
    LRUHandle* insert(LRUHandle* h);
290
291
    // Remove element from hash table by "key" and "hash".
292
    LRUHandle* remove(const CacheKey& key, uint32_t hash);
293
294
    // Remove element from hash table by "h", it would be faster
295
    // than the function above.
296
    // Return whether h is found and removed.
297
    bool remove(const LRUHandle* h);
298
299
    uint32_t element_count() const;
300
301
private:
302
    FRIEND_TEST(CacheTest, HandleTableTest);
303
304
    // The table consists of an array of buckets where each bucket is
305
    // a linked list of cache entries that hash into the bucket.
306
    uint32_t _length;
307
    uint32_t _elems;
308
    LRUHandle** _list = nullptr;
309
310
    // Return a pointer to slot that points to a cache entry that
311
    // matches key/hash.  If there is no such cache entry, return a
312
    // pointer to the trailing slot in the corresponding linked list.
313
    LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
314
315
    void _resize();
316
};
317
318
// pair first is timestamp, put <timestamp, LRUHandle*> into asc set,
319
// when need to free space, can first evict the begin of the set,
320
// because the begin element's timestamp is the oldest.
321
using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
322
323
// A single shard of sharded cache.
324
class LRUCache {
325
public:
326
    LRUCache(LRUCacheType type);
327
    ~LRUCache();
328
329
    // Separate from constructor so caller can easily make an array of LRUCache
330
10.1k
    void set_capacity(size_t capacity) { _capacity = capacity; }
331
10.1k
    void set_element_count_capacity(uint32_t element_count_capacity) {
332
10.1k
        _element_count_capacity = element_count_capacity;
333
10.1k
    }
334
335
    // Like Cache methods, but with an extra "hash" parameter.
336
    // Must call release on the returned handle pointer.
337
    Cache::Handle* insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
338
                          CachePriority priority = CachePriority::NORMAL);
339
    Cache::Handle* lookup(const CacheKey& key, uint32_t hash);
340
    void release(Cache::Handle* handle);
341
    void erase(const CacheKey& key, uint32_t hash);
342
    PrunedInfo prune();
343
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false);
344
345
    void set_cache_value_time_extractor(CacheValueTimeExtractor cache_value_time_extractor);
346
    void set_cache_value_check_timestamp(bool cache_value_check_timestamp);
347
348
0
    uint64_t get_lookup_count() const { return _lookup_count; }
349
0
    uint64_t get_hit_count() const { return _hit_count; }
350
45
    size_t get_usage() const { return _usage; }
351
0
    size_t get_capacity() const { return _capacity; }
352
0
    size_t get_element_count() const { return _table.element_count(); }
353
354
private:
355
    void _lru_remove(LRUHandle* e);
356
    void _lru_append(LRUHandle* list, LRUHandle* e);
357
    bool _unref(LRUHandle* e);
358
    void _evict_from_lru(size_t total_size, LRUHandle** to_remove_head);
359
    void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head);
360
    void _evict_one_entry(LRUHandle* e);
361
    bool _check_element_count_limit();
362
363
private:
364
    LRUCacheType _type;
365
366
    // Initialized before use.
367
    size_t _capacity = 0;
368
369
    // _mutex protects the following state.
370
    std::mutex _mutex;
371
    size_t _usage = 0;
372
373
    // Dummy head of LRU list.
374
    // Entries have refs==1 and in_cache==true.
375
    // _lru_normal.prev is newest entry, _lru_normal.next is oldest entry.
376
    LRUHandle _lru_normal;
377
    // _lru_durable.prev is newest entry, _lru_durable.next is oldest entry.
378
    LRUHandle _lru_durable;
379
380
    HandleTable _table;
381
382
    uint64_t _lookup_count = 0; // number of cache lookups
383
    uint64_t _hit_count = 0;    // number of cache hits
384
385
    CacheValueTimeExtractor _cache_value_time_extractor;
386
    bool _cache_value_check_timestamp = false;
387
    LRUHandleSortedSet _sorted_normal_entries_with_timestamp;
388
    LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
389
390
    uint32_t _element_count_capacity = 0;
391
};
392
393
class ShardedLRUCache : public Cache {
394
public:
395
    virtual ~ShardedLRUCache();
396
    virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
397
                           CachePriority priority = CachePriority::NORMAL) override;
398
    virtual Handle* lookup(const CacheKey& key) override;
399
    virtual void release(Handle* handle) override;
400
    virtual void erase(const CacheKey& key) override;
401
    virtual void* value(Handle* handle) override;
402
    virtual uint64_t new_id() override;
403
    PrunedInfo prune() override;
404
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override;
405
    int64_t get_usage() override;
406
0
    size_t get_total_capacity() override { return _total_capacity; };
407
408
private:
409
    // LRUCache can only be created and managed with LRUCachePolicy.
410
    friend class LRUCachePolicy;
411
412
    explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
413
                             uint32_t num_shards, uint32_t element_count_capacity);
414
    explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type,
415
                             uint32_t num_shards,
416
                             CacheValueTimeExtractor cache_value_time_extractor,
417
                             bool cache_value_check_timestamp, uint32_t element_count_capacity);
418
419
    void update_cache_metrics() const;
420
421
private:
422
    static uint32_t _hash_slice(const CacheKey& s);
423
887k
    uint32_t _shard(uint32_t hash) {
424
887k
        return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0;
425
887k
    }
426
427
    std::string _name;
428
    const int _num_shard_bits;
429
    const uint32_t _num_shards;
430
    LRUCache** _shards = nullptr;
431
    std::atomic<uint64_t> _last_id;
432
    size_t _total_capacity;
433
434
    std::shared_ptr<MetricEntity> _entity;
435
    IntGauge* cache_capacity = nullptr;
436
    IntGauge* cache_usage = nullptr;
437
    IntGauge* cache_element_count = nullptr;
438
    DoubleGauge* cache_usage_ratio = nullptr;
439
    IntCounter* cache_lookup_count = nullptr;
440
    IntCounter* cache_hit_count = nullptr;
441
    DoubleGauge* cache_hit_ratio = nullptr;
442
    // bvars
443
    std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar;
444
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second;
445
    std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar;
446
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second;
447
};
448
449
// Compatible with ShardedLRUCache usage, but will not actually cache.
450
class DummyLRUCache : public Cache {
451
public:
452
    // Must call release on the returned handle pointer.
453
    Handle* insert(const CacheKey& key, void* value, size_t charge,
454
                   CachePriority priority = CachePriority::NORMAL) override;
455
176
    Handle* lookup(const CacheKey& key) override { return nullptr; };
456
    void release(Handle* handle) override;
457
0
    void erase(const CacheKey& key) override {};
458
    void* value(Handle* handle) override;
459
0
    uint64_t new_id() override { return 0; };
460
0
    PrunedInfo prune() override { return {0, 0}; };
461
0
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override {
462
0
        return {0, 0};
463
0
    };
464
0
    int64_t get_usage() override { return 0; };
465
0
    size_t get_total_capacity() override { return 0; };
466
};
467
468
} // namespace doris