Coverage Report

Created: 2026-03-11 16:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/lru_cache.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
19
// Use of this source code is governed by a BSD-style license that can be
20
// found in the LICENSE file. See the AUTHORS file for names of contributors.
21
22
#pragma once
23
24
#include <butil/macros.h>
25
#include <bvar/bvar.h>
26
#include <glog/logging.h>
27
#include <gtest/gtest_prod.h>
28
29
#include <atomic>
30
#include <cassert>
31
#include <cstdint>
32
#include <cstdlib>
33
#include <cstring>
34
#include <functional>
35
#include <memory>
36
#include <set>
37
#include <string>
38
#include <utility>
39
40
#include "common/metrics/doris_metrics.h"
41
#include "common/metrics/metrics.h"
42
#include "runtime/memory/lru_cache_value_base.h"
43
44
namespace doris {
45
#include "common/compile_check_begin.h"
46
47
class Cache;
48
class LRUCachePolicy;
49
struct LRUHandle;
50
51
enum LRUCacheType {
52
    SIZE, // The capacity of cache is based on the memory size of cache entry, memory size = handle size + charge.
53
    NUMBER // The capacity of cache is based on the number of cache entry, number = charge, the weight of an entry.
54
};
55
56
static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
57
static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
58
static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
59
static constexpr bool DEFAULT_LRU_CACHE_IS_LRU_K = false;
60
61
class CacheKey {
62
public:
63
0
    CacheKey() : _size(0) {}
64
    // Create a slice that refers to d[0,n-1].
65
1.28M
    CacheKey(const char* d, size_t n) : _data(d), _size(n) {}
66
67
    // Create a slice that refers to the contents of "s"
68
46.4k
    CacheKey(const std::string& s) : _data(s.data()), _size(s.size()) {}
69
70
    // Create a slice that refers to s[0,strlen(s)-1]
71
50
    CacheKey(const char* s) : _data(s), _size(strlen(s)) {}
72
73
    ~CacheKey() = default;
74
75
    // Return a pointer to the beginning of the referenced data
76
1.96M
    const char* data() const { return _data; }
77
78
    // Return the length (in bytes) of the referenced data
79
3.27M
    size_t size() const { return _size; }
80
81
    // Return true if the length of the referenced data is zero
82
0
    bool empty() const { return _size == 0; }
83
84
    // Return the ith byte in the referenced data.
85
    // REQUIRES: n < size()
86
0
    char operator[](size_t n) const {
87
0
        assert(n < size());
88
0
        return _data[n];
89
0
    }
90
91
    // Change this slice to refer to an empty array
92
0
    void clear() {
93
0
        _data = nullptr;
94
0
        _size = 0;
95
0
    }
96
97
    // Drop the first "n" bytes from this slice.
98
0
    void remove_prefix(size_t n) {
99
0
        assert(n <= size());
100
0
        _data += n;
101
0
        _size -= n;
102
0
    }
103
104
    // Return a string that contains the copy of the referenced data.
105
0
    std::string to_string() const { return {_data, _size}; }
106
107
322k
    bool operator==(const CacheKey& other) const {
108
322k
        return ((size() == other.size()) && (memcmp(data(), other.data(), size()) == 0));
109
322k
    }
110
111
322k
    bool operator!=(const CacheKey& other) const { return !(*this == other); }
112
113
0
    int compare(const CacheKey& b) const {
114
0
        const size_t min_len = (_size < b._size) ? _size : b._size;
115
0
        int r = memcmp(_data, b._data, min_len);
116
0
        if (r == 0) {
117
0
            if (_size < b._size) {
118
0
                r = -1;
119
0
            } else if (_size > b._size) {
120
0
                r = +1;
121
0
            }
122
0
        }
123
0
        return r;
124
0
    }
125
126
    uint32_t hash(const char* data, size_t n, uint32_t seed) const;
127
128
    // Return true if "x" is a prefix of "*this"
129
0
    bool starts_with(const CacheKey& x) const {
130
0
        return ((_size >= x._size) && (memcmp(_data, x._data, x._size) == 0));
131
0
    }
132
133
private:
134
1.23M
    uint32_t _decode_fixed32(const char* ptr) const {
135
        // Load the raw bytes
136
1.23M
        uint32_t result;
137
1.23M
        memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
138
1.23M
        return result;
139
1.23M
    }
140
141
    const char* _data = nullptr;
142
    size_t _size;
143
};
144
145
// The entry with the smaller CachePriority will be evicted first
146
enum class CachePriority { NORMAL = 0, DURABLE = 1 };
147
148
using CachePrunePredicate = std::function<bool(const LRUHandle*)>;
149
// CacheValueTimeExtractor can extract timestamp
150
// in cache value through the specified function,
151
// such as last_visit_time in InvertedIndexSearcherCache::CacheValue
152
using CacheValueTimeExtractor = std::function<int64_t(const void*)>;
153
struct PrunedInfo {
154
    int64_t pruned_count = 0;
155
    int64_t pruned_size = 0;
156
};
157
158
class Cache {
159
public:
160
1.11k
    Cache() = default;
161
162
    // Destroys all existing entries by calling the "deleter"
163
    // function that was passed to the constructor.
164
1.10k
    virtual ~Cache() = default;
165
166
    // Opaque handle to an entry stored in the cache.
167
    struct Handle {};
168
169
    // Insert a mapping from key->value into the cache and assign it
170
    // the specified charge against the total cache capacity.
171
    //
172
    // Returns a handle that corresponds to the mapping.  The caller
173
    // must call this->release(handle) when the returned mapping is no
174
    // longer needed.
175
    //
176
    // When the inserted entry is no longer needed, the key and
177
    // value will be passed to "deleter".
178
    //
179
    // If the cache is LRU-K and full, the first insert of a key will not succeed.
180
    //
181
    // Note: if is ShardedLRUCache, cache capacity = ShardedLRUCache_capacity / num_shards.
182
    virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
183
                           CachePriority priority = CachePriority::NORMAL) = 0;
184
185
    // If the cache has no mapping for "key", returns nullptr.
186
    //
187
    // Else return a handle that corresponds to the mapping.  The caller
188
    // must call this->release(handle) when the returned mapping is no
189
    // longer needed.
190
    virtual Handle* lookup(const CacheKey& key) = 0;
191
192
    // Release a mapping returned by a previous Lookup().
193
    // REQUIRES: handle must not have been released yet.
194
    // REQUIRES: handle must have been returned by a method on *this.
195
    virtual void release(Handle* handle) = 0;
196
197
    // Return the value encapsulated in a handle returned by a
198
    // successful lookup().
199
    // REQUIRES: handle must not have been released yet.
200
    // REQUIRES: handle must have been returned by a method on *this.
201
    virtual void* value(Handle* handle) = 0;
202
203
    // If the cache contains entry for key, erase it.  Note that the
204
    // underlying entry will be kept around until all existing handles
205
    // to it have been released.
206
    virtual void erase(const CacheKey& key) = 0;
207
208
    // Return a new numeric id.  May be used by multiple clients who are
209
    // sharing the same cache to partition the key space.  Typically the
210
    // client will allocate a new id at startup and prepend the id to
211
    // its cache keys.
212
    virtual uint64_t new_id() = 0;
213
214
    // Remove all cache entries that are not actively in use.  Memory-constrained
215
    // applications may wish to call this method to reduce memory usage.
216
    // Default implementation of Prune() does nothing.  Subclasses are strongly
217
    // encouraged to override the default implementation.  A future release of
218
    // leveldb may change prune() to a pure abstract method.
219
    // return num of entries being pruned.
220
0
    virtual PrunedInfo prune() { return {0, 0}; }
221
222
    // Same as prune(), but the entry will only be pruned if the predicate matched.
223
    // NOTICE: the predicate should be simple enough, or the prune_if() function
224
    // may hold lock for a long time to execute predicate.
225
0
    virtual PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) { return {0, 0}; }
226
227
    virtual void for_each_entry(const std::function<void(const LRUHandle*)>& visitor) = 0;
228
229
    virtual int64_t get_usage() = 0;
230
231
    virtual PrunedInfo set_capacity(size_t capacity) = 0;
232
    virtual size_t get_capacity() = 0;
233
234
    virtual size_t get_element_count() = 0;
235
236
private:
237
    DISALLOW_COPY_AND_ASSIGN(Cache);
238
};
239
240
// An entry is a variable length heap-allocated structure.  Entries
241
// are kept in a circular doubly linked list ordered by access time.
242
// Note: member variables can only be POD types and raw pointer,
243
// cannot be class objects or smart pointers, because LRUHandle will be created using malloc.
244
struct LRUHandle {
245
    void* value = nullptr;
246
    struct LRUHandle* next_hash = nullptr; // next entry in hash table
247
    struct LRUHandle* next = nullptr;      // next entry in lru list
248
    struct LRUHandle* prev = nullptr;      // previous entry in lru list
249
    size_t charge;
250
    size_t key_length;
251
    size_t total_size; // Entry charge, used to limit cache capacity, LRUCacheType::SIZE including key length.
252
    bool in_cache; // Whether entry is in the cache.
253
    uint32_t refs;
254
    uint32_t hash; // Hash of key(); used for fast sharding and comparisons
255
    CachePriority priority = CachePriority::NORMAL;
256
    LRUCacheType type;
257
    int64_t last_visit_time; // Save the last visit time of this cache entry.
258
    char key_data[1];        // Beginning of key
259
    // Note! key_data must be at the end.
260
261
645k
    CacheKey key() const {
262
        // For cheaper lookups, we allow a temporary Handle object
263
        // to store a pointer to a key in "value".
264
645k
        if (next == this) {
265
0
            return *(reinterpret_cast<CacheKey*>(value));
266
645k
        } else {
267
645k
            return {key_data, key_length};
268
645k
        }
269
645k
    }
270
271
317k
    void free() {
272
317k
        if (value != nullptr) { // value allows null pointer.
273
317k
            delete (LRUCacheValueBase*)value;
274
317k
        }
275
317k
        ::free(this);
276
317k
    }
277
};
278
279
// We provide our own simple hash table since it removes a whole bunch
280
// of porting hacks and is also faster than some of the built-in hash
281
// table implementations in some of the compiler/runtime combinations
282
// we have tested.  E.g., readrandom speeds up by ~5% over the g++
283
// 4.4.3's builtin hashtable.
284
285
class HandleTable {
286
public:
287
15.6k
    HandleTable() { _resize(); }
288
289
    ~HandleTable();
290
291
    LRUHandle* lookup(const CacheKey& key, uint32_t hash);
292
293
    LRUHandle* insert(LRUHandle* h);
294
295
    // Remove element from hash table by "key" and "hash".
296
    LRUHandle* remove(const CacheKey& key, uint32_t hash);
297
298
    // Remove element from hash table by "h", it would be faster
299
    // than the function above.
300
    // Return whether h is found and removed.
301
    bool remove(const LRUHandle* h);
302
303
    uint32_t element_count() const;
304
305
private:
306
    FRIEND_TEST(CacheTest, HandleTableTest);
307
308
    // The table consists of an array of buckets where each bucket is
309
    // a linked list of cache entries that hash into the bucket.
310
    uint32_t _length {};
311
    uint32_t _elems {};
312
    LRUHandle** _list = nullptr;
313
314
    // Return a pointer to slot that points to a cache entry that
315
    // matches key/hash.  If there is no such cache entry, return a
316
    // pointer to the trailing slot in the corresponding linked list.
317
    LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash);
318
319
    void _resize();
320
};
321
322
// pair first is timestamp, put <timestamp, LRUHandle*> into asc set,
323
// when space needs to be freed, we can evict from the beginning of the set first,
324
// because the first element's timestamp is the oldest.
325
using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
326
327
// A single shard of sharded cache.
328
class LRUCache {
329
public:
330
    LRUCache(LRUCacheType type, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K);
331
    ~LRUCache();
332
333
    // visits_lru_cache_key is the hash value of CacheKey.
334
    // If there is a hash conflict, a cache entry may be inserted early
335
    // and another cache entry with the same key hash may be inserted later.
336
    // Otherwise, this does not affect the correctness of the cache.
337
    using visits_lru_cache_key = uint32_t;
338
    using visits_lru_cache_pair = std::pair<visits_lru_cache_key, size_t>;
339
340
    // Separate from constructor so caller can easily make an array of LRUCache
341
    PrunedInfo set_capacity(size_t capacity);
342
15.6k
    void set_element_count_capacity(uint32_t element_count_capacity) {
343
15.6k
        _element_count_capacity = element_count_capacity;
344
15.6k
    }
345
346
    // Like Cache methods, but with an extra "hash" parameter.
347
    // Must call release on the returned handle pointer.
348
    Cache::Handle* insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
349
                          CachePriority priority = CachePriority::NORMAL);
350
    Cache::Handle* lookup(const CacheKey& key, uint32_t hash);
351
    void release(Cache::Handle* handle);
352
    void erase(const CacheKey& key, uint32_t hash);
353
    PrunedInfo prune();
354
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false);
355
    void for_each_entry(const std::function<void(const LRUHandle*)>& visitor);
356
357
    void set_cache_value_time_extractor(CacheValueTimeExtractor cache_value_time_extractor);
358
    void set_cache_value_check_timestamp(bool cache_value_check_timestamp);
359
360
    uint64_t get_lookup_count();
361
    uint64_t get_hit_count();
362
    uint64_t get_miss_count();
363
    uint64_t get_stampede_count();
364
365
    size_t get_usage();
366
    size_t get_capacity();
367
    size_t get_element_count();
368
369
private:
370
    void _lru_remove(LRUHandle* e);
371
    void _lru_append(LRUHandle* list, LRUHandle* e);
372
    bool _unref(LRUHandle* e);
373
    void _evict_from_lru(size_t total_size, LRUHandle** to_remove_head);
374
    void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head);
375
    void _evict_one_entry(LRUHandle* e);
376
    bool _check_element_count_limit();
377
    bool _lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key);
378
379
private:
380
    LRUCacheType _type;
381
382
    // Initialized before use.
383
    size_t _capacity = 0;
384
385
    // _mutex protects the following state.
386
    std::mutex _mutex;
387
    size_t _usage = 0;
388
389
    // Dummy head of LRU list.
390
    // Entries have refs==1 and in_cache==true.
391
    // _lru_normal.prev is newest entry, _lru_normal.next is oldest entry.
392
    LRUHandle _lru_normal;
393
    // _lru_durable.prev is newest entry, _lru_durable.next is oldest entry.
394
    LRUHandle _lru_durable;
395
396
    HandleTable _table;
397
398
    uint64_t _lookup_count = 0; // number of cache lookups
399
    uint64_t _hit_count = 0;    // number of cache hits
400
    uint64_t _miss_count = 0;   // number of cache misses
401
    uint64_t _stampede_count = 0;
402
403
    CacheValueTimeExtractor _cache_value_time_extractor;
404
    bool _cache_value_check_timestamp = false;
405
    LRUHandleSortedSet _sorted_normal_entries_with_timestamp;
406
    LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
407
408
    uint32_t _element_count_capacity = 0;
409
410
    bool _is_lru_k = false; // LRU-K algorithm, K=2
411
    std::list<visits_lru_cache_pair> _visits_lru_cache_list;
412
    std::unordered_map<visits_lru_cache_key, std::list<visits_lru_cache_pair>::iterator>
413
            _visits_lru_cache_map;
414
    size_t _visits_lru_cache_usage = 0;
415
};
416
417
class ShardedLRUCache : public Cache {
418
public:
419
    ~ShardedLRUCache() override;
420
    Handle* insert(const CacheKey& key, void* value, size_t charge,
421
                   CachePriority priority = CachePriority::NORMAL) override;
422
    Handle* lookup(const CacheKey& key) override;
423
    void release(Handle* handle) override;
424
    void erase(const CacheKey& key) override;
425
    void* value(Handle* handle) override;
426
    uint64_t new_id() override;
427
    PrunedInfo prune() override;
428
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override;
429
    void for_each_entry(const std::function<void(const LRUHandle*)>& visitor) override;
430
    int64_t get_usage() override;
431
    size_t get_element_count() override;
432
    PrunedInfo set_capacity(size_t capacity) override;
433
    size_t get_capacity() override;
434
435
private:
436
    // LRUCache can only be created and managed with LRUCachePolicy.
437
    friend class LRUCachePolicy;
438
439
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
440
                             uint32_t num_shards, uint32_t element_count_capacity, bool is_lru_k);
441
    explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
442
                             uint32_t num_shards,
443
                             CacheValueTimeExtractor cache_value_time_extractor,
444
                             bool cache_value_check_timestamp, uint32_t element_count_capacity,
445
                             bool is_lru_k);
446
447
    void update_cache_metrics() const;
448
449
private:
450
    static uint32_t _hash_slice(const CacheKey& s);
451
1.31M
    uint32_t _shard(uint32_t hash) const {
452
1.31M
        return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0;
453
1.31M
    }
454
455
    std::string _name;
456
    const int _num_shard_bits;
457
    const uint32_t _num_shards;
458
    LRUCache** _shards = nullptr;
459
    std::atomic<uint64_t> _last_id;
460
    std::mutex _mutex;
461
    size_t _capacity {0};
462
463
    std::shared_ptr<MetricEntity> _entity;
464
    IntGauge* cache_capacity = nullptr;
465
    IntGauge* cache_usage = nullptr;
466
    IntGauge* cache_element_count = nullptr;
467
    DoubleGauge* cache_usage_ratio = nullptr;
468
    IntCounter* cache_lookup_count = nullptr;
469
    IntCounter* cache_hit_count = nullptr;
470
    IntCounter* cache_miss_count = nullptr;
471
    IntCounter* cache_stampede_count = nullptr;
472
    DoubleGauge* cache_hit_ratio = nullptr;
473
    // bvars
474
    std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar;
475
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second;
476
    std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar;
477
    std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second;
478
};
479
480
// Compatible with ShardedLRUCache usage, but will not actually cache.
481
class DummyLRUCache : public Cache {
482
public:
483
    // Must call release on the returned handle pointer.
484
    Handle* insert(const CacheKey& key, void* value, size_t charge,
485
                   CachePriority priority = CachePriority::NORMAL) override;
486
187
    Handle* lookup(const CacheKey& key) override { return nullptr; };
487
    void release(Handle* handle) override;
488
146
    void erase(const CacheKey& key) override {};
489
    void* value(Handle* handle) override;
490
0
    uint64_t new_id() override { return 0; };
491
0
    PrunedInfo prune() override { return {0, 0}; };
492
0
    PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override {
493
0
        return {0, 0};
494
0
    };
495
0
    void for_each_entry(const std::function<void(const LRUHandle*)>& visitor) override {}
496
0
    int64_t get_usage() override { return 0; };
497
0
    PrunedInfo set_capacity(size_t capacity) override { return {0, 0}; };
498
0
    size_t get_capacity() override { return 0; };
499
0
    size_t get_element_count() override { return 0; };
500
};
501
502
} // namespace doris
503
#include "common/compile_check_end.h"