/root/doris/be/src/olap/lru_cache.h
Line | Count | Source |
1 | | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. See the AUTHORS file for names of contributors. |
4 | | |
5 | | #pragma once |
6 | | |
7 | | #include <assert.h> |
8 | | #include <butil/macros.h> |
9 | | #include <bvar/bvar.h> |
10 | | #include <glog/logging.h> |
11 | | #include <gtest/gtest_prod.h> |
12 | | #include <stdint.h> |
13 | | #include <stdlib.h> |
14 | | #include <string.h> |
15 | | |
16 | | #include <atomic> |
17 | | #include <functional> |
18 | | #include <memory> |
19 | | #include <set> |
20 | | #include <string> |
21 | | #include <utility> |
22 | | |
23 | | #include "runtime/memory/lru_cache_value_base.h" |
24 | | #include "util/doris_metrics.h" |
25 | | #include "util/metrics.h" |
26 | | |
27 | | namespace doris { |
28 | | |
29 | | #define OLAP_CACHE_STRING_TO_BUF(cur, str, r_len) \ |
30 | | do { \ |
31 | | if (r_len > str.size()) { \ |
32 | | memcpy(cur, str.c_str(), str.size()); \ |
33 | | r_len -= str.size(); \ |
34 | | cur += str.size(); \ |
35 | | } else { \ |
36 | | LOG(WARNING) << "construct cache key buf not enough."; \ |
37 | | return CacheKey(nullptr, 0); \ |
38 | | } \ |
39 | | } while (0) |
40 | | |
41 | | #define OLAP_CACHE_NUMERIC_TO_BUF(cur, numeric, r_len) \ |
42 | | do { \ |
43 | | if (r_len > sizeof(numeric)) { \ |
44 | | memcpy(cur, &numeric, sizeof(numeric)); \ |
45 | | r_len -= sizeof(numeric); \ |
46 | | cur += sizeof(numeric); \ |
47 | | } else { \ |
48 | | LOG(WARNING) << "construct cache key buf not enough."; \ |
49 | | return CacheKey(nullptr, 0); \ |
50 | | } \ |
51 | | } while (0) |
52 | | |
53 | | class Cache; |
54 | | class LRUCachePolicy; |
55 | | struct LRUHandle; |
56 | | |
57 | | enum LRUCacheType { |
58 | |     SIZE,  // Cache capacity is measured in the memory size of the cache entries: memory size = handle size + charge.
59 | |     NUMBER // Cache capacity is measured in the number of cache entries: charge acts as the entry's weight against the count.
60 | | }; |
61 | | |
62 | | static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE; |
63 | | static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32; |
64 | | static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0; |
65 | | |
66 | | class CacheKey { |
67 | | public: |
68 | 0 | CacheKey() : _data(nullptr), _size(0) {} |
69 | | // Create a slice that refers to d[0,n-1]. |
70 | 1.13M | CacheKey(const char* d, size_t n) : _data(d), _size(n) {} |
71 | | |
72 | | // Create a slice that refers to the contents of "s" |
73 | 21.5k | CacheKey(const std::string& s) : _data(s.data()), _size(s.size()) {} |
74 | | |
75 | | // Create a slice that refers to s[0,strlen(s)-1] |
76 | 42 | CacheKey(const char* s) : _data(s), _size(strlen(s)) {} |
77 | | |
78 | 1.15M | ~CacheKey() {} |
79 | | |
80 | | // Return a pointer to the beginning of the referenced data |
81 | 1.70M | const char* data() const { return _data; } |
82 | | |
83 | | // Return the length (in bytes) of the referenced data |
84 | 2.84M | size_t size() const { return _size; } |
85 | | |
86 | | // Return true if the length of the referenced data is zero |
87 | 0 | bool empty() const { return _size == 0; } |
88 | | |
89 | | // Return the ith byte in the referenced data. |
90 | | // REQUIRES: n < size() |
91 | 0 | char operator[](size_t n) const { |
92 | 0 | assert(n < size()); |
93 | 0 | return _data[n]; |
94 | 0 | } |
95 | | |
96 | | // Change this slice to refer to an empty array |
97 | 0 | void clear() { |
98 | 0 | _data = nullptr; |
99 | 0 | _size = 0; |
100 | 0 | } |
101 | | |
102 | | // Drop the first "n" bytes from this slice. |
103 | 0 | void remove_prefix(size_t n) { |
104 | 0 | assert(n <= size()); |
105 | 0 | _data += n; |
106 | 0 | _size -= n; |
107 | 0 | } |
108 | | |
109 | | // Return a string that contains the copy of the referenced data. |
110 | 0 | std::string to_string() const { return std::string(_data, _size); } |
111 | | |
112 | 274k | bool operator==(const CacheKey& other) const { |
113 | 274k | return ((size() == other.size()) && (memcmp(data(), other.data(), size()) == 0)); |
114 | 274k | } |
115 | | |
116 | 274k | bool operator!=(const CacheKey& other) const { return !(*this == other); } |
117 | | |
118 | 0 | int compare(const CacheKey& b) const { |
119 | 0 | const size_t min_len = (_size < b._size) ? _size : b._size; |
120 | 0 | int r = memcmp(_data, b._data, min_len); |
121 | 0 | if (r == 0) { |
122 | 0 | if (_size < b._size) { |
123 | 0 | r = -1; |
124 | 0 | } else if (_size > b._size) { |
125 | 0 | r = +1; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | return r; |
129 | 0 | } |
130 | | |
131 | | uint32_t hash(const char* data, size_t n, uint32_t seed) const; |
132 | | |
133 | | // Return true if "x" is a prefix of "*this" |
134 | 0 | bool starts_with(const CacheKey& x) const { |
135 | 0 | return ((_size >= x._size) && (memcmp(_data, x._data, x._size) == 0)); |
136 | 0 | } |
137 | | |
138 | | private: |
139 | 806k | uint32_t _decode_fixed32(const char* ptr) const { |
140 | | // Load the raw bytes |
141 | 806k | uint32_t result; |
142 | 806k | memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load |
143 | 806k | return result; |
144 | 806k | } |
145 | | |
146 | | const char* _data = nullptr; |
147 | | size_t _size; |
148 | | }; |
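// Usage sketch (hypothetical, not part of the original header): composing a cache key with the
// OLAP_CACHE_*_TO_BUF macros above. Each macro copies its argument into a caller-provided buffer,
// advancing the cursor "cur" and shrinking the remaining length "r_len"; on overflow it logs a
// warning and makes the enclosing function return an empty CacheKey. All names below are
// illustrative assumptions, not part of this file.
inline CacheKey example_compose_cache_key(char* buf, size_t buf_len, const std::string& file_name,
                                          int64_t offset) {
    char* cur = buf;        // write cursor, advanced by each macro
    size_t r_len = buf_len; // remaining space, decreased by each macro
    OLAP_CACHE_STRING_TO_BUF(cur, file_name, r_len);
    OLAP_CACHE_NUMERIC_TO_BUF(cur, offset, r_len);
    // The returned CacheKey only references "buf"; the caller must keep "buf" alive while the key is used.
    return CacheKey(buf, cur - buf);
}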
149 | | |
150 | | // Entries with a smaller CachePriority are evicted first.
151 | | enum class CachePriority { NORMAL = 0, DURABLE = 1 }; |
152 | | |
153 | | using CachePrunePredicate = std::function<bool(const LRUHandle*)>; |
154 | | // CacheValueTimeExtractor extracts a timestamp from a cache value
155 | | // through the specified function, e.g. last_visit_time
156 | | // in InvertedIndexSearcherCache::CacheValue.
157 | | using CacheValueTimeExtractor = std::function<int64_t(const void*)>; |
158 | | struct PrunedInfo { |
159 | | int64_t pruned_count = 0; |
160 | | int64_t pruned_size = 0; |
161 | | }; |
162 | | |
163 | | class Cache { |
164 | | public: |
165 | 325 | Cache() {} |
166 | | |
167 | | // Destroys all existing entries by calling the "deleter" |
168 | | // function that was passed to the constructor. |
169 | | virtual ~Cache(); |
170 | | |
171 | | // Opaque handle to an entry stored in the cache. |
172 | | struct Handle {}; |
173 | | |
174 | | // Insert a mapping from key->value into the cache and assign it |
175 | | // the specified charge against the total cache capacity. |
176 | | // |
177 | | // Returns a handle that corresponds to the mapping. The caller |
178 | | // must call this->release(handle) when the returned mapping is no |
179 | | // longer needed. |
180 | | // |
181 | | // When the inserted entry is no longer needed, the key and |
182 | | // value will be passed to "deleter". |
183 | | virtual Handle* insert(const CacheKey& key, void* value, size_t charge, |
184 | | CachePriority priority = CachePriority::NORMAL) = 0; |
185 | | |
186 | | // If the cache has no mapping for "key", returns nullptr. |
187 | | // |
188 | | // Else return a handle that corresponds to the mapping. The caller |
189 | | // must call this->release(handle) when the returned mapping is no |
190 | | // longer needed. |
191 | | virtual Handle* lookup(const CacheKey& key) = 0; |
192 | | |
193 | | // Release a mapping returned by a previous Lookup(). |
194 | | // REQUIRES: handle must not have been released yet. |
195 | | // REQUIRES: handle must have been returned by a method on *this. |
196 | | virtual void release(Handle* handle) = 0; |
197 | | |
198 | | // Return the value encapsulated in a handle returned by a |
199 | | // successful lookup(). |
200 | | // REQUIRES: handle must not have been released yet. |
201 | | // REQUIRES: handle must have been returned by a method on *this. |
202 | | virtual void* value(Handle* handle) = 0; |
203 | | |
204 | | // If the cache contains entry for key, erase it. Note that the |
205 | | // underlying entry will be kept around until all existing handles |
206 | | // to it have been released. |
207 | | virtual void erase(const CacheKey& key) = 0; |
208 | | |
209 | | // Return a new numeric id. May be used by multiple clients who are |
210 | | // sharing the same cache to partition the key space. Typically the |
211 | | // client will allocate a new id at startup and prepend the id to |
212 | | // its cache keys. |
213 | | virtual uint64_t new_id() = 0; |
214 | | |
215 | | // Remove all cache entries that are not actively in use. Memory-constrained |
216 | | // applications may wish to call this method to reduce memory usage. |
217 | | // Default implementation of Prune() does nothing. Subclasses are strongly |
218 | | // encouraged to override the default implementation. A future release of |
219 | | // leveldb may change prune() to a pure abstract method. |
220 | |     // Returns the number and total size of the entries pruned.
221 | 0 | virtual PrunedInfo prune() { return {0, 0}; } |
222 | | |
223 | |     // Same as prune(), but an entry is only pruned if the predicate matches.
224 | |     // NOTICE: the predicate should be cheap; otherwise prune_if()
225 | |     // may hold the lock for a long time while evaluating it.
226 | 0 | virtual PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) { return {0, 0}; } |
227 | | |
228 | | virtual int64_t get_usage() = 0; |
229 | | |
230 | | virtual PrunedInfo set_capacity(size_t capacity) = 0; |
231 | | virtual size_t get_capacity() = 0; |
232 | | |
233 | | virtual size_t get_element_count() = 0; |
234 | | |
235 | | private: |
236 | | DISALLOW_COPY_AND_ASSIGN(Cache); |
237 | | }; |
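// Usage sketch (hypothetical, not part of the original header): the typical insert/lookup/release
// round trip on a Cache*. The value must be heap-allocated and derive from LRUCacheValueBase,
// because LRUHandle::free() below deletes it through that type. With LRUCacheType::SIZE the charge
// is the entry's memory footprint in bytes; with LRUCacheType::NUMBER it is the entry's weight
// against an element-count capacity. The function name is illustrative only.
inline void example_insert_then_lookup(Cache* cache, const CacheKey& key, void* value,
                                       size_t charge) {
    // insert() takes ownership of "value"; the returned handle pins the entry.
    Cache::Handle* inserted = cache->insert(key, value, charge, CachePriority::NORMAL);
    cache->release(inserted); // unpin; the entry stays cached until evicted or erased

    if (Cache::Handle* found = cache->lookup(key)) {
        void* cached = cache->value(found);
        (void)cached;          // ... use the value only while the handle is held ...
        cache->release(found); // every successful lookup() must be paired with release()
    }
}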
238 | | |
239 | | // An entry is a variable length heap-allocated structure. Entries |
240 | | // are kept in a circular doubly linked list ordered by access time. |
241 | | // Note: member variables may only be POD types and raw pointers,
242 | | // not class objects or smart pointers, because LRUHandle is allocated with malloc.
243 | | struct LRUHandle { |
244 | | void* value = nullptr; |
245 | | struct LRUHandle* next_hash = nullptr; // next entry in hash table |
246 | | struct LRUHandle* next = nullptr; // next entry in lru list |
247 | | struct LRUHandle* prev = nullptr; // previous entry in lru list |
248 | | size_t charge; |
249 | | size_t key_length; |
250 | |     size_t total_size; // Entry charge used to limit cache capacity; for LRUCacheType::SIZE it also includes the key length.
251 | | bool in_cache; // Whether entry is in the cache. |
252 | | uint32_t refs; |
253 | | uint32_t hash; // Hash of key(); used for fast sharding and comparisons |
254 | | CachePriority priority = CachePriority::NORMAL; |
255 | | LRUCacheType type; |
256 | |     int64_t last_visit_time; // Last visit time of this cache entry.
257 | | char key_data[1]; // Beginning of key |
258 | | // Note! key_data must be at the end. |
259 | | |
260 | 562k | CacheKey key() const { |
261 | | // For cheaper lookups, we allow a temporary Handle object |
262 | | // to store a pointer to a key in "value". |
263 | 562k | if (next == this) { |
264 | 0 | return *(reinterpret_cast<CacheKey*>(value)); |
265 | 562k | } else { |
266 | 562k | return CacheKey(key_data, key_length); |
267 | 562k | } |
268 | 562k | } |
269 | | |
270 | 283k | void free() { |
271 | 283k |         if (value != nullptr) { // value is allowed to be nullptr.
272 | 283k | delete (LRUCacheValueBase*)value; |
273 | 283k | } |
274 | 283k | ::free(this); |
275 | 283k | } |
276 | | }; |
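// Allocation sketch (an assumption based on the comments above; the real code lives in
// lru_cache.cpp): because the key bytes are stored inline after the struct through the trailing
// key_data[1] member, a handle is malloc'ed with extra room for the key instead of being
// constructed with new. The remaining fields (list pointers, hash, type, total_size, ...) are
// filled in by the cache before the handle is published.
inline LRUHandle* example_alloc_handle(const CacheKey& key, void* value, size_t charge) {
    auto* e = reinterpret_cast<LRUHandle*>(malloc(sizeof(LRUHandle) - 1 + key.size()));
    e->value = value;
    e->charge = charge;
    e->key_length = key.size();
    e->refs = 1;         // one reference held by the caller
    e->in_cache = false; // not visible in the hash table yet
    memcpy(e->key_data, key.data(), key.size());
    return e;
}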
277 | | |
278 | | // We provide our own simple hash table since it removes a whole bunch
279 | | // of porting hacks and is also faster than some of the built-in hash
280 | | // table implementations in some of the compiler/runtime combinations
281 | | // we have tested. E.g., readrandom speeds up by ~5% over the g++
282 | | // 4.4.3's builtin hashtable.
283 | | |
284 | | class HandleTable { |
285 | | public: |
286 | 5.72k | HandleTable() : _length(0), _elems(0), _list(nullptr) { _resize(); } |
287 | | |
288 | | ~HandleTable(); |
289 | | |
290 | | LRUHandle* lookup(const CacheKey& key, uint32_t hash); |
291 | | |
292 | | LRUHandle* insert(LRUHandle* h); |
293 | | |
294 | | // Remove element from hash table by "key" and "hash". |
295 | | LRUHandle* remove(const CacheKey& key, uint32_t hash); |
296 | | |
297 | | // Remove element from hash table by "h", it would be faster |
298 | | // than the function above. |
299 | | // Return whether h is found and removed. |
300 | | bool remove(const LRUHandle* h); |
301 | | |
302 | | uint32_t element_count() const; |
303 | | |
304 | | private: |
305 | | FRIEND_TEST(CacheTest, HandleTableTest); |
306 | | |
307 | |     // The table consists of an array of buckets where each bucket is
308 | | // a linked list of cache entries that hash into the bucket. |
309 | | uint32_t _length; |
310 | | uint32_t _elems; |
311 | | LRUHandle** _list = nullptr; |
312 | | |
313 | | // Return a pointer to slot that points to a cache entry that |
314 | | // matches key/hash. If there is no such cache entry, return a |
315 | | // pointer to the trailing slot in the corresponding linked list. |
316 | | LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash); |
317 | | |
318 | | void _resize(); |
319 | | }; |
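// Lookup sketch (an assumption; the actual implementation is HandleTable::_find_pointer in
// lru_cache.cpp): with a power-of-two bucket count, a key's bucket is typically chosen by masking
// its hash, and collisions are resolved by walking the bucket's singly linked list via next_hash.
inline LRUHandle* example_bucket_scan(LRUHandle* bucket_head, const CacheKey& key, uint32_t hash) {
    // bucket_head would be _list[hash & (_length - 1)] inside the table.
    LRUHandle* e = bucket_head;
    while (e != nullptr && (e->hash != hash || e->key() != key)) {
        e = e->next_hash;
    }
    return e; // nullptr if the key is not present in this bucket
}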
320 | | |
321 | | // The pair's first element is a timestamp; <timestamp, LRUHandle*> pairs are kept in a set
322 | | // in ascending order, so when space must be freed the entry at begin() can be evicted first,
323 | | // because it has the oldest timestamp.
324 | | using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>; |
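// Eviction-order sketch (an assumption; the real logic lives in LRUCache::_evict_from_lru_with_time
// in lru_cache.cpp): std::pair orders by its first element, so the set is sorted ascending by
// timestamp and the oldest entry is always *begin(). Entries would be added as
// entries.emplace(handle->last_visit_time, handle).
inline LRUHandle* example_oldest_entry(const LRUHandleSortedSet& entries) {
    if (entries.empty()) {
        return nullptr;
    }
    return entries.begin()->second; // smallest timestamp == least recently visited entry
}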
325 | | |
326 | | // A single shard of sharded cache. |
327 | | class LRUCache { |
328 | | public: |
329 | | LRUCache(LRUCacheType type); |
330 | | ~LRUCache(); |
331 | | |
332 | | // Separate from constructor so caller can easily make an array of LRUCache |
333 | | PrunedInfo set_capacity(size_t capacity); |
334 | 5.72k | void set_element_count_capacity(uint32_t element_count_capacity) { |
335 | 5.72k | _element_count_capacity = element_count_capacity; |
336 | 5.72k | } |
337 | | |
338 | | // Like Cache methods, but with an extra "hash" parameter. |
339 | | // Must call release on the returned handle pointer. |
340 | | Cache::Handle* insert(const CacheKey& key, uint32_t hash, void* value, size_t charge, |
341 | | CachePriority priority = CachePriority::NORMAL); |
342 | | Cache::Handle* lookup(const CacheKey& key, uint32_t hash); |
343 | | void release(Cache::Handle* handle); |
344 | | void erase(const CacheKey& key, uint32_t hash); |
345 | | PrunedInfo prune(); |
346 | | PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false); |
347 | | |
348 | | void set_cache_value_time_extractor(CacheValueTimeExtractor cache_value_time_extractor); |
349 | | void set_cache_value_check_timestamp(bool cache_value_check_timestamp); |
350 | | |
351 | | uint64_t get_lookup_count(); |
352 | | uint64_t get_hit_count(); |
353 | | uint64_t get_miss_count(); |
354 | | uint64_t get_stampede_count(); |
355 | | |
356 | | size_t get_usage(); |
357 | | size_t get_capacity(); |
358 | | size_t get_element_count(); |
359 | | |
360 | | private: |
361 | | void _lru_remove(LRUHandle* e); |
362 | | void _lru_append(LRUHandle* list, LRUHandle* e); |
363 | | bool _unref(LRUHandle* e); |
364 | | void _evict_from_lru(size_t total_size, LRUHandle** to_remove_head); |
365 | | void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head); |
366 | | void _evict_one_entry(LRUHandle* e); |
367 | | bool _check_element_count_limit(); |
368 | | |
369 | | private: |
370 | | LRUCacheType _type; |
371 | | |
372 | | // Initialized before use. |
373 | | size_t _capacity = 0; |
374 | | |
375 | | // _mutex protects the following state. |
376 | | std::mutex _mutex; |
377 | | size_t _usage = 0; |
378 | | |
379 | | // Dummy head of LRU list. |
380 | | // Entries have refs==1 and in_cache==true. |
381 | | // _lru_normal.prev is newest entry, _lru_normal.next is oldest entry. |
382 | | LRUHandle _lru_normal; |
383 | | // _lru_durable.prev is newest entry, _lru_durable.next is oldest entry. |
384 | | LRUHandle _lru_durable; |
385 | | |
386 | | HandleTable _table; |
387 | | |
388 | | uint64_t _lookup_count = 0; // number of cache lookups |
389 | | uint64_t _hit_count = 0; // number of cache hits |
390 | | uint64_t _miss_count = 0; // number of cache misses |
391 | | uint64_t _stampede_count = 0; |
392 | | |
393 | | CacheValueTimeExtractor _cache_value_time_extractor; |
394 | | bool _cache_value_check_timestamp = false; |
395 | | LRUHandleSortedSet _sorted_normal_entries_with_timestamp; |
396 | | LRUHandleSortedSet _sorted_durable_entries_with_timestamp; |
397 | | |
398 | | uint32_t _element_count_capacity = 0; |
399 | | }; |
400 | | |
401 | | class ShardedLRUCache : public Cache { |
402 | | public: |
403 | | virtual ~ShardedLRUCache(); |
404 | | virtual Handle* insert(const CacheKey& key, void* value, size_t charge, |
405 | | CachePriority priority = CachePriority::NORMAL) override; |
406 | | virtual Handle* lookup(const CacheKey& key) override; |
407 | | virtual void release(Handle* handle) override; |
408 | | virtual void erase(const CacheKey& key) override; |
409 | | virtual void* value(Handle* handle) override; |
410 | | virtual uint64_t new_id() override; |
411 | | PrunedInfo prune() override; |
412 | | PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override; |
413 | | int64_t get_usage() override; |
414 | | size_t get_element_count() override; |
415 | | PrunedInfo set_capacity(size_t capacity) override; |
416 | | size_t get_capacity() override; |
417 | | |
418 | | private: |
419 | | // LRUCache can only be created and managed with LRUCachePolicy. |
420 | | friend class LRUCachePolicy; |
421 | | |
422 | | explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, |
423 | | uint32_t num_shards, uint32_t element_count_capacity); |
424 | | explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, |
425 | | uint32_t num_shards, |
426 | | CacheValueTimeExtractor cache_value_time_extractor, |
427 | | bool cache_value_check_timestamp, uint32_t element_count_capacity); |
428 | | |
429 | | void update_cache_metrics() const; |
430 | | |
431 | | private: |
432 | | static uint32_t _hash_slice(const CacheKey& s); |
433 | 1.13M | uint32_t _shard(uint32_t hash) { |
434 | 1.13M | return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0; |
435 | 1.13M | } |
436 | | |
437 | | std::string _name; |
438 | | const int _num_shard_bits; |
439 | | const uint32_t _num_shards; |
440 | | LRUCache** _shards = nullptr; |
441 | | std::atomic<uint64_t> _last_id; |
442 | | std::mutex _mutex; |
443 | | size_t _capacity {0}; |
444 | | |
445 | | std::shared_ptr<MetricEntity> _entity; |
446 | | IntGauge* cache_capacity = nullptr; |
447 | | IntGauge* cache_usage = nullptr; |
448 | | IntGauge* cache_element_count = nullptr; |
449 | | DoubleGauge* cache_usage_ratio = nullptr; |
450 | | IntAtomicCounter* cache_lookup_count = nullptr; |
451 | | IntAtomicCounter* cache_hit_count = nullptr; |
452 | | IntAtomicCounter* cache_miss_count = nullptr; |
453 | | IntAtomicCounter* cache_stampede_count = nullptr; |
454 | | DoubleGauge* cache_hit_ratio = nullptr; |
455 | | // bvars |
456 | | std::unique_ptr<bvar::Adder<uint64_t>> _hit_count_bvar; |
457 | | std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _hit_count_per_second; |
458 | | std::unique_ptr<bvar::Adder<uint64_t>> _lookup_count_bvar; |
459 | | std::unique_ptr<bvar::PerSecond<bvar::Adder<uint64_t>>> _lookup_count_per_second; |
460 | | }; |
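// Shard-selection sketch (hypothetical helper mirroring ShardedLRUCache::_shard above): with a
// power-of-two shard count, the shard index is taken from the top _num_shard_bits bits of the key
// hash, so keys spread across shards and each shard (one LRUCache with its own mutex) only sees a
// fraction of the traffic. E.g. with DEFAULT_LRU_CACHE_NUM_SHARDS = 32, num_shard_bits is 5 and
// the top 5 bits of the hash pick one of the 32 shards.
inline uint32_t example_shard_index(uint32_t hash, int num_shard_bits) {
    return num_shard_bits > 0 ? (hash >> (32 - num_shard_bits)) : 0;
}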
461 | | |
462 | | // Compatible with ShardedLRUCache usage, but will not actually cache. |
463 | | class DummyLRUCache : public Cache { |
464 | | public: |
465 | | // Must call release on the returned handle pointer. |
466 | | Handle* insert(const CacheKey& key, void* value, size_t charge, |
467 | | CachePriority priority = CachePriority::NORMAL) override; |
468 | 176 | Handle* lookup(const CacheKey& key) override { return nullptr; }; |
469 | | void release(Handle* handle) override; |
470 | 146 | void erase(const CacheKey& key) override {}; |
471 | | void* value(Handle* handle) override; |
472 | 0 | uint64_t new_id() override { return 0; }; |
473 | 0 | PrunedInfo prune() override { return {0, 0}; }; |
474 | 0 | PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override { |
475 | 0 | return {0, 0}; |
476 | 0 | }; |
477 | 0 | int64_t get_usage() override { return 0; }; |
478 | 0 | PrunedInfo set_capacity(size_t capacity) override { return {0, 0}; }; |
479 | 0 | size_t get_capacity() override { return 0; }; |
480 | 0 | size_t get_element_count() override { return 0; }; |
481 | | }; |
482 | | |
483 | | } // namespace doris |