/root/doris/be/src/olap/page_cache.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <butil/macros.h> | 
| 21 |  | #include <gen_cpp/segment_v2.pb.h> | 
| 22 |  | #include <stddef.h> | 
| 23 |  | #include <stdint.h> | 
| 24 |  |  | 
| 25 |  | #include <memory> | 
| 26 |  | #include <string> | 
| 27 |  | #include <utility> | 
| 28 |  |  | 
| 29 |  | #include "olap/lru_cache.h" | 
| 30 |  | #include "runtime/memory/lru_cache_policy.h" | 
| 31 |  | #include "runtime/memory/mem_tracker_limiter.h" | 
| 32 |  | #include "util/slice.h" | 
| 33 |  | #include "vec/common/allocator.h" | 
| 34 |  | #include "vec/common/allocator_fwd.h" | 
| 35 |  |  | 
| 36 |  | namespace doris { | 
| 37 |  |  | 
| 38 |  | class PageCacheHandle; | 
| 39 |  |  | 
| 40 |  | template <typename T> | 
| 41 |  | class MemoryTrackedPageBase : public LRUCacheValueBase { | 
| 42 |  | public: | 
| 43 |  |     MemoryTrackedPageBase() = default; | 
| 44 |  |     MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type); | 
| 45 |  |  | 
| 46 |  |     MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete; | 
| 47 |  |     MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete; | 
| 48 | 44.7k |     ~MemoryTrackedPageBase() = default; _ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEED2Ev| Line | Count | Source |  | 48 | 2 |     ~MemoryTrackedPageBase() = default; | 
_ZN5doris21MemoryTrackedPageBaseIPcED2Ev| Line | Count | Source |  | 48 | 44.7k |     ~MemoryTrackedPageBase() = default; | 
 | 
| 49 |  |  | 
| 50 | 64.5k |     T data() { return _data; }_ZN5doris21MemoryTrackedPageBaseIPcE4dataEv| Line | Count | Source |  | 50 | 58.3k |     T data() { return _data; } | 
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEE4dataEv| Line | Count | Source |  | 50 | 6.23k |     T data() { return _data; } | 
 | 
| 51 | 983 |     size_t size() { return _size; } | 
| 52 |  |  | 
| 53 |  | protected: | 
| 54 |  |     T _data; | 
| 55 |  |     size_t _size = 0; | 
| 56 |  |     std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator; | 
| 57 |  | }; | 
| 58 |  |  | 
| 59 |  | class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> { | 
| 60 |  | public: | 
| 61 |  |     MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type); | 
| 62 |  |  | 
| 63 | 47.1k |     size_t capacity() { return this->_capacity; } | 
| 64 |  |  | 
| 65 |  |     ~MemoryTrackedPageWithPageEntity() override; | 
| 66 |  |  | 
| 67 | 25.5k |     void reset_size(size_t n) { | 
| 68 | 25.5k |         DCHECK(n <= this->_capacity); | 
| 69 | 25.5k |         this->_size = n; | 
| 70 | 25.5k |     } | 
| 71 |  |  | 
| 72 |  | private: | 
| 73 |  |     size_t _capacity = 0; | 
| 74 |  | }; | 
| 75 |  |  | 
| 76 |  | template <typename T> | 
| 77 |  | class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> { | 
| 78 |  | public: | 
| 79 |  |     MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type); | 
| 80 |  |  | 
| 81 |  |     ~MemoryTrackedPageWithPagePtr() override; | 
| 82 |  |  | 
| 83 | 4.92k |     void set_data(std::shared_ptr<T> data) { this->_data = data; } | 
| 84 |  | }; | 
| 85 |  |  | 
| 86 |  | using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>; | 
| 87 |  | using DataPage = MemoryTrackedPageWithPageEntity; | 
| 88 |  |  | 
| 89 |  | // Wrapper around Cache, and used for cache page of column data | 
| 90 |  | // in Segment. | 
| 91 |  | // TODO(zc): We should add some metric to see cache hit/miss rate. | 
| 92 |  | class StoragePageCache { | 
| 93 |  | public: | 
| 94 |  |     // The unique key identifying entries in the page cache. | 
| 95 |  |     // Each cached page corresponds to a specific offset within | 
| 96 |  |     // a file. | 
| 97 |  |     // | 
| 98 |  |     // TODO(zc): Now we use file name(std::string) as a part of | 
| 99 |  |     // key, which is not efficient. We should make it better later | 
| 100 |  |     struct CacheKey { | 
| 101 |  |         CacheKey(std::string fname_, size_t fsize_, int64_t offset_) | 
| 102 | 33.4k |                 : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {} | 
| 103 |  |         std::string fname; | 
| 104 |  |         size_t fsize; | 
| 105 |  |         int64_t offset; | 
| 106 |  |  | 
| 107 |  |         // Encode to a flat binary which can be used as LRUCache's key | 
| 108 | 23.4k |         std::string encode() const { | 
| 109 | 23.4k |             std::string key_buf(fname); | 
| 110 | 23.4k |             key_buf.append((char*)&fsize, sizeof(fsize)); | 
| 111 | 23.4k |             key_buf.append((char*)&offset, sizeof(offset)); | 
| 112 | 23.4k |             return key_buf; | 
| 113 | 23.4k |         } | 
| 114 |  |     }; | 
| 115 |  |  | 
| 116 |  |     class DataPageCache : public LRUCachePolicy { | 
| 117 |  |     public: | 
| 118 |  |         DataPageCache(size_t capacity, uint32_t num_shards) | 
| 119 |  |                 : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity, | 
| 120 |  |                                  LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec, | 
| 121 | 4 |                                  num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) { | 
| 122 | 4 |         } | 
| 123 |  |     }; | 
| 124 |  |  | 
| 125 |  |     class IndexPageCache : public LRUCachePolicy { | 
| 126 |  |     public: | 
| 127 |  |         IndexPageCache(size_t capacity, uint32_t num_shards) | 
| 128 |  |                 : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity, | 
| 129 |  |                                  LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec, | 
| 130 | 3 |                                  num_shards) {} | 
| 131 |  |     }; | 
| 132 |  |  | 
| 133 |  |     class PKIndexPageCache : public LRUCachePolicy { | 
| 134 |  |     public: | 
| 135 |  |         PKIndexPageCache(size_t capacity, uint32_t num_shards) | 
| 136 |  |                 : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity, | 
| 137 |  |                                  LRUCacheType::SIZE, | 
| 138 | 5 |                                  config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {} | 
| 139 |  |     }; | 
| 140 |  |  | 
| 141 |  |     static constexpr uint32_t kDefaultNumShards = 16; | 
| 142 |  |  | 
| 143 |  |     // Create global instance of this class | 
| 144 |  |     static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage, | 
| 145 |  |                                                  int64_t pk_index_cache_capacity, | 
| 146 |  |                                                  uint32_t num_shards = kDefaultNumShards); | 
| 147 |  |  | 
| 148 |  |     // Return global instance. | 
| 149 |  |     // Client should call create_global_cache before. | 
| 150 | 37.5k |     static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); } | 
| 151 |  |  | 
| 152 |  |     StoragePageCache(size_t capacity, int32_t index_cache_percentage, | 
| 153 |  |                      int64_t pk_index_cache_capacity, uint32_t num_shards); | 
| 154 |  |  | 
| 155 |  |     // Lookup the given page in the cache. | 
| 156 |  |     // | 
| 157 |  |     // If the page is found, the cache entry will be written into handle. | 
| 158 |  |     // PageCacheHandle will release cache entry to cache when it | 
| 159 |  |     // destructs. | 
| 160 |  |     // | 
| 161 |  |     // Cache type selection is determined by page_type argument | 
| 162 |  |     // | 
| 163 |  |     // Return true if entry is found, otherwise return false. | 
| 164 |  |     bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type); | 
| 165 |  |  | 
| 166 |  |     // Insert a page with key into this cache. | 
| 167 |  |     // Given handle will be set to valid reference. | 
| 168 |  |     // This function is thread-safe, and when two clients insert two same key | 
| 169 |  |     // concurrently, this function can assure that only one page is cached. | 
| 170 |  |     // The in_memory page will have higher priority. | 
| 171 |  |     void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle, | 
| 172 |  |                 segment_v2::PageTypePB page_type, bool in_memory = false); | 
| 173 |  |  | 
| 174 |  |     // Insert a std::share_ptr which points to a page into this cache. | 
| 175 |  |     // size should be the size of the page instead of shared_ptr. | 
| 176 |  |     // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr | 
| 177 |  |     // Since we are using std::shared_ptr, so lify cycle of the page is not managed by | 
| 178 |  |     // this cache alone. | 
| 179 |  |     // User could store a weak_ptr to the page, and lock it when needed. | 
| 180 |  |     // See Segment::_get_segment_footer for example. | 
| 181 |  |     template <typename T> | 
| 182 |  |     void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle, | 
| 183 |  |                 segment_v2::PageTypePB page_type, bool in_memory = false); | 
| 184 |  |  | 
| 185 | 11.0k |     std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) { | 
| 186 | 11.0k |         return _get_page_cache(page_type)->mem_tracker(); | 
| 187 | 11.0k |     } | 
| 188 |  |  | 
| 189 |  | private: | 
| 190 |  |     StoragePageCache(); | 
| 191 |  |  | 
| 192 |  |     int32_t _index_cache_percentage = 0; | 
| 193 |  |     std::unique_ptr<DataPageCache> _data_page_cache; | 
| 194 |  |     std::unique_ptr<IndexPageCache> _index_page_cache; | 
| 195 |  |     // Cache data for primary key index data page, seperated from data | 
| 196 |  |     // page cache to make it for flexible. we need this cache When construct | 
| 197 |  |     // delete bitmap in unique key with mow | 
| 198 |  |     std::unique_ptr<PKIndexPageCache> _pk_index_page_cache; | 
| 199 |  |  | 
| 200 | 34.4k |     LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) { | 
| 201 | 34.4k |         switch (page_type) { | 
| 202 | 1.71k |         case segment_v2::DATA_PAGE: {  Branch (202:9): [True: 1.71k, False: 32.7k]
 | 
| 203 | 1.71k |             return _data_page_cache.get(); | 
| 204 | 0 |         } | 
| 205 | 32.1k |         case segment_v2::INDEX_PAGE: {  Branch (205:9): [True: 32.1k, False: 2.36k]
 | 
| 206 | 32.1k |             return _index_page_cache.get(); | 
| 207 | 0 |         } | 
| 208 | 647 |         case segment_v2::PRIMARY_KEY_INDEX_PAGE: {  Branch (208:9): [True: 647, False: 33.8k]
 | 
| 209 | 647 |             return _pk_index_page_cache.get(); | 
| 210 | 0 |         } | 
| 211 | 0 |         default:   Branch (211:9): [True: 0, False: 34.4k]
 | 
| 212 | 0 |             LOG(FATAL) << "get error type page cache"; | 
| 213 | 0 |             __builtin_unreachable(); | 
| 214 | 34.4k |         } | 
| 215 | 0 |         LOG(FATAL) << "__builtin_unreachable"; | 
| 216 | 0 |         __builtin_unreachable(); | 
| 217 | 34.4k |     } | 
| 218 |  | }; | 
| 219 |  |  | 
| 220 |  | // A handle for StoragePageCache entry. This class make it easy to handle | 
| 221 |  | // Cache entry. Users don't need to release the obtained cache entry. This | 
| 222 |  | // class will release the cache entry when it is destroyed. | 
| 223 |  | class PageCacheHandle { | 
| 224 |  | public: | 
| 225 | 123k |     PageCacheHandle() = default; | 
| 226 |  |     PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle) | 
| 227 | 13.3k |             : _cache(cache), _handle(handle) {} | 
| 228 | 164k |     ~PageCacheHandle() { | 
| 229 | 164k |         if (_handle != nullptr) {  Branch (229:13): [True: 13.3k, False: 151k]
 | 
| 230 | 13.3k |             _cache->release(_handle); | 
| 231 | 13.3k |         } | 
| 232 | 164k |     } | 
| 233 |  |  | 
| 234 | 28.1k |     PageCacheHandle(PageCacheHandle&& other) noexcept { | 
| 235 |  |         // we can use std::exchange if we switch c++14 on | 
| 236 | 28.1k |         std::swap(_cache, other._cache); | 
| 237 | 28.1k |         std::swap(_handle, other._handle); | 
| 238 | 28.1k |     } | 
| 239 |  |  | 
| 240 | 56.0k |     PageCacheHandle& operator=(PageCacheHandle&& other) noexcept { | 
| 241 | 56.0k |         std::swap(_cache, other._cache); | 
| 242 | 56.0k |         std::swap(_handle, other._handle); | 
| 243 | 56.0k |         return *this; | 
| 244 | 56.0k |     } | 
| 245 |  |  | 
| 246 | 0 |     LRUCachePolicy* cache() const { return _cache; } | 
| 247 |  |     Slice data() const; | 
| 248 |  |  | 
| 249 |  |     template <typename T> | 
| 250 | 6.23k |     T get() const { | 
| 251 | 6.23k |         static_assert(std::is_same<typename std::remove_cv<T>::type, | 
| 252 | 6.23k |                                    std::shared_ptr<typename T::element_type>>::value, | 
| 253 | 6.23k |                       "T must be a std::shared_ptr"); | 
| 254 | 6.23k |         using ValueType = typename T::element_type; // Type that shared_ptr points to | 
| 255 | 6.23k |         MemoryTrackedPageWithPagePtr<ValueType>* page = | 
| 256 | 6.23k |                 (MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle); | 
| 257 | 6.23k |         return page->data(); | 
| 258 | 6.23k |     } | 
| 259 |  |  | 
| 260 |  | private: | 
| 261 |  |     LRUCachePolicy* _cache = nullptr; | 
| 262 |  |     Cache::Handle* _handle = nullptr; | 
| 263 |  |  | 
| 264 |  |     // Don't allow copy and assign | 
| 265 |  |     DISALLOW_COPY_AND_ASSIGN(PageCacheHandle); | 
| 266 |  | }; | 
| 267 |  |  | 
| 268 |  | } // namespace doris |