Coverage Report

Created: 2026-04-02 18:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/cache/page_cache.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <memory>
26
#include <string>
27
#include <utility>
28
29
#include "core/allocator.h"
30
#include "core/allocator_fwd.h"
31
#include "runtime/memory/lru_cache_policy.h"
32
#include "runtime/memory/mem_tracker_limiter.h"
33
#include "util/lru_cache.h"
34
#include "util/slice.h"
35
36
namespace doris {
37
38
class PageCacheHandle;
39
40
template <typename T>
41
class MemoryTrackedPageBase : public LRUCacheValueBase {
42
public:
43
    MemoryTrackedPageBase() = default;
44
    MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
45
    // Construct with an explicit mem-tracker (for caches outside StoragePageCache,
46
    // e.g. AnnIndexIVFListCache).
47
    MemoryTrackedPageBase(size_t b, std::shared_ptr<MemTrackerLimiter> mem_tracker);
48
49
    MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete;
50
    MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete;
51
44.4k
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEED2Ev
Line
Count
Source
51
3
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseIPcED2Ev
Line
Count
Source
51
44.4k
    ~MemoryTrackedPageBase() = default;
52
53
64.1k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseIPcE4dataEv
Line
Count
Source
53
58.2k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEE4dataEv
Line
Count
Source
53
5.87k
    T data() { return _data; }
54
1.09k
    size_t size() { return _size; }
55
56
protected:
57
    T _data;
58
    size_t _size = 0;
59
    std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator;
60
};
61
62
class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> {
63
public:
64
    MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
65
    // Construct with an explicit mem-tracker.
66
    MemoryTrackedPageWithPageEntity(size_t b, std::shared_ptr<MemTrackerLimiter> mem_tracker);
67
68
46.5k
    size_t capacity() { return this->_capacity; }
69
70
    ~MemoryTrackedPageWithPageEntity() override;
71
72
25.3k
    void reset_size(size_t n) {
73
25.3k
        DCHECK(n <= this->_capacity);
74
25.3k
        this->_size = n;
75
25.3k
    }
76
77
private:
78
    size_t _capacity = 0;
79
};
80
81
template <typename T>
82
class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> {
83
public:
84
    MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type);
85
86
    ~MemoryTrackedPageWithPagePtr() override;
87
88
4.89k
    void set_data(std::shared_ptr<T> data) { this->_data = data; }
89
};
90
91
using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>;
92
using DataPage = MemoryTrackedPageWithPageEntity;
93
94
// Wrapper around Cache, used to cache pages of column data
95
// in Segment.
96
// TODO(zc): We should add some metric to see cache hit/miss rate.
97
class StoragePageCache {
98
public:
99
    // The unique key identifying entries in the page cache.
100
    // Each cached page corresponds to a specific offset within
101
    // a file.
102
    //
103
    // TODO(zc): Now we use file name(std::string) as a part of
104
    // key, which is not efficient. We should make it better later
105
    struct CacheKey {
106
        CacheKey(std::string fname_, size_t fsize_, int64_t offset_)
107
32.9k
                : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {}
108
        std::string fname;
109
        size_t fsize;
110
        int64_t offset;
111
112
        // Encode to a flat binary which can be used as LRUCache's key
113
23.3k
        std::string encode() const {
114
23.3k
            std::string key_buf(fname);
115
23.3k
            key_buf.append((char*)&fsize, sizeof(fsize));
116
23.3k
            key_buf.append((char*)&offset, sizeof(offset));
117
23.3k
            return key_buf;
118
23.3k
        }
119
    };
120
121
    class DataPageCache : public LRUCachePolicy {
122
    public:
123
        DataPageCache(size_t capacity, uint32_t num_shards)
124
9
                : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
125
9
                                 LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
126
9
                                 num_shards, /*element_count_capacity*/ 0, /*enable_prune*/ true,
127
9
                                 /*is lru-k*/ true) {}
128
    };
129
130
    class IndexPageCache : public LRUCachePolicy {
131
    public:
132
        IndexPageCache(size_t capacity, uint32_t num_shards)
133
9
                : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
134
9
                                 LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
135
9
                                 num_shards, /*element_count_capacity*/ 0, /*enable_prune*/ true,
136
9
                                 /*is lru-k*/ false) {}
137
    };
138
139
    class PKIndexPageCache : public LRUCachePolicy {
140
    public:
141
        PKIndexPageCache(size_t capacity, uint32_t num_shards)
142
9
                : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
143
9
                                 LRUCacheType::SIZE,
144
9
                                 config::pk_index_page_cache_stale_sweep_time_sec, num_shards,
145
9
                                 /*element_count_capacity*/ 0, /*enable_prune*/ true,
146
9
                                 /*is lru-k*/ false) {}
147
    };
148
149
    static constexpr uint32_t kDefaultNumShards = 16;
150
151
    // Create global instance of this class
152
    static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage,
153
                                                 int64_t pk_index_cache_capacity,
154
                                                 uint32_t num_shards = kDefaultNumShards);
155
156
    // Return global instance.
157
    // Client should call create_global_cache before.
158
37.9k
    static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); }
159
160
    StoragePageCache(size_t capacity, int32_t index_cache_percentage,
161
                     int64_t pk_index_cache_capacity, uint32_t num_shards);
162
163
    // Lookup the given page in the cache.
164
    //
165
    // If the page is found, the cache entry will be written into handle.
166
    // PageCacheHandle will release cache entry to cache when it
167
    // destructs.
168
    //
169
    // Cache type selection is determined by page_type argument
170
    //
171
    // Return true if entry is found, otherwise return false.
172
    bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type);
173
174
    // Insert a page with key into this cache.
175
    // Given handle will be set to valid reference.
176
    // This function is thread-safe, and when two clients insert the same key
177
    // concurrently, this function can assure that only one page is cached.
178
    // The in_memory page will have higher priority.
179
    void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle,
180
                segment_v2::PageTypePB page_type, bool in_memory = false);
181
182
    // Insert a std::shared_ptr which points to a page into this cache.
183
    // size should be the size of the page instead of shared_ptr.
184
    // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr
185
    // Since we are using std::shared_ptr, the life cycle of the page is not managed by
186
    // this cache alone.
187
    // User could store a weak_ptr to the page, and lock it when needed.
188
    // See Segment::_get_segment_footer for example.
189
    template <typename T>
190
    void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle,
191
                segment_v2::PageTypePB page_type, bool in_memory = false);
192
193
11.0k
    std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) {
194
11.0k
        return _get_page_cache(page_type)->mem_tracker();
195
11.0k
    }
196
197
    // Erase the page with key from this cache.
198
43
    void erase(const CacheKey& key, segment_v2::PageTypePB page_type) {
199
43
        auto* cache = _get_page_cache(page_type);
200
43
        cache->erase(key.encode());
201
43
    }
202
203
private:
204
    StoragePageCache();
205
206
    int32_t _index_cache_percentage = 0;
207
    std::unique_ptr<DataPageCache> _data_page_cache;
208
    std::unique_ptr<IndexPageCache> _index_page_cache;
209
    // Cache data for primary key index data page, separated from data
210
    // page cache to make it more flexible. We need this cache when constructing
211
    // delete bitmap in unique key with mow
212
    std::unique_ptr<PKIndexPageCache> _pk_index_page_cache;
213
214
34.4k
    LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) {
215
34.4k
        switch (page_type) {
216
2.31k
        case segment_v2::DATA_PAGE: {
217
2.31k
            return _data_page_cache.get();
218
0
        }
219
31.4k
        case segment_v2::INDEX_PAGE: {
220
31.4k
            return _index_page_cache.get();
221
0
        }
222
647
        case segment_v2::PRIMARY_KEY_INDEX_PAGE: {
223
647
            return _pk_index_page_cache.get();
224
0
        }
225
0
        default:
226
0
            throw Exception(Status::FatalError("get error type page cache"));
227
34.4k
        }
228
0
        throw Exception(Status::FatalError("__builtin_unreachable"));
229
34.4k
    }
230
};
231
232
// A handle for a StoragePageCache entry. This class makes it easy to handle
233
// Cache entry. Users don't need to release the obtained cache entry. This
234
// class will release the cache entry when it is destroyed.
235
class PageCacheHandle {
236
public:
237
120k
    PageCacheHandle() = default;
238
    PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
239
13.0k
            : _cache(cache), _handle(handle) {}
240
161k
    ~PageCacheHandle() {
241
161k
        if (_handle != nullptr) {
242
13.0k
            _cache->release(_handle);
243
13.0k
        }
244
161k
    }
245
246
27.7k
    PageCacheHandle(PageCacheHandle&& other) noexcept {
247
        // we can use std::exchange if we switch c++14 on
248
27.7k
        std::swap(_cache, other._cache);
249
27.7k
        std::swap(_handle, other._handle);
250
27.7k
    }
251
252
55.4k
    PageCacheHandle& operator=(PageCacheHandle&& other) noexcept {
253
55.4k
        std::swap(_cache, other._cache);
254
55.4k
        std::swap(_handle, other._handle);
255
55.4k
        return *this;
256
55.4k
    }
257
258
196
    LRUCachePolicy* cache() const { return _cache; }
259
    Slice data() const;
260
261
    template <typename T>
262
5.87k
    T get() const {
263
5.87k
        static_assert(std::is_same<typename std::remove_cv<T>::type,
264
5.87k
                                   std::shared_ptr<typename T::element_type>>::value,
265
5.87k
                      "T must be a std::shared_ptr");
266
5.87k
        using ValueType = typename T::element_type; // Type that shared_ptr points to
267
5.87k
        MemoryTrackedPageWithPagePtr<ValueType>* page =
268
5.87k
                (MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle);
269
5.87k
        return page->data();
270
5.87k
    }
271
272
private:
273
    LRUCachePolicy* _cache = nullptr;
274
    Cache::Handle* _handle = nullptr;
275
276
    // Don't allow copy and assign
277
    DISALLOW_COPY_AND_ASSIGN(PageCacheHandle);
278
};
279
280
} // namespace doris