Coverage Report

Created: 2025-09-12 17:50

/root/doris/be/src/olap/page_cache.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <memory>
26
#include <string>
27
#include <utility>
28
29
#include "olap/lru_cache.h"
30
#include "runtime/memory/lru_cache_policy.h"
31
#include "runtime/memory/mem_tracker_limiter.h"
32
#include "util/slice.h"
33
#include "vec/common/allocator.h"
34
#include "vec/common/allocator_fwd.h"
35
36
namespace doris {
37
38
class PageCacheHandle;
39
40
// Base class for values stored in the page cache. It derives from
// LRUCacheValueBase and holds a payload of type T, the payload size and a
// shared reference to a MemTrackerLimiter. The sized constructor is only
// declared here; its definition is not part of this listing.
template <typename T>
41
class MemoryTrackedPageBase : public LRUCacheValueBase {
42
public:
43
    MemoryTrackedPageBase() = default;
44
    MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
45
46
    // Copy construction and copy assignment are disabled.
    MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete;
47
    MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete;
48
44.8k
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEED2Ev
Line
Count
Source
48
2
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseIPcED2Ev
Line
Count
Source
48
44.8k
    ~MemoryTrackedPageBase() = default;
49
50
64.5k
    // Returns the payload by value (a copy of _data).
    // NOTE(review): neither data() nor size() is const-qualified.
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseIPcE4dataEv
Line
Count
Source
50
58.3k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEE4dataEv
Line
Count
Source
50
6.23k
    T data() { return _data; }
51
983
    // Returns the recorded payload size (_size).
    size_t size() { return _size; }
52
53
protected:
54
    // Payload; no default member initializer is given here.
    T _data;
55
    size_t _size = 0;
56
    std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator;
57
};
58
59
// Page whose payload is a raw char* buffer (MemoryTrackedPageBase<char*>).
// It also inherits privately from Allocator<false>.
// NOTE(review): presumably the buffer is obtained and freed through that
// allocator in the constructor/destructor; their definitions are not visible
// here, so confirm in the implementation file.
class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> {
60
public:
61
    MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
62
63
47.1k
    // Returns the stored capacity (_capacity).
    size_t capacity() { return this->_capacity; }
64
65
    ~MemoryTrackedPageWithPageEntity() override;
66
67
25.6k
    // Sets the logical size of the page to n. The DCHECK asserts that n does
    // not exceed the capacity; the buffer itself is not touched.
    void reset_size(size_t n) {
68
25.6k
        DCHECK(n <= this->_capacity);
69
25.6k
        this->_size = n;
70
25.6k
    }
71
72
private:
73
    size_t _capacity = 0;
74
};
75
76
// Page whose payload is a std::shared_ptr<T> rather than a raw buffer.
// The constructor and destructor are only declared in this listing.
template <typename T>
77
class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> {
78
public:
79
    MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type);
80
81
    ~MemoryTrackedPageWithPagePtr() override;
82
83
4.92k
    // Stores the given shared_ptr as the payload (copy-assigned into _data).
    void set_data(std::shared_ptr<T> data) { this->_data = data; }
84
};
85
86
// Page type holding a std::shared_ptr<segment_v2::SegmentFooterPB>.
// NOTE(review): "Semgnet" looks like a misspelling of "Segment"; the name is
// left as is because code outside this listing may refer to it.
using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>;
87
// Page type holding a raw char buffer.
using DataPage = MemoryTrackedPageWithPageEntity;
88
89
// Wrapper around Cache, and used for cache page of column data
90
// in Segment.
91
// TODO(zc): We should add some metric to see cache hit/miss rate.
92
class StoragePageCache {
93
public:
94
    // The unique key identifying entries in the page cache.
95
    // Each cached page corresponds to a specific offset within
96
    // a file.
97
    //
98
    // TODO(zc): Now we use file name(std::string) as a part of
99
    // key, which is not efficient. We should make it better later
100
    // Key made of a file name, a file size and an offset.
    struct CacheKey {
101
        // fname_ is taken by value and moved into the member.
        CacheKey(std::string fname_, size_t fsize_, int64_t offset_)
102
33.4k
                : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {}
103
        std::string fname;
104
        size_t fsize;
105
        int64_t offset;
106
107
        // Encode to a flat binary which can be used as LRUCache's key.
        // The result is fname followed by the raw object bytes of fsize
        // (sizeof(fsize) bytes) and of offset (sizeof(offset) bytes), so it
        // generally contains non-printable characters.
108
23.4k
        std::string encode() const {
109
23.4k
            std::string key_buf(fname);
110
23.4k
            key_buf.append((char*)&fsize, sizeof(fsize));
111
23.4k
            key_buf.append((char*)&offset, sizeof(offset));
112
23.4k
            return key_buf;
113
23.4k
        }
114
    };
115
116
    // LRUCachePolicy subclass used for data pages. It constructs its base with
    // CacheType::DATA_PAGE_CACHE, LRUCacheType::SIZE, the stale sweep time from
    // config::data_page_cache_stale_sweep_time_sec, the shard count and
    // DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY.
    // NOTE(review): the meaning of the two trailing `true` arguments is defined
    // by the LRUCachePolicy constructor, which is not visible here.
    class DataPageCache : public LRUCachePolicy {
117
    public:
118
        DataPageCache(size_t capacity, uint32_t num_shards)
119
                : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
120
                                 LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
121
4
                                 num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
122
4
        }
123
    };
124
125
    // LRUCachePolicy subclass used for index pages. It constructs its base with
    // CacheType::INDEXPAGE_CACHE, LRUCacheType::SIZE, the stale sweep time from
    // config::index_page_cache_stale_sweep_time_sec and the shard count; no
    // further base-constructor arguments are passed.
    class IndexPageCache : public LRUCachePolicy {
126
    public:
127
        IndexPageCache(size_t capacity, uint32_t num_shards)
128
                : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
129
                                 LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
130
3
                                 num_shards) {}
131
    };
132
133
    // LRUCachePolicy subclass used for primary key index pages. It constructs
    // its base with CacheType::PK_INDEX_PAGE_CACHE, LRUCacheType::SIZE, the
    // stale sweep time from config::pk_index_page_cache_stale_sweep_time_sec
    // and the shard count; no further base-constructor arguments are passed.
    class PKIndexPageCache : public LRUCachePolicy {
134
    public:
135
        PKIndexPageCache(size_t capacity, uint32_t num_shards)
136
                : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
137
                                 LRUCacheType::SIZE,
138
5
                                 config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {}
139
    };
140
141
    static constexpr uint32_t kDefaultNumShards = 16;
142
143
    // Create global instance of this class
144
    static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage,
145
                                                 int64_t pk_index_cache_capacity,
146
                                                 uint32_t num_shards = kDefaultNumShards);
147
148
    // Return the global instance, i.e. the pointer returned by
    // ExecEnv::GetInstance()->get_storage_page_cache().
149
    // Clients should call create_global_cache before using it.
150
37.5k
    static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); }
151
152
    StoragePageCache(size_t capacity, int32_t index_cache_percentage,
153
                     int64_t pk_index_cache_capacity, uint32_t num_shards);
154
155
    // Lookup the given page in the cache.
156
    //
157
    // If the page is found, the cache entry will be written into handle.
158
    // PageCacheHandle will release cache entry to cache when it
159
    // destructs.
160
    //
161
    // Cache type selection is determined by page_type argument
162
    //
163
    // Return true if entry is found, otherwise return false.
164
    bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type);
165
166
    // Insert a page with key into this cache.
167
    // Given handle will be set to valid reference.
168
    // This function is thread-safe, and when two clients insert the same key
169
    // concurrently, this function can assure that only one page is cached.
170
    // The in_memory page will have higher priority.
171
    void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle,
172
                segment_v2::PageTypePB page_type, bool in_memory = false);
173
174
    // Insert a std::shared_ptr which points to a page into this cache.
175
    // size should be the size of the page instead of shared_ptr.
176
    // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr
177
    // Since we are using std::shared_ptr, the life cycle of the page is not managed by
178
    // this cache alone.
179
    // User could store a weak_ptr to the page, and lock it when needed.
180
    // See Segment::_get_segment_footer for example.
181
    template <typename T>
182
    void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle,
183
                segment_v2::PageTypePB page_type, bool in_memory = false);
184
185
11.0k
    // Returns the memory tracker of the cache selected for page_type.
    std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) {
186
11.0k
        return _get_page_cache(page_type)->mem_tracker();
187
11.0k
    }
188
189
private:
190
    StoragePageCache();
191
192
    int32_t _index_cache_percentage = 0;
193
    std::unique_ptr<DataPageCache> _data_page_cache;
194
    std::unique_ptr<IndexPageCache> _index_page_cache;
195
    // Cache for primary key index data pages, separated from the data
196
    // page cache to make it more flexible. We need this cache when constructing
197
    // the delete bitmap in unique key with mow
198
    std::unique_ptr<PKIndexPageCache> _pk_index_page_cache;
199
200
34.4k
    // Maps a page type to the cache that holds pages of that type and returns
    // a non-owning pointer to it. Page types other than DATA_PAGE, INDEX_PAGE
    // and PRIMARY_KEY_INDEX_PAGE are reported with LOG(FATAL).
    LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) {
201
34.4k
        switch (page_type) {
202
17.7k
        case segment_v2::DATA_PAGE: {
  Branch (202:9): [True: 17.7k, False: 16.6k]
203
17.7k
            return _data_page_cache.get();
204
0
        }
205
16.0k
        case segment_v2::INDEX_PAGE: {
  Branch (205:9): [True: 16.0k, False: 18.4k]
206
16.0k
            return _index_page_cache.get();
207
0
        }
208
647
        case segment_v2::PRIMARY_KEY_INDEX_PAGE: {
  Branch (208:9): [True: 647, False: 33.8k]
209
647
            return _pk_index_page_cache.get();
210
0
        }
211
0
        default:
  Branch (211:9): [True: 0, False: 34.4k]
212
0
            LOG(FATAL) << "get error type page cache";
213
0
            __builtin_unreachable();
214
34.4k
        }
215
0
        // Not reachable: every branch of the switch above either returns or
        // ends in __builtin_unreachable().
        LOG(FATAL) << "__builtin_unreachable";
216
0
        __builtin_unreachable();
217
34.4k
    }
218
};
219
220
// A handle for a StoragePageCache entry. This class makes it easy to handle
221
// Cache entry. Users don't need to release the obtained cache entry. This
222
// class will release the cache entry when it is destroyed.
223
// Move-only holder of a (cache, handle) pair. A default-constructed object
// holds nothing; a non-null handle is released back to its cache on
// destruction.
class PageCacheHandle {
224
public:
225
123k
    PageCacheHandle() = default;
226
    // Stores the given cache and handle pointers as they are.
    PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
227
13.3k
            : _cache(cache), _handle(handle) {}
228
164k
    // Releases the held entry through _cache when _handle is not null.
    ~PageCacheHandle() {
229
164k
        if (_handle != nullptr) {
  Branch (229:13): [True: 13.3k, False: 151k]
230
13.3k
            _cache->release(_handle);
231
13.3k
        }
232
164k
    }
233
234
28.1k
    PageCacheHandle(PageCacheHandle&& other) noexcept {
235
        // Both members start as nullptr (default member initializers), so
        // swapping takes over other's entry and leaves other empty.
236
28.1k
        std::swap(_cache, other._cache);
237
28.1k
        std::swap(_handle, other._handle);
238
28.1k
    }
239
240
56.1k
    // Move assignment swaps state with other instead of releasing at once:
    // the entry previously held by *this ends up in other and is released
    // when other is destroyed.
    PageCacheHandle& operator=(PageCacheHandle&& other) noexcept {
241
56.1k
        std::swap(_cache, other._cache);
242
56.1k
        std::swap(_handle, other._handle);
243
56.1k
        return *this;
244
56.1k
    }
245
246
0
    // Returns the cache pointer held by this handle (may be nullptr).
    LRUCachePolicy* cache() const { return _cache; }
247
    Slice data() const;
248
249
    // Returns the std::shared_ptr stored in the cached
    // MemoryTrackedPageWithPagePtr value. T must be std::shared_ptr<U>, which
    // the static_assert enforces. The cached value is cast without a runtime
    // type check, and _cache/_handle are dereferenced without a null check, so
    // this must only be called on a handle that refers to such an entry.
    // NOTE(review): std::is_same/std::remove_cv come from <type_traits>, which
    // this header does not include directly.
    template <typename T>
250
6.23k
    T get() const {
251
6.23k
        static_assert(std::is_same<typename std::remove_cv<T>::type,
252
6.23k
                                   std::shared_ptr<typename T::element_type>>::value,
253
6.23k
                      "T must be a std::shared_ptr");
254
6.23k
        using ValueType = typename T::element_type; // Type that shared_ptr points to
255
6.23k
        MemoryTrackedPageWithPagePtr<ValueType>* page =
256
6.23k
                (MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle);
257
6.23k
        return page->data();
258
6.23k
    }
259
260
private:
261
    LRUCachePolicy* _cache = nullptr;
262
    Cache::Handle* _handle = nullptr;
263
264
    // Don't allow copy and assign
265
    DISALLOW_COPY_AND_ASSIGN(PageCacheHandle);
266
};
267
268
} // namespace doris