Coverage Report

Created: 2025-04-22 21:03

/root/doris/be/src/olap/page_cache.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <memory>
26
#include <string>
27
#include <utility>
28
29
#include "olap/lru_cache.h"
30
#include "runtime/memory/lru_cache_policy.h"
31
#include "runtime/memory/mem_tracker_limiter.h"
32
#include "util/slice.h"
33
#include "vec/common/allocator.h"
34
#include "vec/common/allocator_fwd.h"
35
36
namespace doris {
37
38
class PageCacheHandle;
39
40
template <typename T>
41
class MemoryTrackedPageBase : public LRUCacheValueBase {
42
public:
43
    MemoryTrackedPageBase() = default;
44
    MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
45
46
    MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete;
47
    MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete;
48
36.0k
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEED2Ev
Line
Count
Source
48
2
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseIPcED2Ev
Line
Count
Source
48
36.0k
    ~MemoryTrackedPageBase() = default;
49
50
53.9k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseIPcE4dataEv
Line
Count
Source
50
47.4k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEE4dataEv
Line
Count
Source
50
6.50k
    T data() { return _data; }
51
753
    size_t size() { return _size; }
52
53
protected:
54
    T _data;
55
    size_t _size = 0;
56
    std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator;
57
};
58
59
class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> {
60
public:
61
    MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
62
63
36.1k
    size_t capacity() { return this->_capacity; }
64
65
    ~MemoryTrackedPageWithPageEntity() override;
66
67
20.0k
    // Sets the recorded size of this page to n without touching _capacity.
    // n is expected to be at most _capacity; that is only verified by the DCHECK
    // below (NOTE(review): DCHECK is presumably a debug-build-only check — confirm),
    // and no other bounds handling is done here.
    void reset_size(size_t n) {
68
20.0k
        DCHECK(n <= this->_capacity);
69
20.0k
        this->_size = n;
70
20.0k
    }
71
72
private:
73
    size_t _capacity = 0;
74
};
75
76
template <typename T>
77
class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> {
78
public:
79
    MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type);
80
81
    ~MemoryTrackedPageWithPagePtr() override;
82
83
4.78k
    // Stores `data` as this page's payload. The argument is already a by-value
    // copy, so it is moved into the member rather than copied a second time.
    void set_data(std::shared_ptr<T> data) { this->_data = std::move(data); }
84
};
85
86
// Page type that holds a std::shared_ptr<segment_v2::SegmentFooterPB>.
// NOTE(review): "Semgnet" looks like a misspelling of "Segment". The alias is
// left as-is here because code outside this file may refer to it by this name.
using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>;
87
using DataPage = MemoryTrackedPageWithPageEntity;
88
89
// Wrapper around Cache, used to cache pages of column data
90
// in Segment.
91
// TODO(zc): We should add some metric to see cache hit/miss rate.
92
class StoragePageCache {
93
public:
94
    // The unique key identifying entries in the page cache.
95
    // Each cached page corresponds to a specific offset within
96
    // a file.
97
    //
98
    // TODO(zc): Now we use file name(std::string) as a part of
99
    // key, which is not efficient. We should make it better later
100
    struct CacheKey {
101
        // Builds a key from a file name, a file size and an offset. The name is
        // taken by value and moved into the member; the two integers are copied.
        CacheKey(std::string fname_, size_t fsize_, int64_t offset_)
102
27.9k
                : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {}
103
        std::string fname;
104
        size_t fsize;
105
        int64_t offset;
106
107
        // Encode to a flat binary which can be used as LRUCache's key.
        //
        // Layout of the returned string: the bytes of fname, followed by the raw
        // in-memory bytes of fsize (sizeof(fsize) bytes), followed by the raw
        // in-memory bytes of offset (sizeof(offset) bytes). The result is a
        // binary blob and may contain embedded zero bytes.
108
23.1k
        std::string encode() const {
109
23.1k
            std::string key_buf(fname);
110
23.1k
            // The C-style casts below also drop the const that the members carry
            // inside this const member function; append() only reads the bytes.
            key_buf.append((char*)&fsize, sizeof(fsize));
111
23.1k
            key_buf.append((char*)&offset, sizeof(offset));
112
23.1k
            return key_buf;
113
23.1k
        }
114
    };
115
116
    class DataPageCache : public LRUCachePolicy {
117
    public:
118
        DataPageCache(size_t capacity, uint32_t num_shards)
119
                : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
120
                                 LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
121
4
                                 num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
122
4
        }
123
    };
124
125
    class IndexPageCache : public LRUCachePolicy {
126
    public:
127
        IndexPageCache(size_t capacity, uint32_t num_shards)
128
                : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
129
                                 LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
130
3
                                 num_shards) {}
131
    };
132
133
    class PKIndexPageCache : public LRUCachePolicy {
134
    public:
135
        PKIndexPageCache(size_t capacity, uint32_t num_shards)
136
                : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
137
                                 LRUCacheType::SIZE,
138
5
                                 config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {}
139
    };
140
141
    static constexpr uint32_t kDefaultNumShards = 16;
142
143
    // Create global instance of this class
144
    static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage,
145
                                                 int64_t pk_index_cache_capacity,
146
                                                 uint32_t num_shards = kDefaultNumShards);
147
148
    // Return global instance.
149
    // Client should call create_global_cache before.
150
31.5k
    static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); }
151
152
    StoragePageCache(size_t capacity, int32_t index_cache_percentage,
153
                     int64_t pk_index_cache_capacity, uint32_t num_shards);
154
155
    // Lookup the given page in the cache.
156
    //
157
    // If the page is found, the cache entry will be written into handle.
158
    // PageCacheHandle will release cache entry to cache when it
159
    // destructs.
160
    //
161
    // Cache type selection is determined by page_type argument
162
    //
163
    // Return true if entry is found, otherwise return false.
164
    bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type);
165
166
    // Insert a page with key into this cache.
167
    // The given handle will be set to a valid reference.
168
    // This function is thread-safe, and when two clients insert the same key
169
    // concurrently, this function can assure that only one page is cached.
170
    // The in_memory page will have higher priority.
171
    void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle,
172
                segment_v2::PageTypePB page_type, bool in_memory = false);
173
174
    // Insert a std::shared_ptr which points to a page into this cache.
175
    // size should be the size of the page instead of shared_ptr.
176
    // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr
177
    // Since we are using std::shared_ptr, the life cycle of the page is not managed by
178
    // this cache alone.
179
    // User could store a weak_ptr to the page, and lock it when needed.
180
    // See Segment::_get_segment_footer for example.
181
    template <typename T>
182
    void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle,
183
                segment_v2::PageTypePB page_type, bool in_memory = false);
184
185
10.7k
    // Returns the memory tracker of the cache that serves page_type.
    // The cache is selected by _get_page_cache(), which throws for page types
    // it does not handle, so this call can throw for the same inputs.
    std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) {
186
10.7k
        return _get_page_cache(page_type)->mem_tracker();
187
10.7k
    }
188
189
private:
190
    StoragePageCache();
191
192
    int32_t _index_cache_percentage = 0;
193
    std::unique_ptr<DataPageCache> _data_page_cache;
194
    std::unique_ptr<IndexPageCache> _index_page_cache;
195
    // Cache data for primary key index data page, separated from data
196
    // page cache to make it more flexible. We need this cache when constructing the
197
    // delete bitmap in unique key with mow
198
    std::unique_ptr<PKIndexPageCache> _pk_index_page_cache;
199
200
33.9k
    // Maps a page type to the cache member that stores pages of that type.
    // Returns a non-owning pointer taken from the matching unique_ptr member.
    // DATA_PAGE, INDEX_PAGE and PRIMARY_KEY_INDEX_PAGE are handled; any other
    // page type throws an Exception wrapping Status::FatalError.
    LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) {
201
33.9k
        switch (page_type) {
202
17.8k
        case segment_v2::DATA_PAGE: {
203
17.8k
            return _data_page_cache.get();
204
0
        }
205
15.4k
        case segment_v2::INDEX_PAGE: {
206
15.4k
            return _index_page_cache.get();
207
0
        }
208
647
        case segment_v2::PRIMARY_KEY_INDEX_PAGE: {
209
647
            return _pk_index_page_cache.get();
210
0
        }
211
0
        default:
212
0
            throw Exception(Status::FatalError("get error type page cache"));
213
33.9k
        }
214
0
        // Every path through the switch above returns or throws, so control
        // does not reach this statement.
        throw Exception(Status::FatalError("__builtin_unreachable"));
215
33.9k
    }
216
};
217
218
// A handle for a StoragePageCache entry. This class makes it easy to handle
219
// Cache entry. Users don't need to release the obtained cache entry. This
220
// class will release the cache entry when it is destroyed.
221
class PageCacheHandle {
222
public:
223
100k
    PageCacheHandle() = default;
224
    PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
225
13.3k
            : _cache(cache), _handle(handle) {}
226
138k
    // Gives the held entry back to its cache by calling _cache->release().
    // A handle whose _handle is null (for example a default-constructed one)
    // holds nothing and does nothing here.
    ~PageCacheHandle() {
227
138k
        if (_handle != nullptr) {
228
13.3k
            _cache->release(_handle);
229
13.3k
        }
230
138k
    }
231
232
24.4k
    // Move constructor. _cache and _handle start out as nullptr through their
    // in-class initializers, so after the two swaps this handle holds what
    // `other` held and `other` holds nullptr for both members.
    PageCacheHandle(PageCacheHandle&& other) noexcept {
233
        // std::exchange could be used here instead once C++14 is enabled.
234
24.4k
        std::swap(_cache, other._cache);
235
24.4k
        std::swap(_handle, other._handle);
236
24.4k
    }
237
238
47.1k
    // Move assignment implemented as a swap: this handle takes over what
    // `other` held, and whatever this handle held before ends up in `other`.
    // Nothing is released here; a previously held entry is released when
    // `other` is destroyed. Self-assignment swaps each member with itself and
    // leaves the handle unchanged.
    PageCacheHandle& operator=(PageCacheHandle&& other) noexcept {
239
47.1k
        std::swap(_cache, other._cache);
240
47.1k
        std::swap(_handle, other._handle);
241
47.1k
        return *this;
242
47.1k
    }
243
244
0
    LRUCachePolicy* cache() const { return _cache; }
245
    Slice data() const;
246
247
    // Returns the std::shared_ptr stored in the cached entry this handle holds.
    // T must be std::shared_ptr<X> for some X (cv-qualifiers on T are ignored);
    // this is enforced at compile time by the static_assert below.
    // _cache and _handle are used without a null check.
    template <typename T>
248
6.50k
    T get() const {
249
6.50k
        static_assert(std::is_same<typename std::remove_cv<T>::type,
250
6.50k
                                   std::shared_ptr<typename T::element_type>>::value,
251
6.50k
                      "T must be a std::shared_ptr");
252
6.50k
        using ValueType = typename T::element_type; // Type that shared_ptr points to
253
6.50k
        // The cached value is cast to MemoryTrackedPageWithPagePtr<ValueType>*
        // with no runtime type check.
        // NOTE(review): presumably only valid for entries that were inserted as
        // a shared_ptr of this same ValueType — confirm against callers.
        MemoryTrackedPageWithPagePtr<ValueType>* page =
254
6.50k
                (MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle);
255
6.50k
        return page->data();
256
6.50k
    }
257
258
private:
259
    LRUCachePolicy* _cache = nullptr;
260
    Cache::Handle* _handle = nullptr;
261
262
    // Don't allow copy and assign
263
    DISALLOW_COPY_AND_ASSIGN(PageCacheHandle);
264
};
265
266
} // namespace doris