Coverage Report

Created: 2025-04-27 02:50

/root/doris/be/src/olap/page_cache.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/macros.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
#include <stddef.h>
23
#include <stdint.h>
24
25
#include <memory>
26
#include <string>
27
#include <utility>
28
29
#include "olap/lru_cache.h"
30
#include "runtime/memory/lru_cache_policy.h"
31
#include "runtime/memory/mem_tracker_limiter.h"
32
#include "util/slice.h"
33
#include "vec/common/allocator.h"
34
#include "vec/common/allocator_fwd.h"
35
36
namespace doris {
37
38
class PageCacheHandle;
39
40
// Base class for page objects that carry a payload of type T plus a byte
// count. It derives from LRUCacheValueBase and is non-copyable.
// The coverage records below show two instantiations: T = char* and
// T = std::shared_ptr<segment_v2::SegmentFooterPB>.
template <typename T>
41
class MemoryTrackedPageBase : public LRUCacheValueBase {
42
public:
43
    MemoryTrackedPageBase() = default;
44
    // Defined out of line. NOTE(review): `b` is presumably the size in bytes and
    // `use_cache`/`page_type` presumably pick the memory tracker - confirm in the .cpp.
    MemoryTrackedPageBase(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
45
46
    // Copying is disabled.
    MemoryTrackedPageBase(const MemoryTrackedPageBase&) = delete;
47
    MemoryTrackedPageBase& operator=(const MemoryTrackedPageBase&) = delete;
48
72.0k
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEED2Ev
Line
Count
Source
48
4
    ~MemoryTrackedPageBase() = default;
_ZN5doris21MemoryTrackedPageBaseIPcED2Ev
Line
Count
Source
48
72.0k
    ~MemoryTrackedPageBase() = default;
49
50
107k
    // Returns the payload by value (a copy of the pointer / shared_ptr held in _data).
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseIPcE4dataEv
Line
Count
Source
50
94.8k
    T data() { return _data; }
_ZN5doris21MemoryTrackedPageBaseISt10shared_ptrINS_10segment_v215SegmentFooterPBEEE4dataEv
Line
Count
Source
50
13.0k
    T data() { return _data; }
51
1.50k
    // Returns the current value of _size.
    size_t size() { return _size; }
52
53
protected:
54
    // The payload. It has no default member initializer, so for T = char* the
    // defaulted constructor leaves it uninitialized.
    T _data;
55
    size_t _size = 0;
56
    // NOTE(review): presumably the tracker that the page's memory is charged to;
    // it is not assigned anywhere in this header - confirm in the .cpp.
    std::shared_ptr<MemTrackerLimiter> _mem_tracker_by_allocator;
57
};
58
59
// Page whose payload is a raw char* buffer (MemoryTrackedPageBase<char*>).
// Allocator<false> is a private base (no access specifier on a class base
// means private). NOTE(review): presumably the buffer is allocated and freed
// through that allocator in the out-of-line constructor/destructor - confirm.
class MemoryTrackedPageWithPageEntity : Allocator<false>, public MemoryTrackedPageBase<char*> {
60
public:
61
    // Defined out of line; see the .cpp for how `b`, `use_cache` and `page_type` are used.
    MemoryTrackedPageWithPageEntity(size_t b, bool use_cache, segment_v2::PageTypePB page_type);
62
63
72.2k
    // Returns the current value of _capacity.
    size_t capacity() { return this->_capacity; }
64
65
    ~MemoryTrackedPageWithPageEntity() override;
66
67
40.0k
    // Sets the logical size of the page to `n`. `n` must not exceed the
    // capacity; this is only verified through DCHECK.
    void reset_size(size_t n) {
68
40.0k
        DCHECK(n <= this->_capacity);
69
40.0k
        this->_size = n;
70
40.0k
    }
71
72
private:
73
    // Upper bound for _size enforced by reset_size(); defaults to 0.
    size_t _capacity = 0;
74
};
75
76
template <typename T>
77
class MemoryTrackedPageWithPagePtr : public MemoryTrackedPageBase<std::shared_ptr<T>> {
78
public:
79
    MemoryTrackedPageWithPagePtr(size_t b, segment_v2::PageTypePB page_type);
80
81
    ~MemoryTrackedPageWithPagePtr() override;
82
83
9.57k
    void set_data(std::shared_ptr<T> data) { this->_data = data; }
84
};
85
86
// Page type holding a std::shared_ptr<segment_v2::SegmentFooterPB>.
// NOTE: "Semgnet" is a misspelling of "Segment"; the alias keeps its name here
// because code outside this header may refer to it.
using SemgnetFooterPBPage = MemoryTrackedPageWithPagePtr<segment_v2::SegmentFooterPB>;
87
// Page type holding a raw char* buffer.
using DataPage = MemoryTrackedPageWithPageEntity;
88
89
// Wrapper around Cache, and used for cache page of column data
90
// in Segment.
91
// TODO(zc): We should add some metric to see cache hit/miss rate.
92
class StoragePageCache {
93
public:
94
    // The unique key identifying entries in the page cache.
95
    // Each cached page corresponds to a specific offset within
96
    // a file.
97
    //
98
    // TODO(zc): Now we use file name(std::string) as a part of
99
    // key, which is not efficient. We should make it better later
100
    // Key of a cached page: file name, file size and offset inside the file.
    struct CacheKey {
        // `fname_` is taken by value and moved into the member.
        CacheKey(std::string fname_, size_t fsize_, int64_t offset_)
                : fname(std::move(fname_)), fsize(fsize_), offset(offset_) {}
        std::string fname;
        size_t fsize;
        int64_t offset;

        // Encode to a flat binary which can be used as LRUCache's key.
        //
        // Layout: the bytes of `fname`, followed by the raw in-memory bytes of
        // `fsize`, followed by the raw in-memory bytes of `offset` (host byte
        // order and host integer widths, no separators).
        std::string encode() const {
            std::string key_buf;
            // The final length is known up front, so allocate exactly once.
            key_buf.reserve(fname.size() + sizeof(fsize) + sizeof(offset));
            key_buf.append(fname);
            // encode() is const, so the members are const here; use a
            // const-preserving cast instead of a C-style cast that would
            // silently drop the qualifier.
            key_buf.append(reinterpret_cast<const char*>(&fsize), sizeof(fsize));
            key_buf.append(reinterpret_cast<const char*>(&offset), sizeof(offset));
            return key_buf;
        }
    };
115
116
    // LRU cache policy used for data pages. The constructor only forwards to
    // LRUCachePolicy with cache type DATA_PAGE_CACHE, size-based accounting
    // (LRUCacheType::SIZE) and the data-page stale sweep time from config.
    class DataPageCache : public LRUCachePolicy {
117
    public:
118
        // NOTE(review): unlike the other two page caches, this one also passes
        // DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY and two `true` flags; their
        // meaning is defined by the LRUCachePolicy constructor, which is not
        // visible here - confirm before changing them.
        DataPageCache(size_t capacity, uint32_t num_shards)
119
                : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
120
                                 LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
121
8
                                 num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
122
8
        }
123
    };
124
125
    // LRU cache policy used for index pages. The constructor only forwards to
    // LRUCachePolicy with cache type INDEXPAGE_CACHE, size-based accounting
    // (LRUCacheType::SIZE) and the index-page stale sweep time from config.
    class IndexPageCache : public LRUCachePolicy {
126
    public:
127
        IndexPageCache(size_t capacity, uint32_t num_shards)
128
                : LRUCachePolicy(CachePolicy::CacheType::INDEXPAGE_CACHE, capacity,
129
                                 LRUCacheType::SIZE, config::index_page_cache_stale_sweep_time_sec,
130
6
                                 num_shards) {}
131
    };
132
133
    // LRU cache policy used for primary key index pages. The constructor only
    // forwards to LRUCachePolicy with cache type PK_INDEX_PAGE_CACHE,
    // size-based accounting (LRUCacheType::SIZE) and the pk-index stale sweep
    // time from config.
    class PKIndexPageCache : public LRUCachePolicy {
134
    public:
135
        PKIndexPageCache(size_t capacity, uint32_t num_shards)
136
                : LRUCachePolicy(CachePolicy::CacheType::PK_INDEX_PAGE_CACHE, capacity,
137
                                 LRUCacheType::SIZE,
138
10
                                 config::pk_index_page_cache_stale_sweep_time_sec, num_shards) {}
139
    };
140
141
    static constexpr uint32_t kDefaultNumShards = 16;
142
143
    // Create global instance of this class
144
    static StoragePageCache* create_global_cache(size_t capacity, int32_t index_cache_percentage,
145
                                                 int64_t pk_index_cache_capacity,
146
                                                 uint32_t num_shards = kDefaultNumShards);
147
148
    // Return global instance.
149
    // Client should call create_global_cache before.
150
63.1k
    // Forwards to ExecEnv::GetInstance()->get_storage_page_cache(); the result
    // is returned as-is, without a null check.
    static StoragePageCache* instance() { return ExecEnv::GetInstance()->get_storage_page_cache(); }
151
152
    StoragePageCache(size_t capacity, int32_t index_cache_percentage,
153
                     int64_t pk_index_cache_capacity, uint32_t num_shards);
154
155
    // Lookup the given page in the cache.
156
    //
157
    // If the page is found, the cache entry will be written into handle.
158
    // PageCacheHandle will release cache entry to cache when it
159
    // destructs.
160
    //
161
    // Cache type selection is determined by page_type argument
162
    //
163
    // Return true if entry is found, otherwise return false.
164
    bool lookup(const CacheKey& key, PageCacheHandle* handle, segment_v2::PageTypePB page_type);
165
166
    // Insert a page with key into this cache.
167
    // Given handle will be set to valid reference.
168
    // This function is thread-safe, and when two clients insert two same key
169
    // concurrently, this function can assure that only one page is cached.
170
    // The in_memory page will have higher priority.
171
    void insert(const CacheKey& key, DataPage* data, PageCacheHandle* handle,
172
                segment_v2::PageTypePB page_type, bool in_memory = false);
173
174
    // Insert a std::shared_ptr which points to a page into this cache.
175
    // size should be the size of the page instead of shared_ptr.
176
    // Internal implementation will wrap shared_ptr with MemoryTrackedPageWithPagePtr
177
    // Since we are using std::shared_ptr, the life cycle of the page is not managed by
178
    // this cache alone.
179
    // User could store a weak_ptr to the page, and lock it when needed.
180
    // See Segment::_get_segment_footer for example.
181
    template <typename T>
182
    void insert(const CacheKey& key, T data, size_t size, PageCacheHandle* handle,
183
                segment_v2::PageTypePB page_type, bool in_memory = false);
184
185
21.5k
    // Returns the memory tracker of the cache that serves `page_type`.
    // The cache is selected by _get_page_cache(), which throws for page types
    // that have no cache.
    std::shared_ptr<MemTrackerLimiter> mem_tracker(segment_v2::PageTypePB page_type) {
186
21.5k
        return _get_page_cache(page_type)->mem_tracker();
187
21.5k
    }
188
189
private:
190
    StoragePageCache();
191
192
    int32_t _index_cache_percentage = 0;
193
    std::unique_ptr<DataPageCache> _data_page_cache;
194
    std::unique_ptr<IndexPageCache> _index_page_cache;
195
    // Cache for primary key index data pages, separated from the data
196
    // page cache to make it more flexible. We need this cache when constructing
197
    // the delete bitmap for unique key tables with MoW (merge-on-write).
198
    std::unique_ptr<PKIndexPageCache> _pk_index_page_cache;
199
200
67.9k
    // Maps a page type to the cache that stores pages of that type:
    //   DATA_PAGE              -> _data_page_cache
    //   INDEX_PAGE             -> _index_page_cache
    //   PRIMARY_KEY_INDEX_PAGE -> _pk_index_page_cache
    // Any other page type throws Exception(Status::FatalError(...)).
    // The returned pointer is non-owning (unique_ptr::get()).
    // NOTE(review): it would be nullptr if the corresponding cache was never
    // created; whether that can happen depends on the out-of-line constructor.
    LRUCachePolicy* _get_page_cache(segment_v2::PageTypePB page_type) {
201
67.9k
        switch (page_type) {
202
35.6k
        case segment_v2::DATA_PAGE: {
203
35.6k
            return _data_page_cache.get();
204
0
        }
205
30.9k
        case segment_v2::INDEX_PAGE: {
206
30.9k
            return _index_page_cache.get();
207
0
        }
208
1.29k
        case segment_v2::PRIMARY_KEY_INDEX_PAGE: {
209
1.29k
            return _pk_index_page_cache.get();
210
0
        }
211
0
        default:
212
0
            throw Exception(Status::FatalError("get error type page cache"));
213
67.9k
        }
214
0
        // Not reachable: every path through the switch above returns or throws.
        // Presumably kept so that compilers do not warn about a missing return.
        throw Exception(Status::FatalError("__builtin_unreachable"));
215
67.9k
    }
216
};
217
218
// A handle for a StoragePageCache entry. This class makes it easy to handle
219
// Cache entry. Users don't need to release the obtained cache entry. This
220
// class will release the cache entry when it is destroyed.
221
class PageCacheHandle {
222
public:
223
201k
    // Creates an empty handle (_cache and _handle are both nullptr).
    PageCacheHandle() = default;
224
    // Wraps an existing cache entry. Both pointers are stored as-is; `handle`
    // is handed back to `cache` via release() when this object is destroyed.
    PageCacheHandle(LRUCachePolicy* cache, Cache::Handle* handle)
225
26.6k
            : _cache(cache), _handle(handle) {}
226
276k
    // Releases the entry back to its cache, if this handle holds one.
    ~PageCacheHandle() {
227
276k
        if (_handle != nullptr) {
228
26.6k
            _cache->release(_handle);
229
26.6k
        }
230
276k
    }
231
232
48.8k
    // Move construction: the members of *this start out as nullptr (default
    // member initializers), so swapping transfers the entry to *this and
    // leaves `other` empty.
    PageCacheHandle(PageCacheHandle&& other) noexcept {
233
        // std::exchange (C++14) would be an alternative to the swaps below.
234
48.8k
        std::swap(_cache, other._cache);
235
48.8k
        std::swap(_handle, other._handle);
236
48.8k
    }
237
238
94.3k
    // Move assignment is implemented as a swap: whatever *this held before
    // ends up in `other` and is released only when `other` is destroyed, not
    // at the point of assignment.
    PageCacheHandle& operator=(PageCacheHandle&& other) noexcept {
239
94.3k
        std::swap(_cache, other._cache);
240
94.3k
        std::swap(_handle, other._handle);
241
94.3k
        return *this;
242
94.3k
    }
243
244
0
    // Returns the cache this handle refers to (nullptr for an empty handle).
    LRUCachePolicy* cache() const { return _cache; }
245
    // Defined out of line.
    Slice data() const;
246
247
    // Returns the cached value as T, where T must be a std::shared_ptr<X>
    // (enforced at compile time by the static_assert below).
    // Preconditions, not checked here: the handle is non-empty and the cached
    // value really is a MemoryTrackedPageWithPagePtr<X>; the cast below is
    // unchecked.
    template <typename T>
248
13.0k
    T get() const {
249
13.0k
        static_assert(std::is_same<typename std::remove_cv<T>::type,
250
13.0k
                                   std::shared_ptr<typename T::element_type>>::value,
251
13.0k
                      "T must be a std::shared_ptr");
252
13.0k
        using ValueType = typename T::element_type; // Type that shared_ptr points to
253
13.0k
        MemoryTrackedPageWithPagePtr<ValueType>* page =
254
13.0k
                (MemoryTrackedPageWithPagePtr<ValueType>*)_cache->value(_handle);
255
13.0k
        return page->data();
256
13.0k
    }
257
258
private:
259
    // Non-owning pointer to the cache that `_handle` belongs to.
    LRUCachePolicy* _cache = nullptr;
260
    // The cache entry held by this object; nullptr means "empty".
    Cache::Handle* _handle = nullptr;
261
262
    // Don't allow copy and assign
263
    DISALLOW_COPY_AND_ASSIGN(PageCacheHandle);
264
};
265
266
} // namespace doris