/root/doris/be/src/olap/segment_loader.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <atomic> |
26 | | #include <memory> |
27 | | #include <ostream> |
28 | | #include <string> |
29 | | #include <utility> |
30 | | #include <vector> |
31 | | |
32 | | #include "common/cast_set.h" |
33 | | #include "common/status.h" |
34 | | #include "olap/lru_cache.h" |
35 | | #include "olap/olap_common.h" // for rowset id |
36 | | #include "olap/rowset/segment_v2/segment.h" |
37 | | #include "runtime/memory/lru_cache_policy.h" |
38 | | #include "util/time.h" |
39 | | |
40 | | namespace doris { |
41 | | #include "common/compile_check_begin.h" |
42 | | |
43 | | class SegmentCacheHandle; |
44 | | class BetaRowset; |
45 | | |
46 | | // SegmentLoader is used to load the Segment of BetaRowset. |
47 | | // An LRUCache is encapsulated inside it, which is used to cache the opened segments. |
48 | | // The caller should use the following method to load and obtain |
49 | | // the segments of a specified rowset: |
50 | | // |
51 | | // SegmentCacheHandle cache_handle; |
52 | | // RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &cache_handle)); |
53 | | // for (auto& seg_ptr : cache_handle.get_segments()) { |
54 | | // ... visit segment ... |
55 | | // } |
56 | | // |
57 | | // Make sure that cache_handle is valid during the segment usage period. |
58 | | using BetaRowsetSharedPtr = std::shared_ptr<BetaRowset>; |
59 | | |
60 | | class SegmentCache : public LRUCachePolicy { |
61 | | public: |
62 | | using LRUCachePolicy::insert; |
63 | | // The cache key of the segment lru cache: identifies one segment by (rowset_id, segment_id). |
64 | | struct CacheKey { |
65 | | CacheKey(RowsetId rowset_id_, int64_t segment_id_) |
66 | 6.34k | : rowset_id(rowset_id_), segment_id(segment_id_) {} |
67 | | RowsetId rowset_id; |
68 | | int64_t segment_id; |
69 | | |
70 | | // Encode to a flat string (rowset_id.to_string() followed by the decimal segment_id) which can be used as LRUCache's key. NOTE(review): the two parts are joined without a separator - presumably the rowset id text is unambiguous; verify. |
71 | 10.5k | [[nodiscard]] std::string encode() const { |
72 | 10.5k | return rowset_id.to_string() + std::to_string(segment_id); |
73 | 10.5k | } |
74 | | }; |
75 | | |
76 | | // The cache value of the segment lru cache. |
77 | | // Holds a shared pointer to one opened segment (one cache entry per segment). |
78 | | class CacheValue : public LRUCacheValueBase { |
79 | | public: |
80 | 4.18k | CacheValue(segment_v2::SegmentSharedPtr segment_) : segment(std::move(segment_)) {} |
81 | | |
82 | | const segment_v2::SegmentSharedPtr segment; |
83 | | }; |
84 | | |
85 | | SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit) |
86 | 1 | : LRUCachePolicy( |
87 | 1 | CachePolicy::CacheType::SEGMENT_CACHE, memory_bytes_limit, LRUCacheType::SIZE, |
88 | 1 | config::tablet_rowset_stale_sweep_time_sec, DEFAULT_LRU_CACHE_NUM_SHARDS * 2, |
89 | 1 | cast_set<uint32_t>(segment_num_limit), config::enable_segment_cache_prune) {} |
90 | | |
91 | | // Look up the segment identified by key in the cache. |
92 | | // If the segment is found, the cache entry will be written into handle. |
93 | | // Return true if the entry is found, otherwise return false. |
94 | | bool lookup(const SegmentCache::CacheKey& key, SegmentCacheHandle* handle); |
95 | | |
96 | | // Insert a cache entry by key. |
97 | | // The inserted cache entry is also returned in handle. |
98 | | // This function is thread-safe. |
99 | | void insert(const SegmentCache::CacheKey& key, CacheValue& value, SegmentCacheHandle* handle); |
100 | | |
101 | | void erase(const SegmentCache::CacheKey& key); |
102 | | }; |
103 | | |
104 | | class SegmentLoader { |
105 | | public: |
106 | | static SegmentLoader* instance(); |
107 | | |
108 | | // Construct a SegmentLoader that owns a newly created SegmentCache (presumably used to create the global instance). |
109 | | // "memory_limit_bytes" and "segment_num_count" are passed through unchanged to the SegmentCache constructor. |
110 | | // TODO: Currently we use the number of rowsets as the cache capacity. |
111 | | // That is, the limit of the cache is the number of rowsets. |
112 | | // This is because currently we cannot accurately estimate the memory occupied by a segment. |
113 | | // After the estimation of segment memory usage is provided later, it is recommended |
114 | | // to use memory as the capacity limit of the cache. NOTE(review): the constructor already takes a byte limit - confirm this TODO is still current. |
115 | | |
116 | 1 | SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) { |
117 | 1 | _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count); |
118 | 1 | } |
119 | | |
120 | | // Load segments of "rowset", return the "cache_handle" which contains segments. |
121 | | // If use_cache is true (it defaults to false), it will be loaded from _segment_cache. |
122 | | Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, |
123 | | bool use_cache = false, bool need_load_pk_index_and_bf = false, |
124 | | OlapReaderStatistics* index_load_stats = nullptr); |
125 | | |
126 | | // Load one segment of "rowset" (selected by "segment_id"), return the "cache_handle" which contains the segment. |
127 | | // If use_cache is true (it defaults to false), it will be loaded from _segment_cache. |
128 | | Status load_segment(const BetaRowsetSharedPtr& rowset, int64_t segment_id, |
129 | | SegmentCacheHandle* cache_handle, bool use_cache = false, |
130 | | bool need_load_pk_index_and_bf = false, |
131 | | OlapReaderStatistics* index_load_stats = nullptr); |
132 | | |
133 | | void erase_segment(const SegmentCache::CacheKey& key); |
134 | | |
135 | | void erase_segments(const RowsetId& rowset_id, int64_t num_segments); |
136 | | |
137 | 2 | int64_t cache_mem_usage() const { |
138 | 2 | #ifdef BE_TEST |
139 | 2 | return _cache_mem_usage; |
140 | | #else |
141 | | return _segment_cache->value_mem_consumption(); |
142 | | #endif |
143 | 2 | } |
144 | | |
145 | | private: |
146 | | SegmentLoader(); |
147 | | std::unique_ptr<SegmentCache> _segment_cache; |
148 | | // Only used by BE unit tests: cache_mem_usage() returns this value when BE_TEST is defined. |
149 | | int64_t _cache_mem_usage = 0; |
150 | | }; |
151 | | |
152 | | // A handle for the segments of a single rowset obtained through the segment lru cache. |
153 | | // The handle stores shared pointers to the segments, so it can ensure that a segment is valid |
154 | | // and will not be closed while the holder of the handle is accessing the segment. |
155 | | // push_segment(cache, handle) releases the cache entry right after copying the shared pointer; the defaulted destructor only drops the stored pointers. |
156 | | // So the caller needs to make sure the handle stays valid for as long as the segments are used. |
157 | | class SegmentCacheHandle { |
158 | | public: |
159 | 6.33k | SegmentCacheHandle() = default; |
160 | 6.33k | ~SegmentCacheHandle() = default; |
161 | | |
162 | 4.18k | void push_segment(LRUCachePolicy* cache, Cache::Handle* handle) { |
163 | 4.18k | segments.push_back(((SegmentCache::CacheValue*)cache->value(handle))->segment); |
164 | 4.18k | cache->release(handle); |
165 | 4.18k | } |
166 | | |
167 | 2.15k | void push_segment(segment_v2::SegmentSharedPtr segment) { |
168 | 2.15k | segments.push_back(std::move(segment)); |
169 | 2.15k | } |
170 | | |
171 | 6.33k | std::vector<segment_v2::SegmentSharedPtr>& get_segments() { return segments; } |
172 | | |
173 | 17 | [[nodiscard]] bool is_inited() const { return _init; } |
174 | | |
175 | 16 | void set_inited() { |
176 | 16 | DCHECK(!_init); |
177 | 16 | _init = true; |
178 | 16 | } |
179 | | |
180 | 0 | segment_v2::SegmentSharedPtr pop_unhealthy_segment() { |
181 | 0 | if (segments.empty()) { |
182 | 0 | return nullptr; |
183 | 0 | } |
184 | 0 | segment_v2::SegmentSharedPtr last_segment = segments.back(); |
185 | 0 | if (last_segment->healthy_status().ok()) { |
186 | 0 | return nullptr; |
187 | 0 | } |
188 | 0 | segments.pop_back(); |
189 | 0 | return last_segment; |
190 | 0 | } |
191 | | |
192 | | private: |
193 | | std::vector<segment_v2::SegmentSharedPtr> segments; |
194 | | bool _init {false}; |
195 | | |
196 | | // Copying and assignment are disallowed (see the macro below). |
197 | | DISALLOW_COPY_AND_ASSIGN(SegmentCacheHandle); |
198 | | }; |
199 | | |
200 | | #include "common/compile_check_end.h" |
201 | | } // namespace doris |