/root/doris/be/src/olap/segment_loader.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <butil/macros.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | #include <stdint.h> |
24 | | |
25 | | #include <atomic> |
26 | | #include <memory> |
27 | | #include <ostream> |
28 | | #include <string> |
29 | | #include <utility> |
30 | | #include <vector> |
31 | | |
32 | | #include "common/status.h" |
33 | | #include "olap/lru_cache.h" |
34 | | #include "olap/olap_common.h" // for rowset id |
35 | | #include "olap/rowset/segment_v2/segment.h" |
36 | | #include "runtime/memory/lru_cache_policy.h" |
37 | | #include "util/time.h" |
38 | | |
39 | | namespace doris { |
40 | | |
41 | | class SegmentCacheHandle; |
42 | | class BetaRowset; |
43 | | |
44 | | // SegmentLoader is used to load the segments of a BetaRowset. |
45 | | // An LRU cache (SegmentCache) is encapsulated inside it, which is used to cache the opened segments. |
46 | | // The caller should use the following method to load and obtain |
47 | | // the segments of a specified rowset: |
48 | | // |
49 | | // SegmentCacheHandle cache_handle; |
50 | | // RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &cache_handle)); |
51 | | // for (auto& seg_ptr : cache_handle.get_segments()) { |
52 | | // ... visit segment ... |
53 | | // } |
54 | | // |
55 | | // Make sure that cache_handle stays valid for the whole period in which the segments are used. |
56 | | using BetaRowsetSharedPtr = std::shared_ptr<BetaRowset>; |
57 | | |
58 | | class SegmentCache : public LRUCachePolicy { |
59 | | public: |
60 | | using LRUCachePolicy::insert; |
61 | | // The cache key of the segment LRU cache: a (rowset id, segment id) pair. |
62 | | struct CacheKey { |
63 | | CacheKey(RowsetId rowset_id_, int64_t segment_id_) |
64 | 4.83k | : rowset_id(rowset_id_), segment_id(segment_id_) {} |
65 | | RowsetId rowset_id; |
66 | | int64_t segment_id; |
67 | | |
68 | | // Encode the key as a string usable as the LRUCache key: rowset_id.to_string() followed by the decimal segment_id, with no separator. NOTE(review): confirm two distinct keys can never concatenate to the same string. |
69 | 9.02k | [[nodiscard]] std::string encode() const { |
70 | 9.02k | return rowset_id.to_string() + std::to_string(segment_id); |
71 | 9.02k | } |
72 | | }; |
73 | | |
74 | | // The cache value of the segment LRU cache. |
75 | | // Holds one opened segment (a segment_v2::SegmentSharedPtr); the destructor drops that reference. |
76 | | class CacheValue : public LRUCacheValueBase { |
77 | | public: |
78 | 4.18k | ~CacheValue() override { segment.reset(); } |
79 | | |
80 | | segment_v2::SegmentSharedPtr segment; |
81 | | }; |
82 | | |
83 | | SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit) |
84 | | : LRUCachePolicy(CachePolicy::CacheType::SEGMENT_CACHE, memory_bytes_limit, |
85 | | LRUCacheType::SIZE, config::tablet_rowset_stale_sweep_time_sec, |
86 | 1 | DEFAULT_LRU_CACHE_NUM_SHARDS * 2, segment_num_limit) {} |
87 | | |
88 | | // Look up the segment identified by key in the cache. |
89 | | // If the segment is found, the cache entry will be written into handle. |
90 | | // Returns true if an entry is found, otherwise false. |
91 | | bool lookup(const SegmentCache::CacheKey& key, SegmentCacheHandle* handle); |
92 | | |
93 | | // Insert a cache entry under key. |
94 | | // The inserted cache entry is also returned through handle. |
95 | | // This function is thread-safe. |
96 | | void insert(const SegmentCache::CacheKey& key, CacheValue& value, SegmentCacheHandle* handle); |
97 | | |
98 | | void erase(const SegmentCache::CacheKey& key); |
99 | | }; |
100 | | |
101 | | class SegmentLoader { |
102 | | public: |
103 | | static SegmentLoader* instance(); |
104 | | |
105 | | // Build a SegmentLoader whose SegmentCache is created with the two limits below. |
106 | | // "memory_limit_bytes" and "segment_num_count" are forwarded unchanged to the SegmentCache constructor. |
107 | | // TODO: This note assumes the cache capacity is the number of rowsets, chosen because |
108 | | // the memory occupied by a segment could not be estimated accurately; it recommends |
109 | | // switching to memory as the capacity limit once such an estimate is available. |
110 | | // NOTE(review): the constructor below already takes both a byte limit and a segment count, |
111 | | // so this TODO may be stale -- confirm and remove it if so. |
112 | | |
113 | 1 | SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) { |
114 | 1 | _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count); |
115 | 1 | } |
116 | | |
117 | | // Load the segments of "rowset" and return them through "cache_handle". |
118 | | // If use_cache is true (it defaults to false), the segments are loaded from _segment_cache. |
119 | | Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, |
120 | | bool use_cache = false, bool need_load_pk_index_and_bf = false, |
121 | | OlapReaderStatistics* index_load_stats = nullptr); |
122 | | |
123 | | void erase_segment(const SegmentCache::CacheKey& key); |
124 | | |
125 | | void erase_segments(const RowsetId& rowset_id, int64_t num_segments); |
126 | | |
127 | | // Only used by BE unit tests: returns the current value of _cache_mem_usage. |
128 | 2 | int64_t cache_mem_usage() const { return _cache_mem_usage; } |
129 | | |
130 | | private: |
131 | | SegmentLoader(); |
132 | | std::unique_ptr<SegmentCache> _segment_cache; |
133 | | // Only used by BE unit tests. |
134 | | int64_t _cache_mem_usage = 0; |
135 | | }; |
136 | | |
137 | | // A handle that holds the segments loaded for a single rowset. |
138 | | // It keeps a segment_v2::SegmentSharedPtr for every pushed segment, so the segments remain |
139 | | // referenced (and are not closed) while the holder of the handle is accessing them. |
140 | | // push_segment(cache, handle) copies the segment pointer out of the cache entry and releases that entry immediately; nothing is released in the destructor. |
141 | | // So the caller needs to keep the handle alive for as long as the segments are in use. |
142 | | class SegmentCacheHandle { |
143 | | public: |
144 | 528 | SegmentCacheHandle() = default; |
145 | 528 | ~SegmentCacheHandle() = default; |
146 | | |
147 | 4.18k | void push_segment(LRUCachePolicy* cache, Cache::Handle* handle) { |
148 | 4.18k | segments.push_back(static_cast<SegmentCache::CacheValue*>(cache->value(handle))->segment); |
149 | 4.18k | cache->release(handle); |
150 | 4.18k | } |
151 | | |
152 | 645 | void push_segment(segment_v2::SegmentSharedPtr segment) { |
153 | 645 | segments.push_back(std::move(segment)); |
154 | 645 | } |
155 | | |
156 | 530 | std::vector<segment_v2::SegmentSharedPtr>& get_segments() { return segments; } |
157 | | |
158 | 529 | [[nodiscard]] bool is_inited() const { return _init; } |
159 | | |
160 | 528 | void set_inited() { |
161 | 528 | DCHECK(!_init); |
162 | 528 | _init = true; |
163 | 528 | } |
164 | | |
165 | 0 | segment_v2::SegmentSharedPtr pop_unhealthy_segment() { |
166 | 0 | if (segments.empty()) { |
167 | 0 | return nullptr; |
168 | 0 | } |
169 | 0 | segment_v2::SegmentSharedPtr last_segment = segments.back(); |
170 | 0 | if (last_segment->healthy_status().ok()) { |
171 | 0 | return nullptr; |
172 | 0 | } |
173 | 0 | segments.pop_back(); |
174 | 0 | return last_segment; |
175 | 0 | } |
176 | | |
177 | | private: |
178 | | std::vector<segment_v2::SegmentSharedPtr> segments; |
179 | | bool _init {false}; |
180 | | |
181 | | // Copying and assignment are disallowed. |
182 | | DISALLOW_COPY_AND_ASSIGN(SegmentCacheHandle); |
183 | | }; |
184 | | |
185 | | } // namespace doris |