be/src/io/cache/file_cache_common.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | #include <cstdint> |
23 | | #include <vector> |
24 | | |
25 | | #include "core/uint128.h" |
26 | | #include "io/io_common.h" |
27 | | |
28 | | namespace doris::io { |
29 | | |
// Compile-time sizing defaults for the file cache.
//
// Declared `inline constexpr` (not `inline static constexpr`): `static` would
// give every translation unit its own internal-linkage copy and make the
// `inline` meaningless. With `inline` alone all translation units share one
// entity per constant.
inline constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 * 1024;
inline constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 1024;

// Defaults for the `*_percent` parameters of get_file_cache_settings().
// The four values add up to 100.
inline constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
inline constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
inline constexpr size_t DEFAULT_INDEX_PERCENT = 5;
inline constexpr size_t DEFAULT_TTL_PERCENT = 50;
36 | | |
37 | | using uint128_t = UInt128; |
38 | | |
// Category of a cached block. CacheContext picks one of these from the
// IOContext flags.
// NOTE(review): every enumerator has an explicit value, presumably because the
// numbers are stored or exchanged somewhere -- confirm before renumbering.
enum FileCacheType {
    DISPOSABLE = 0,
    NORMAL = 1,
    INDEX = 2,
    TTL = 3,
};

// Conversions between a cache type and its "surfix" (sic) string form.
// Both are defined out of line.
std::string cache_type_to_surfix(FileCacheType type);
FileCacheType surfix_to_cache_type(const std::string& str);

// Conversions between a cache type and its string name. Defined out of line.
FileCacheType string_to_cache_type(const std::string& str);
std::string cache_type_to_string(FileCacheType type);
50 | | |
51 | | struct UInt128Wrapper { |
52 | | uint128_t value_; |
53 | | [[nodiscard]] std::string to_string() const; |
54 | | |
55 | | UInt128Wrapper() = default; |
56 | 1.98M | explicit UInt128Wrapper(const uint128_t& value) : value_(value) {} |
57 | | |
58 | 9.79M | bool operator==(const UInt128Wrapper& other) const { return value_ == other.value_; } |
59 | | |
60 | 553k | uint64_t high() const { return static_cast<uint64_t>(value_ >> 64); } |
61 | 553k | uint64_t low() const { return static_cast<uint64_t>(value_); } |
62 | | |
63 | | friend std::ostream& operator<<(std::ostream& os, const UInt128Wrapper& wrapper) { |
64 | | os << "UInt128Wrapper(" << wrapper.high() << ", " << wrapper.low() << ")"; |
65 | | return os; |
66 | | } |
67 | | }; |
68 | | |
// Per-read counters and timers gathered while serving a read through the file
// cache. Everything starts at zero/false except hit_cache, which starts true.
// NOTE(review): the unit of the *_timer fields is not visible in this header.
struct ReadStatistics {
    bool hit_cache {true};
    bool from_peer_cache {false};
    bool skip_cache {false};
    int64_t bytes_read {0};
    int64_t bytes_write_into_file_cache {0};
    int64_t remote_read_timer {0};
    int64_t peer_read_timer {0};
    int64_t remote_wait_timer {0}; // wait for other downloader
    int64_t local_read_timer {0};
    int64_t local_write_timer {0};
    int64_t read_cache_file_directly_timer {0};
    int64_t cache_get_or_set_timer {0};
    int64_t lock_wait_timer {0};
    int64_t get_timer {0};
    int64_t set_timer {0};
};
86 | | |
87 | | class BlockFileCache; |
88 | | struct FileBlocksHolder; |
89 | | using FileBlocksHolderPtr = std::unique_ptr<FileBlocksHolder>; |
90 | | |
91 | | struct FileCacheAllocatorBuilder { |
92 | | bool _is_cold_data; |
93 | | uint64_t _expiration_time; |
94 | | UInt128Wrapper _cache_hash; |
95 | | BlockFileCache* _cache; // Only one ref, the lifetime is owned by FileCache |
96 | | FileBlocksHolderPtr allocate_cache_holder(size_t offset, size_t size, int64_t tablet_id) const; |
97 | | }; |
98 | | |
99 | | struct KeyHash { |
100 | 13.1M | std::size_t operator()(const UInt128Wrapper& w) const { |
101 | 13.1M | return util_hash::HashLen16(w.value_.low(), w.value_.high()); |
102 | 13.1M | } |
103 | | }; |
104 | | |
105 | | using AccessKeyAndOffset = std::pair<UInt128Wrapper, size_t>; |
106 | | struct KeyAndOffsetHash { |
107 | 2.40M | std::size_t operator()(const AccessKeyAndOffset& key) const { |
108 | 2.40M | return KeyHash()(key.first) ^ std::hash<uint64_t>()(key.second); |
109 | 2.40M | } |
110 | | }; |
111 | | |
112 | | struct KeyMeta { |
113 | | uint64_t expiration_time; // absolute time |
114 | | FileCacheType type; |
115 | | int64_t tablet_id {0}; |
116 | | }; |
117 | | |
118 | | struct FileCacheKey { |
119 | | UInt128Wrapper hash; |
120 | | size_t offset; |
121 | | KeyMeta meta; |
122 | | }; |
123 | | |
// Configuration of one file cache instance: the overall capacity, a
// size/element limit pair for each of the four queues, block and query-cache
// limits, and the storage name. All numeric fields default to 0.
struct FileCacheSettings {
    size_t capacity = 0;
    size_t disposable_queue_size = 0;
    size_t disposable_queue_elements = 0;
    size_t index_queue_size = 0;
    size_t index_queue_elements = 0;
    size_t query_queue_size = 0;
    size_t query_queue_elements = 0;
    size_t ttl_queue_size = 0;
    size_t ttl_queue_elements = 0;
    size_t max_file_block_size = 0;
    size_t max_query_cache_size = 0;
    std::string storage;

    // Textual dump of the settings; defined out of line.
    std::string to_string() const;
};
141 | | |
142 | | FileCacheSettings get_file_cache_settings(size_t capacity, size_t max_query_cache_size, |
143 | | size_t normal_percent = DEFAULT_NORMAL_PERCENT, |
144 | | size_t disposable_percent = DEFAULT_DISPOSABLE_PERCENT, |
145 | | size_t index_percent = DEFAULT_INDEX_PERCENT, |
146 | | size_t ttl_percent = DEFAULT_TTL_PERCENT, |
147 | | const std::string& storage = "disk"); |
148 | | |
149 | | struct CacheContext { |
150 | 2.94M | CacheContext(const IOContext* io_context) { |
151 | 2.94M | if (io_context->expiration_time != 0) { |
152 | 2 | cache_type = FileCacheType::TTL; |
153 | 2 | expiration_time = io_context->expiration_time; |
154 | 2.94M | } else if (io_context->is_index_data) { |
155 | 857k | cache_type = FileCacheType::INDEX; |
156 | 2.08M | } else if (io_context->is_disposable) { |
157 | 945k | cache_type = FileCacheType::DISPOSABLE; |
158 | 1.13M | } else { |
159 | 1.13M | cache_type = FileCacheType::NORMAL; |
160 | 1.13M | } |
161 | 2.94M | query_id = io_context->query_id ? *io_context->query_id : TUniqueId(); |
162 | 2.94M | is_warmup = io_context->is_warmup; |
163 | 2.94M | } |
164 | 67.2k | CacheContext() = default; |
165 | | bool operator==(const CacheContext& rhs) const { |
166 | | return query_id == rhs.query_id && cache_type == rhs.cache_type && |
167 | | expiration_time == rhs.expiration_time && is_cold_data == rhs.is_cold_data; |
168 | | } |
169 | | TUniqueId query_id; |
170 | | FileCacheType cache_type; |
171 | | int64_t expiration_time {0}; |
172 | | bool is_cold_data {false}; |
173 | | ReadStatistics* stats; |
174 | | bool is_warmup {false}; |
175 | | int64_t tablet_id {0}; |
176 | | }; |
177 | | |
178 | | template <class Lock> |
179 | | concept IsXLock = std::same_as<Lock, std::lock_guard<std::mutex>> || |
180 | | std::same_as<Lock, std::unique_lock<std::mutex>>; |
181 | | |
182 | | class LRUQueue { |
183 | | public: |
184 | 1.34k | LRUQueue() = default; |
185 | | LRUQueue(size_t max_size, size_t max_element_size, int64_t hot_data_interval) |
186 | 661 | : max_size(max_size), |
187 | 661 | max_element_size(max_element_size), |
188 | 661 | hot_data_interval(hot_data_interval) {} |
189 | | |
190 | | struct HashFileKeyAndOffset { |
191 | 3.87M | std::size_t operator()(const std::pair<UInt128Wrapper, size_t>& pair) const { |
192 | 3.87M | return KeyHash()(pair.first) + pair.second; |
193 | 3.87M | } |
194 | | }; |
195 | | |
196 | | struct FileKeyAndOffset { |
197 | | UInt128Wrapper hash; |
198 | | size_t offset; |
199 | | size_t size; |
200 | | |
201 | | FileKeyAndOffset(const UInt128Wrapper& hash, size_t offset, size_t size) |
202 | 709k | : hash(hash), offset(offset), size(size) {} |
203 | | }; |
204 | | |
205 | | using Iterator = typename std::list<FileKeyAndOffset>::iterator; |
206 | | |
207 | 22.5k | size_t get_max_size() const { return max_size; } |
208 | 692 | size_t get_max_element_size() const { return max_element_size; } |
209 | | |
210 | | template <class T> |
211 | | requires IsXLock<T> |
212 | 39.0k | size_t get_capacity(T& /* cache_lock */) const { |
213 | 39.0k | return cache_size; |
214 | 39.0k | } |
215 | | |
216 | 689 | size_t get_capacity_unsafe() const { return cache_size; } |
217 | | |
218 | 716 | size_t get_elements_num_unsafe() const { return queue.size(); } |
219 | | |
220 | 8.32k | size_t get_elements_num(std::lock_guard<std::mutex>& /* cache_lock */) const { |
221 | 8.32k | return queue.size(); |
222 | 8.32k | } |
223 | | |
224 | | Iterator add(const UInt128Wrapper& hash, size_t offset, size_t size, |
225 | | std::lock_guard<std::mutex>& cache_lock); |
226 | | template <class T> |
227 | | requires IsXLock<T> |
228 | 699k | void remove(Iterator queue_it, T& /* cache_lock */) { |
229 | 699k | cache_size -= queue_it->size; |
230 | 699k | map.erase(std::make_pair(queue_it->hash, queue_it->offset)); |
231 | 699k | queue.erase(queue_it); |
232 | 699k | } |
233 | | |
234 | | void move_to_end(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock); |
235 | | |
236 | | void resize(Iterator queue_it, size_t new_size, std::lock_guard<std::mutex>& cache_lock); |
237 | | |
238 | | std::string to_string(std::lock_guard<std::mutex>& cache_lock) const; |
239 | | |
240 | | bool contains(const UInt128Wrapper& hash, size_t offset, |
241 | | std::lock_guard<std::mutex>& cache_lock) const; |
242 | | |
243 | 738k | Iterator begin() { return queue.begin(); } |
244 | | |
245 | 738k | Iterator end() { return queue.end(); } |
246 | | |
247 | | void remove_all(std::lock_guard<std::mutex>& cache_lock); |
248 | | |
249 | | Iterator get(const UInt128Wrapper& hash, size_t offset, |
250 | | std::lock_guard<std::mutex>& /* cache_lock */) const; |
251 | | |
252 | 14.9k | int64_t get_hot_data_interval() const { return hot_data_interval; } |
253 | | |
254 | 100 | void clear(std::lock_guard<std::mutex>& cache_lock) { |
255 | 100 | queue.clear(); |
256 | 100 | map.clear(); |
257 | 100 | cache_size = 0; |
258 | 100 | } |
259 | | |
260 | | size_t levenshtein_distance_from(LRUQueue& base, std::lock_guard<std::mutex>& cache_lock); |
261 | | |
262 | | size_t max_size; |
263 | | size_t max_element_size; |
264 | | std::list<FileKeyAndOffset> queue; |
265 | | std::unordered_map<std::pair<UInt128Wrapper, size_t>, Iterator, HashFileKeyAndOffset> map; |
266 | | size_t cache_size = 0; |
267 | | int64_t hot_data_interval {0}; |
268 | | }; |
269 | | struct FileCacheInfo { |
270 | | UInt128Wrapper hash {0}; |
271 | | uint64_t expiration_time {0}; |
272 | | uint64_t size {0}; |
273 | | size_t offset {0}; |
274 | | bool is_tmp {false}; |
275 | | FileCacheType cache_type {NORMAL}; |
276 | | |
277 | | std::string to_string() const; |
278 | | }; |
279 | | |
// Bit set of the anomalies detected for one cache block. Each enumerator
// below is a distinct bit, so several anomalies can be OR-ed together.
class InconsistencyType {
    uint32_t type;

public:
    enum : uint32_t {
        // No anomaly
        NONE = 0x00,
        // Missing a block cache metadata in _files
        NOT_LOADED = 0x01,
        // A block cache is missing in storage
        MISSING_IN_STORAGE = 0x02,
        // Size of a block cache recorded in _files is inconsistent with the storage
        SIZE_INCONSISTENT = 0x04,
        // Cache type of a block cache recorded in _files is inconsistent with the storage
        CACHE_TYPE_INCONSISTENT = 0x08,
        // Expiration time of a block cache recorded in _files is inconsistent with the storage
        EXPIRATION_TIME_INCONSISTENT = 0x10,
        // File in storage has a _tmp suffix, but the state of block cache in _files is not set to downloading
        TMP_FILE_EXPECT_DOWNLOADING_STATE = 0x20
    };

    // Implicit on purpose: a raw bit mask (or nothing, meaning NONE) converts
    // directly to an InconsistencyType.
    InconsistencyType(uint32_t t = 0) : type {t} {}

    // Exposes the stored mask by reference so callers can apply `|=`, `&=`
    // and similar operators directly to the object.
    operator uint32_t&() { return type; }

    // Textual form of the mask; defined out of line.
    std::string to_string() const;
};
305 | | |
306 | | struct InconsistencyContext { |
307 | | // The infos in _files of BlockFileCache. |
308 | | std::vector<FileCacheInfo> infos_in_manager; |
309 | | std::vector<FileCacheInfo> infos_in_storage; |
310 | | std::vector<InconsistencyType> types; |
311 | | }; |
312 | | |
// Returns a tablet id for `file_path`, or std::nullopt when none is available.
// Defined out of line.
// NOTE(review): presumably the id is parsed out of the path -- the parsing
// rules are not visible in this header.
std::optional<int64_t> get_tablet_id(std::string file_path);
314 | | |
315 | | } // namespace doris::io |