be/src/io/cache/file_cache_common.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Cache/FileCache_fwd.h |
19 | | // and modified by Doris |
20 | | |
21 | | #include "io/cache/file_cache_common.h" |
22 | | |
23 | | #include "common/config.h" |
24 | | #include "exec/common/hex.h" |
25 | | #include "io/cache/block_file_cache.h" |
26 | | |
27 | | namespace doris::io { |
28 | | |
29 | 114k | std::string cache_type_to_surfix(FileCacheType type) { |
30 | 114k | switch (type) { |
31 | 24.1k | case FileCacheType::INDEX: |
32 | 24.1k | return "_idx"; |
33 | 3.37k | case FileCacheType::DISPOSABLE: |
34 | 3.37k | return "_disposable"; |
35 | 86.5k | case FileCacheType::NORMAL: |
36 | 86.5k | return ""; |
37 | 0 | case FileCacheType::TTL: |
38 | 0 | return "_ttl"; |
39 | 0 | case FileCacheType::COLD_NORMAL: |
40 | 0 | return "_cold_normal"; |
41 | 114k | } |
42 | 0 | return ""; |
43 | 114k | } |
44 | | |
45 | 3 | FileCacheType surfix_to_cache_type(const std::string& str) { |
46 | 3 | if (str == "idx") { |
47 | 1 | return FileCacheType::INDEX; |
48 | 2 | } else if (str == "disposable") { |
49 | 1 | return FileCacheType::DISPOSABLE; |
50 | 1 | } else if (str == "ttl") { |
51 | 1 | return FileCacheType::TTL; |
52 | 1 | } else if (str == "cold_normal") { |
53 | 0 | return FileCacheType::COLD_NORMAL; |
54 | 0 | } |
55 | 3 | DCHECK(false) << "The string is " << str; |
56 | 0 | return FileCacheType::DISPOSABLE; |
57 | 3 | } |
58 | | |
59 | 9.42k | FileCacheType string_to_cache_type(const std::string& str) { |
60 | 9.42k | if (str == "normal") { |
61 | 1.88k | return FileCacheType::NORMAL; |
62 | 7.54k | } else if (str == "index") { |
63 | 1.88k | return FileCacheType::INDEX; |
64 | 5.65k | } else if (str == "disposable") { |
65 | 1.88k | return FileCacheType::DISPOSABLE; |
66 | 3.77k | } else if (str == "ttl") { |
67 | 1.88k | return FileCacheType::TTL; |
68 | 1.88k | } else if (str == "cold_normal") { |
69 | 1.88k | return FileCacheType::COLD_NORMAL; |
70 | 1.88k | } |
71 | 9.42k | DCHECK(false) << "The string is " << str; |
72 | 0 | return FileCacheType::NORMAL; |
73 | 9.42k | } |
74 | 7.62k | std::string cache_type_to_string(FileCacheType type) { |
75 | 7.62k | switch (type) { |
76 | 3.06k | case FileCacheType::INDEX: |
77 | 3.06k | return "index"; |
78 | 178 | case FileCacheType::DISPOSABLE: |
79 | 178 | return "disposable"; |
80 | 4.10k | case FileCacheType::NORMAL: |
81 | 4.10k | return "normal"; |
82 | 276 | case FileCacheType::TTL: |
83 | 276 | return "ttl"; |
84 | 0 | case FileCacheType::COLD_NORMAL: |
85 | 0 | return "cold_normal"; |
86 | 7.62k | } |
87 | 7.62k | DCHECK(false) << "unknown type: " << type; |
88 | 0 | return "normal"; |
89 | 7.62k | } |
90 | | |
91 | 172 | std::string FileCacheSettings::to_string() const { |
92 | 172 | std::stringstream ss; |
93 | 172 | ss << "capacity: " << capacity << ", max_file_block_size: " << max_file_block_size |
94 | 172 | << ", max_query_cache_size: " << max_query_cache_size |
95 | 172 | << ", disposable_queue_size: " << disposable_queue_size |
96 | 172 | << ", disposable_queue_elements: " << disposable_queue_elements |
97 | 172 | << ", index_queue_size: " << index_queue_size |
98 | 172 | << ", index_queue_elements: " << index_queue_elements |
99 | 172 | << ", ttl_queue_size: " << ttl_queue_size << ", ttl_queue_elements: " << ttl_queue_elements |
100 | 172 | << ", query_queue_size: " << query_queue_size |
101 | 172 | << ", query_queue_elements: " << query_queue_elements |
102 | 172 | << ", cold_query_queue_size: " << cold_query_queue_size |
103 | 172 | << ", cold_query_queue_elements: " << cold_query_queue_elements << ", storage: " << storage; |
104 | 172 | return ss.str(); |
105 | 172 | } |
106 | | |
107 | | FileCacheSettings get_file_cache_settings(size_t capacity, size_t max_query_cache_size, |
108 | | size_t normal_percent, size_t disposable_percent, |
109 | | size_t index_percent, size_t ttl_percent, |
110 | 9 | const std::string& storage) { |
111 | 9 | io::FileCacheSettings settings; |
112 | 9 | if (capacity == 0) { |
113 | 0 | return settings; |
114 | 0 | } |
115 | 9 | settings.capacity = capacity; |
116 | 9 | settings.max_file_block_size = config::file_cache_each_block_size; |
117 | 9 | settings.max_query_cache_size = max_query_cache_size; |
118 | 9 | size_t per_size = settings.capacity / 100; |
119 | 9 | settings.disposable_queue_size = per_size * disposable_percent; |
120 | 9 | settings.disposable_queue_elements = |
121 | 9 | std::max(settings.disposable_queue_size / settings.max_file_block_size, |
122 | 9 | REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); |
123 | | |
124 | 9 | settings.index_queue_size = per_size * index_percent; |
125 | 9 | settings.index_queue_elements = |
126 | 9 | std::max(settings.index_queue_size / settings.max_file_block_size, |
127 | 9 | REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); |
128 | | |
129 | 9 | settings.ttl_queue_size = per_size * ttl_percent; |
130 | 9 | settings.ttl_queue_elements = std::max(settings.ttl_queue_size / settings.max_file_block_size, |
131 | 9 | REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); |
132 | | |
133 | 9 | settings.query_queue_size = settings.capacity - settings.disposable_queue_size - |
134 | 9 | settings.index_queue_size - settings.ttl_queue_size; |
135 | 9 | settings.query_queue_elements = |
136 | 9 | std::max(settings.query_queue_size / settings.max_file_block_size, |
137 | 9 | REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS); |
138 | | |
139 | 9 | if (config::enable_file_cache_normal_queue_2qlru) { |
140 | 0 | size_t normal_queue_per_size = settings.query_queue_size / 100; |
141 | 0 | size_t normal_queue_per_elements = settings.query_queue_elements / 100; |
142 | |
|
143 | 0 | settings.cold_query_queue_size = |
144 | 0 | normal_queue_per_size * config::file_cache_2qlru_cold_blocks_percent; |
145 | 0 | settings.cold_query_queue_elements = |
146 | 0 | normal_queue_per_elements * config::file_cache_2qlru_cold_blocks_percent; |
147 | |
|
148 | 0 | settings.query_queue_size -= settings.cold_query_queue_size; |
149 | 0 | settings.query_queue_elements -= settings.cold_query_queue_elements; |
150 | 0 | } |
151 | | |
152 | 9 | settings.storage = storage; |
153 | 9 | return settings; |
154 | 9 | } |
155 | | |
156 | 566k | std::string UInt128Wrapper::to_string() const { |
157 | 566k | return get_hex_uint_lowercase(value_); |
158 | 566k | } |
159 | | |
160 | | FileBlocksHolderPtr FileCacheAllocatorBuilder::allocate_cache_holder(size_t offset, size_t size, |
161 | 7.17k | int64_t tablet_id) const { |
162 | 7.17k | CacheContext ctx; |
163 | 7.17k | ctx.cache_type = _expiration_time == 0 ? (config::enable_file_cache_normal_queue_2qlru |
164 | 7.17k | ? FileCacheType::COLD_NORMAL |
165 | 7.17k | : FileCacheType::NORMAL) |
166 | 7.17k | : FileCacheType::TTL; |
167 | 7.17k | ctx.expiration_time = _expiration_time; |
168 | 7.17k | ctx.is_cold_data = _is_cold_data; |
169 | 7.17k | ctx.tablet_id = tablet_id; |
170 | 7.17k | ReadStatistics stats; |
171 | 7.17k | ctx.stats = &stats; |
172 | 7.17k | auto holder = _cache->get_or_set(_cache_hash, offset, size, ctx); |
173 | 7.17k | return std::make_unique<FileBlocksHolder>(std::move(holder)); |
174 | 7.17k | } |
175 | | |
// Explicit instantiations of LRUQueue::get_capacity and LRUQueue::remove for a
// std::lock_guard<std::mutex> lock argument, so that these specializations are
// emitted in this translation unit.
// NOTE(review): presumably both members are templated on the lock type and
// defined outside the header -- confirm against the LRUQueue declaration.
176 | | template size_t LRUQueue::get_capacity(std::lock_guard<std::mutex>& cache_lock) const; |
177 | | template void LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex>& cache_lock); |
178 | | |
179 | 0 | std::string FileCacheInfo::to_string() const { |
180 | 0 | std::stringstream ss; |
181 | 0 | ss << "Hash: " << hash.to_string() << "\n" |
182 | 0 | << "Expiration Time: " << expiration_time << "\n" |
183 | 0 | << "Offset: " << offset << "\n" |
184 | 0 | << "Cache Type: " << cache_type_to_string(cache_type) << "\n"; |
185 | 0 | return ss.str(); |
186 | 0 | } |
187 | | |
188 | 0 | std::string InconsistencyType::to_string() const { |
189 | 0 | std::string result = "Inconsistency Reason: "; |
190 | 0 | if (type == NONE) { |
191 | 0 | result += "NONE"; |
192 | 0 | } else { |
193 | 0 | if (type & NOT_LOADED) { |
194 | 0 | result += "NOT_LOADED "; |
195 | 0 | } |
196 | 0 | if (type & MISSING_IN_STORAGE) { |
197 | 0 | result += "MISSING_IN_STORAGE "; |
198 | 0 | } |
199 | 0 | if (type & SIZE_INCONSISTENT) { |
200 | 0 | result += "SIZE_INCONSISTENT "; |
201 | 0 | } |
202 | 0 | if (type & CACHE_TYPE_INCONSISTENT) { |
203 | 0 | result += "CACHE_TYPE_INCONSISTENT "; |
204 | 0 | } |
205 | 0 | if (type & EXPIRATION_TIME_INCONSISTENT) { |
206 | 0 | result += "EXPIRATION_TIME_INCONSISTENT "; |
207 | 0 | } |
208 | 0 | if (type & TMP_FILE_EXPECT_DOWNLOADING_STATE) { |
209 | 0 | result += "TMP_FILE_EXPECT_DOWNLOADING_STATE"; |
210 | 0 | } |
211 | 0 | } |
212 | 0 | result += "\n"; |
213 | 0 | return result; |
214 | 0 | } |
215 | | |
216 | 4.27M | std::optional<int64_t> get_tablet_id(std::string file_path) { |
217 | | // Expected path formats: |
218 | | // support both .dat and .idx file extensions |
219 | | // support formate see ut. storage_resource_test:StorageResourceTest.ParseTabletIdFromPath |
220 | | |
221 | 4.27M | if (file_path.empty()) { |
222 | 0 | return std::nullopt; |
223 | 0 | } |
224 | | |
225 | | // Find the position of "data/" in the path |
226 | 4.27M | std::string_view path_view = file_path; |
227 | 4.27M | std::string_view data_prefix = DATA_PREFIX; |
228 | 4.27M | size_t data_pos = path_view.find(data_prefix); |
229 | 4.27M | if (data_pos == std::string_view::npos) { |
230 | 1.28k | return std::nullopt; |
231 | 1.28k | } |
232 | | |
233 | 4.27M | if (data_prefix.length() + data_pos >= path_view.length()) { |
234 | 1 | return std::nullopt; |
235 | 1 | } |
236 | | |
237 | | // Extract the part after "data/" |
238 | 4.27M | path_view = path_view.substr(data_pos + data_prefix.length() + 1); |
239 | | |
240 | | // Check if path ends with .dat or .idx |
241 | 4.27M | if (!path_view.ends_with(".dat") && !path_view.ends_with(".idx")) { |
242 | 14.8k | return std::nullopt; |
243 | 14.8k | } |
244 | | |
245 | | // Count slashes in the remaining path |
246 | 4.25M | size_t slash_count = 0; |
247 | 289M | for (char c : path_view) { |
248 | 289M | if (c == '/') { |
249 | 4.25M | slash_count++; |
250 | 4.25M | } |
251 | 289M | } |
252 | | |
253 | | // Split path by '/' |
254 | 4.25M | std::vector<std::string_view> parts; |
255 | 4.25M | size_t start = 0; |
256 | 4.25M | size_t pos = 0; |
257 | 8.51M | while ((pos = path_view.find('/', start)) != std::string_view::npos) { |
258 | 4.25M | if (pos > start) { |
259 | 4.25M | parts.push_back(path_view.substr(start, pos - start)); |
260 | 4.25M | } |
261 | 4.25M | start = pos + 1; |
262 | 4.25M | } |
263 | 4.25M | if (start < path_view.length()) { |
264 | 4.25M | parts.push_back(path_view.substr(start)); |
265 | 4.25M | } |
266 | | |
267 | 4.25M | if (parts.empty()) { |
268 | 0 | return std::nullopt; |
269 | 0 | } |
270 | | |
271 | | // Determine path version based on slash count and extract tablet_id |
272 | | // Version 0: {tablet_id}/{rowset_id}_{seg_id}.dat (1 slash) |
273 | | // Version 1: {shard}/{tablet_id}/{rowset_id}/{seg_id}.dat (3 slashes) |
274 | | |
275 | 4.25M | if (slash_count == 1) { |
276 | | // Version 0 format: parts[0] should be tablet_id |
277 | 4.25M | if (parts.size() >= 1) { |
278 | 4.25M | try { |
279 | 4.25M | int64_t tablet_id = std::stoll(std::string(parts[0])); |
280 | 4.25M | return tablet_id; |
281 | 4.25M | } catch (const std::exception&) { |
282 | | // Not a valid number, return nullopt at last |
283 | 0 | } |
284 | 4.25M | } |
285 | 18.4E | } else if (slash_count == 3) { |
286 | | // Version 1 format: parts[1] should be tablet_id (parts[0] is shard) |
287 | 0 | if (parts.size() >= 2) { |
288 | 0 | try { |
289 | 0 | int64_t tablet_id = std::stoll(std::string(parts[1])); |
290 | 0 | return tablet_id; |
291 | 0 | } catch (const std::exception&) { |
292 | | // Not a valid number, return nullopt at last |
293 | 0 | } |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 18.4E | return std::nullopt; |
298 | 4.25M | } |
299 | | |
300 | | } // namespace doris::io |