be/src/cloud/cloud_internal_service.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "cloud/cloud_internal_service.h" |
19 | | |
20 | | #include <bthread/countdown_event.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <chrono> |
24 | | #include <limits> |
25 | | #include <list> |
26 | | #include <memory> |
27 | | #include <optional> |
28 | | #include <thread> |
29 | | #include <unordered_map> |
30 | | |
31 | | #include "cloud/cloud_storage_engine.h" |
32 | | #include "cloud/cloud_tablet.h" |
33 | | #include "cloud/cloud_tablet_mgr.h" |
34 | | #include "cloud/cloud_warm_up_manager.h" |
35 | | #include "cloud/cloud_warmup_metrics.h" |
36 | | #include "cloud/config.h" |
37 | | #include "io/cache/block_file_cache.h" |
38 | | #include "io/cache/block_file_cache_downloader.h" |
39 | | #include "io/cache/block_file_cache_factory.h" |
40 | | #include "runtime/thread_context.h" |
41 | | #include "runtime/workload_management/io_throttle.h" |
42 | | #include "util/async_io.h" |
43 | | #include "util/bvar_windowed_adder.h" |
44 | | #include "util/debug_points.h" |
45 | | |
46 | | namespace doris { |
47 | | #include "common/compile_check_avoid_begin.h" |
48 | | |
49 | | bvar::Adder<uint64_t> g_file_cache_get_by_peer_num("file_cache_get_by_peer_num"); |
50 | | bvar::Adder<uint64_t> g_file_cache_get_by_peer_blocks_num("file_cache_get_by_peer_blocks_num"); |
51 | | bvar::Adder<uint64_t> g_file_cache_get_by_peer_success_num("file_cache_get_by_peer_success_num"); |
52 | | bvar::Adder<uint64_t> g_file_cache_get_by_peer_failed_num("file_cache_get_by_peer_failed_num"); |
53 | | bvar::LatencyRecorder g_file_cache_get_by_peer_server_latency( |
54 | | "file_cache_get_by_peer_server_latency"); |
55 | | bvar::LatencyRecorder g_file_cache_get_by_peer_read_cache_file_latency( |
56 | | "file_cache_get_by_peer_read_cache_file_latency"); |
57 | | bvar::LatencyRecorder g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency( |
58 | | "cloud_internal_service_get_file_cache_meta_by_tablet_id_latency"); |
59 | | |
60 | | CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine, ExecEnv* exec_env) |
61 | 0 | : PInternalService(exec_env), _engine(engine) {} |
62 | | |
63 | 0 | CloudInternalServiceImpl::~CloudInternalServiceImpl() = default; |
64 | | |
65 | | void CloudInternalServiceImpl::alter_vault_sync(google::protobuf::RpcController* controller, |
66 | | const doris::PAlterVaultSyncRequest* request, |
67 | | PAlterVaultSyncResponse* response, |
68 | 0 | google::protobuf::Closure* done) { |
69 | 0 | LOG(INFO) << "alter be to sync vault info from Meta Service"; |
70 | | // If the vaults containing hdfs vault then it would try to create hdfs connection using jni |
71 | | // which would acuiqre one thread local jniEnv. But bthread context can't guarantee that the brpc |
72 | | // worker thread wouldn't do bthread switch between worker threads. |
73 | 0 | bool ret = _heavy_work_pool.try_offer([this, done]() { |
74 | 0 | brpc::ClosureGuard closure_guard(done); |
75 | 0 | _engine.sync_storage_vault(); |
76 | 0 | }); |
77 | 0 | if (!ret) { |
78 | 0 | brpc::ClosureGuard closure_guard(done); |
79 | 0 | LOG(WARNING) << "fail to offer alter_vault_sync request to the work pool, pool=" |
80 | 0 | << _heavy_work_pool.get_info(); |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | 0 | FileCacheType cache_type_to_pb(io::FileCacheType type) { |
85 | 0 | switch (type) { |
86 | 0 | case io::FileCacheType::TTL: |
87 | 0 | return FileCacheType::TTL; |
88 | 0 | case io::FileCacheType::INDEX: |
89 | 0 | return FileCacheType::INDEX; |
90 | 0 | case io::FileCacheType::NORMAL: |
91 | 0 | return FileCacheType::NORMAL; |
92 | 0 | default: |
93 | 0 | DCHECK(false); |
94 | 0 | } |
95 | 0 | return FileCacheType::NORMAL; |
96 | 0 | } |
97 | | |
// Returns the current wall-clock time (std::chrono::system_clock) as microseconds since the
// clock's epoch.
static int64_t current_unix_time_us() {
    using std::chrono::duration_cast;
    using std::chrono::microseconds;
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return duration_cast<microseconds>(since_epoch).count();
}
103 | | |
// Computes `end_unix_ts_us - start_unix_ts_us` when that difference is meaningful.
// The start timestamp is produced by the caller BE: mixed-version callers may omit it
// (non-positive value), and system clocks across BEs are not guaranteed to be ordered
// (end earlier than start). In both cases std::nullopt is returned.
static std::optional<int64_t> warm_up_rowset_cross_host_latency_us(int64_t start_unix_ts_us,
                                                                   int64_t end_unix_ts_us) {
    const bool start_is_set = start_unix_ts_us > 0;
    const bool clocks_ordered = end_unix_ts_us >= start_unix_ts_us;
    if (start_is_set && clocks_ordered) {
        return end_unix_ts_us - start_unix_ts_us;
    }
    return std::nullopt;
}
113 | | |
114 | | static void add_file_cache_block_meta_to_response( |
115 | | PGetFileCacheMetaResponse* resp, int64_t tablet_id, const std::string& rowset_id, |
116 | | int32_t segment_id, const std::string& file_name, |
117 | | const std::tuple<int64_t, int64_t, io::FileCacheType, int64_t>& tuple, |
118 | 0 | const RowsetSharedPtr& rowset, bool is_index) { |
119 | 0 | FileCacheBlockMeta* meta = resp->add_file_cache_block_metas(); |
120 | 0 | meta->set_tablet_id(tablet_id); |
121 | 0 | meta->set_rowset_id(rowset_id); |
122 | 0 | meta->set_segment_id(segment_id); |
123 | 0 | meta->set_file_name(file_name); |
124 | |
|
125 | 0 | if (!is_index) { |
126 | | // .dat |
127 | 0 | meta->set_file_size(rowset->rowset_meta()->segment_file_size(segment_id)); |
128 | 0 | meta->set_file_type(doris::FileType::SEGMENT_FILE); |
129 | 0 | } else { |
130 | | // .idx |
131 | 0 | const auto& idx_file_info = rowset->rowset_meta()->inverted_index_file_info(segment_id); |
132 | 0 | meta->set_file_size(idx_file_info.has_index_size() ? idx_file_info.index_size() : -1); |
133 | 0 | meta->set_file_type(doris::FileType::INVERTED_INDEX_FILE); |
134 | 0 | } |
135 | |
|
136 | 0 | meta->set_offset(std::get<0>(tuple)); |
137 | 0 | meta->set_size(std::get<1>(tuple)); |
138 | 0 | meta->set_cache_type(cache_type_to_pb(std::get<2>(tuple))); |
139 | 0 | meta->set_expiration_time(std::get<3>(tuple)); |
140 | 0 | } |
141 | | |
142 | | static void process_segment_file_cache_meta(PGetFileCacheMetaResponse* resp, |
143 | | const RowsetSharedPtr& rowset, int64_t tablet_id, |
144 | | const std::string& rowset_id, int32_t segment_id, |
145 | 0 | bool is_index) { |
146 | 0 | const char* extension = is_index ? ".idx" : ".dat"; |
147 | 0 | std::string file_name = fmt::format("{}_{}{}", rowset_id, segment_id, extension); |
148 | 0 | auto cache_key = io::BlockFileCache::hash(file_name); |
149 | 0 | auto* cache = io::FileCacheFactory::instance()->get_by_path(cache_key); |
150 | 0 | if (!cache) return; |
151 | 0 | auto segments_meta = cache->get_hot_blocks_meta(cache_key); |
152 | 0 | for (const auto& tuple : segments_meta) { |
153 | 0 | add_file_cache_block_meta_to_response(resp, tablet_id, rowset_id, segment_id, file_name, |
154 | 0 | tuple, rowset, is_index); |
155 | 0 | } |
156 | 0 | } |
157 | | |
158 | | void CloudInternalServiceImpl::get_file_cache_meta_by_tablet_id( |
159 | | google::protobuf::RpcController* controller [[maybe_unused]], |
160 | | const PGetFileCacheMetaRequest* request, PGetFileCacheMetaResponse* response, |
161 | 0 | google::protobuf::Closure* done) { |
162 | 0 | brpc::ClosureGuard closure_guard(done); |
163 | 0 | if (!config::enable_file_cache) { |
164 | 0 | LOG_WARNING("try to access tablet file cache meta, but file cache not enabled"); |
165 | 0 | return; |
166 | 0 | } |
167 | 0 | auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
168 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
169 | 0 | .count(); |
170 | 0 | std::ostringstream tablet_ids_stream; |
171 | 0 | int count = 0; |
172 | 0 | for (const auto& tablet_id : request->tablet_ids()) { |
173 | 0 | tablet_ids_stream << tablet_id << ", "; |
174 | 0 | count++; |
175 | 0 | if (count >= 10) { |
176 | 0 | break; |
177 | 0 | } |
178 | 0 | } |
179 | 0 | LOG(INFO) << "warm up get meta from this be, tablets num=" << request->tablet_ids().size() |
180 | 0 | << ", first 10 tablet_ids=[ " << tablet_ids_stream.str() << " ]"; |
181 | 0 | for (const auto& tablet_id : request->tablet_ids()) { |
182 | 0 | auto res = _engine.tablet_mgr().get_tablet(tablet_id); |
183 | 0 | if (!res.has_value()) { |
184 | 0 | LOG(ERROR) << "failed to get tablet: " << tablet_id |
185 | 0 | << " err msg: " << res.error().msg(); |
186 | 0 | continue; |
187 | 0 | } |
188 | 0 | CloudTabletSPtr tablet = std::move(res.value()); |
189 | 0 | auto st = tablet->sync_rowsets(); |
190 | 0 | if (!st) { |
191 | | // just log failed, try it best |
192 | 0 | LOG(WARNING) << "failed to sync rowsets: " << tablet_id |
193 | 0 | << " err msg: " << st.to_string(); |
194 | 0 | } |
195 | 0 | auto rowsets = tablet->get_snapshot_rowset(); |
196 | |
|
197 | 0 | for (const RowsetSharedPtr& rowset : rowsets) { |
198 | 0 | std::string rowset_id = rowset->rowset_id().to_string(); |
199 | 0 | for (int32_t segment_id = 0; segment_id < rowset->num_segments(); ++segment_id) { |
200 | 0 | process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id, |
201 | 0 | false); |
202 | 0 | process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id, |
203 | 0 | true); |
204 | 0 | } |
205 | 0 | } |
206 | 0 | } |
207 | 0 | auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
208 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
209 | 0 | .count(); |
210 | 0 | g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency << (end_ts - begin_ts); |
211 | 0 | LOG(INFO) << "get file cache meta by tablet ids = [ " << tablet_ids_stream.str() << " ] took " |
212 | 0 | << end_ts - begin_ts << " us"; |
213 | 0 | VLOG_DEBUG << "get file cache meta by tablet id request=" << request->DebugString() |
214 | 0 | << ", response=" << response->DebugString(); |
215 | 0 | } |
216 | | |
217 | | namespace { |
218 | | // Helper functions for fetch_peer_data |
219 | | |
220 | 0 | Status handle_peer_file_range_request(const std::string& path, PFetchPeerDataResponse* response) { |
221 | | // Read specific range [file_offset, file_offset+file_size) across cached blocks |
222 | 0 | auto datas = io::FileCacheFactory::instance()->get_cache_data_by_path(path); |
223 | 0 | for (auto& cb : datas) { |
224 | 0 | *(response->add_datas()) = std::move(cb); |
225 | 0 | } |
226 | 0 | return Status::OK(); |
227 | 0 | } |
228 | | |
229 | 0 | void set_error_response(PFetchPeerDataResponse* response, const std::string& error_msg) { |
230 | 0 | response->mutable_status()->add_error_msgs(error_msg); |
231 | 0 | response->mutable_status()->set_status_code(TStatusCode::INTERNAL_ERROR); |
232 | 0 | } |
233 | | |
234 | | Status read_file_block(const std::shared_ptr<io::FileBlock>& file_block, size_t file_size, |
235 | 0 | doris::CacheBlockPB* output) { |
236 | 0 | std::string data; |
237 | | // ATTN: calculate the rightmost boundary value of the block, due to inaccurate current block meta information. |
238 | | // see CachedRemoteFileReader::read_at_impl for more details. |
239 | | // Ensure file_size >= file_block->offset() to avoid underflow |
240 | 0 | if (file_size < file_block->offset()) { |
241 | 0 | LOG(WARNING) << "file_size (" << file_size << ") < file_block->offset(" |
242 | 0 | << file_block->offset() << ")"; |
243 | 0 | return Status::InternalError<false>("file_size less than block offset"); |
244 | 0 | } |
245 | 0 | size_t read_size = std::min(static_cast<size_t>(file_size - file_block->offset()), |
246 | 0 | file_block->range().size()); |
247 | 0 | data.resize(read_size); |
248 | |
|
249 | 0 | auto begin_read_file_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
250 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
251 | 0 | .count(); |
252 | |
|
253 | 0 | SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->s3_file_buffer_tracker()); |
254 | 0 | Slice slice(data.data(), data.size()); |
255 | 0 | Status read_st = file_block->read(slice, /*read_offset=*/0); |
256 | |
|
257 | 0 | auto end_read_file_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
258 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
259 | 0 | .count(); |
260 | 0 | g_file_cache_get_by_peer_read_cache_file_latency << (end_read_file_ts - begin_read_file_ts); |
261 | |
|
262 | 0 | if (read_st.ok()) { |
263 | 0 | output->set_block_offset(static_cast<int64_t>(file_block->offset())); |
264 | 0 | output->set_block_size(static_cast<int64_t>(read_size)); |
265 | 0 | output->set_data(std::move(data)); |
266 | 0 | return Status::OK(); |
267 | 0 | } else { |
268 | 0 | g_file_cache_get_by_peer_failed_num << 1; |
269 | 0 | LOG(WARNING) << "read cache block failed: " << read_st; |
270 | 0 | return read_st; |
271 | 0 | } |
272 | 0 | } |
273 | | |
274 | | Status handle_peer_file_cache_block_request(const PFetchPeerDataRequest* request, |
275 | 0 | PFetchPeerDataResponse* response) { |
276 | 0 | const auto& path = request->path(); |
277 | 0 | auto hash = io::BlockFileCache::hash(path); |
278 | 0 | auto* cache = io::FileCacheFactory::instance()->get_by_path(hash); |
279 | 0 | if (cache == nullptr) { |
280 | 0 | g_file_cache_get_by_peer_failed_num << 1; |
281 | 0 | set_error_response(response, "can't get file cache instance"); |
282 | 0 | return Status::InternalError<false>("can't get file cache instance"); |
283 | 0 | } |
284 | | |
285 | 0 | io::CacheContext ctx {}; |
286 | 0 | io::ReadStatistics local_stats; |
287 | 0 | ctx.stats = &local_stats; |
288 | |
|
289 | 0 | for (const auto& cb_req : request->cache_req()) { |
290 | 0 | size_t offset = static_cast<size_t>(std::max<int64_t>(0, cb_req.block_offset())); |
291 | 0 | size_t size = static_cast<size_t>(std::max<int64_t>(0, cb_req.block_size())); |
292 | 0 | auto holder = cache->get_or_set(hash, offset, size, ctx); |
293 | |
|
294 | 0 | for (auto& fb : holder.file_blocks) { |
295 | 0 | if (fb->state() != io::FileBlock::State::DOWNLOADED) { |
296 | 0 | g_file_cache_get_by_peer_failed_num << 1; |
297 | 0 | LOG(WARNING) << "read cache block failed, state=" << fb->state(); |
298 | 0 | set_error_response(response, "read cache file error"); |
299 | 0 | return Status::InternalError<false>("cache block not downloaded"); |
300 | 0 | } |
301 | | |
302 | 0 | g_file_cache_get_by_peer_blocks_num << 1; |
303 | 0 | doris::CacheBlockPB* out = response->add_datas(); |
304 | 0 | Status read_status = read_file_block(fb, request->file_size(), out); |
305 | 0 | if (!read_status.ok()) { |
306 | 0 | set_error_response(response, "read cache file error"); |
307 | 0 | return read_status; |
308 | 0 | } |
309 | 0 | } |
310 | 0 | } |
311 | | |
312 | 0 | return Status::OK(); |
313 | 0 | } |
314 | | } // namespace |
315 | | |
316 | | void CloudInternalServiceImpl::fetch_peer_data(google::protobuf::RpcController* controller |
317 | | [[maybe_unused]], |
318 | | const PFetchPeerDataRequest* request, |
319 | | PFetchPeerDataResponse* response, |
320 | 0 | google::protobuf::Closure* done) { |
321 | 0 | bool ret = _heavy_work_pool.try_offer([request, response, done]() { |
322 | 0 | brpc::ClosureGuard closure_guard(done); |
323 | 0 | g_file_cache_get_by_peer_num << 1; |
324 | |
|
325 | 0 | if (!config::enable_file_cache) { |
326 | 0 | LOG_WARNING("try to access file cache data, but file cache not enabled"); |
327 | 0 | return; |
328 | 0 | } |
329 | | |
330 | 0 | auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
331 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
332 | 0 | .count(); |
333 | |
|
334 | 0 | const auto type = request->type(); |
335 | 0 | const auto& path = request->path(); |
336 | 0 | response->mutable_status()->set_status_code(TStatusCode::OK); |
337 | |
|
338 | 0 | Status status = Status::OK(); |
339 | 0 | if (type == PFetchPeerDataRequest_Type_PEER_FILE_RANGE) { |
340 | 0 | status = handle_peer_file_range_request(path, response); |
341 | 0 | } else if (type == PFetchPeerDataRequest_Type_PEER_FILE_CACHE_BLOCK) { |
342 | 0 | status = handle_peer_file_cache_block_request(request, response); |
343 | 0 | } |
344 | |
|
345 | 0 | if (!status.ok()) { |
346 | 0 | LOG(WARNING) << "fetch peer data failed: " << status.to_string(); |
347 | 0 | set_error_response(response, status.to_string()); |
348 | 0 | } |
349 | |
|
350 | 0 | DBUG_EXECUTE_IF("CloudInternalServiceImpl::fetch_peer_data_slower", { |
351 | 0 | int st_us = dp->param<int>("sleep", 1000); |
352 | 0 | LOG_WARNING("CloudInternalServiceImpl::fetch_peer_data_slower").tag("sleep", st_us); |
353 | 0 | bthread_usleep(st_us); |
354 | 0 | }); |
355 | |
|
356 | 0 | auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>( |
357 | 0 | std::chrono::steady_clock::now().time_since_epoch()) |
358 | 0 | .count(); |
359 | 0 | g_file_cache_get_by_peer_server_latency << (end_ts - begin_ts); |
360 | 0 | g_file_cache_get_by_peer_success_num << 1; |
361 | |
|
362 | 0 | VLOG_DEBUG << "fetch cache request=" << request->DebugString() |
363 | 0 | << ", response=" << response->DebugString(); |
364 | 0 | }); |
365 | |
|
366 | 0 | if (!ret) { |
367 | 0 | brpc::ClosureGuard closure_guard(done); |
368 | 0 | LOG(WARNING) << "fail to offer fetch peer data request to the work pool, pool=" |
369 | 0 | << _heavy_work_pool.get_info(); |
370 | 0 | } |
371 | 0 | } |
372 | | |
373 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_num( |
374 | | "file_cache_event_driven_warm_up_submitted_segment_num"); |
375 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_num( |
376 | | "file_cache_event_driven_warm_up_finished_segment_num"); |
377 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_num( |
378 | | "file_cache_event_driven_warm_up_failed_segment_num"); |
379 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_size( |
380 | | "file_cache_event_driven_warm_up_submitted_segment_size"); |
381 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_size( |
382 | | "file_cache_event_driven_warm_up_finished_segment_size"); |
383 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_size( |
384 | | "file_cache_event_driven_warm_up_failed_segment_size"); |
385 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_num( |
386 | | "file_cache_event_driven_warm_up_submitted_index_num"); |
387 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_num( |
388 | | "file_cache_event_driven_warm_up_finished_index_num"); |
389 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_num( |
390 | | "file_cache_event_driven_warm_up_failed_index_num"); |
391 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_size( |
392 | | "file_cache_event_driven_warm_up_submitted_index_size"); |
393 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_size( |
394 | | "file_cache_event_driven_warm_up_finished_index_size"); |
395 | | bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_size( |
396 | | "file_cache_event_driven_warm_up_failed_index_size"); |
397 | | bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_handle_unix_ts( |
398 | | "file_cache_warm_up_rowset_last_handle_unix_ts", 0); |
399 | | bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_finish_unix_ts( |
400 | | "file_cache_warm_up_rowset_last_finish_unix_ts", 0); |
401 | | bvar::LatencyRecorder g_file_cache_warm_up_rowset_latency("file_cache_warm_up_rowset_latency"); |
402 | | bvar::LatencyRecorder g_file_cache_warm_up_rowset_request_to_handle_latency( |
403 | | "file_cache_warm_up_rowset_request_to_handle_latency"); |
404 | | bvar::LatencyRecorder g_file_cache_warm_up_rowset_handle_to_finish_latency( |
405 | | "file_cache_warm_up_rowset_handle_to_finish_latency"); |
406 | | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_slow_count( |
407 | | "file_cache_warm_up_rowset_slow_count"); |
408 | | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_request_to_handle_slow_count( |
409 | | "file_cache_warm_up_rowset_request_to_handle_slow_count"); |
410 | | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_handle_to_finish_slow_count( |
411 | | "file_cache_warm_up_rowset_handle_to_finish_slow_count"); |
412 | | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_num( |
413 | | "file_cache_warm_up_rowset_wait_for_compaction_num"); |
414 | | bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num( |
415 | | "file_cache_warm_up_rowset_wait_for_compaction_timeout_num"); |
416 | | |
417 | | // Per-job windowed metrics for target BE |
418 | | // bvar::Window enforces MAX_SECONDS_LIMIT = 3600, so the longest window is 1h. |
419 | | static constexpr int WINDOW_5M = 300; |
420 | | static constexpr int WINDOW_30M = 1800; |
421 | | static constexpr int WINDOW_1H = 3600; |
422 | | |
423 | | MBvarWindowedAdder g_warmup_ed_finish_segment_num("warmup_ed_finish_segment_num", {"job_id"}, |
424 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
425 | | MBvarWindowedAdder g_warmup_ed_finish_segment_size("warmup_ed_finish_segment_size", {"job_id"}, |
426 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
427 | | MBvarWindowedAdder g_warmup_ed_finish_index_num("warmup_ed_finish_index_num", {"job_id"}, |
428 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
429 | | MBvarWindowedAdder g_warmup_ed_finish_index_size("warmup_ed_finish_index_size", {"job_id"}, |
430 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
431 | | MBvarWindowedAdder g_warmup_ed_fail_segment_num("warmup_ed_fail_segment_num", {"job_id"}, |
432 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
433 | | MBvarWindowedAdder g_warmup_ed_fail_segment_size("warmup_ed_fail_segment_size", {"job_id"}, |
434 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
435 | | MBvarWindowedAdder g_warmup_ed_fail_index_num("warmup_ed_fail_index_num", {"job_id"}, |
436 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
437 | | MBvarWindowedAdder g_warmup_ed_fail_index_size("warmup_ed_fail_index_size", {"job_id"}, |
438 | | {WINDOW_5M, WINDOW_30M, WINDOW_1H}, false); |
439 | | bvar::MultiDimension<bvar::Status<int64_t>> g_warmup_ed_last_finish_ts({"job_id"}); |
440 | | |
441 | 0 | void update_warmup_ed_last_finish_ts(const std::string& job_id_str) { |
442 | 0 | auto* finish_ts = g_warmup_ed_last_finish_ts.get_stats(std::list<std::string> {job_id_str}); |
443 | 0 | if (finish_ts) { |
444 | 0 | finish_ts->set_value(std::chrono::duration_cast<std::chrono::milliseconds>( |
445 | 0 | std::chrono::system_clock::now().time_since_epoch()) |
446 | 0 | .count()); |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | 0 | void record_warmup_ed_finish_segment(const std::string& job_id_str, int64_t segment_size) { |
451 | 0 | g_warmup_ed_finish_segment_num.put({job_id_str}, 1); |
452 | 0 | g_warmup_ed_finish_segment_size.put({job_id_str}, segment_size); |
453 | 0 | update_warmup_ed_last_finish_ts(job_id_str); |
454 | 0 | } |
455 | | |
456 | 0 | void record_warmup_ed_finish_index(const std::string& job_id_str, int64_t idx_size) { |
457 | 0 | g_warmup_ed_finish_index_num.put({job_id_str}, 1); |
458 | 0 | g_warmup_ed_finish_index_size.put({job_id_str}, idx_size); |
459 | 0 | update_warmup_ed_last_finish_ts(job_id_str); |
460 | 0 | } |
461 | | |
462 | 0 | void record_warmup_ed_fail_segment(const std::string& job_id_str, int64_t segment_size) { |
463 | 0 | g_warmup_ed_fail_segment_num.put({job_id_str}, 1); |
464 | 0 | g_warmup_ed_fail_segment_size.put({job_id_str}, segment_size); |
465 | 0 | } |
466 | | |
467 | 0 | void record_warmup_ed_fail_index(const std::string& job_id_str, int64_t idx_size) { |
468 | 0 | g_warmup_ed_fail_index_num.put({job_id_str}, 1); |
469 | 0 | g_warmup_ed_fail_index_size.put({job_id_str}, idx_size); |
470 | 0 | } |
471 | | |
472 | | void record_warmup_ed_skipped_rowset_as_finished(RowsetMeta& rs_meta, |
473 | 0 | const std::string& job_id_str) { |
474 | 0 | auto schema_ptr = rs_meta.tablet_schema(); |
475 | 0 | bool has_inverted_index = schema_ptr->has_inverted_index() || schema_ptr->has_ann_index(); |
476 | 0 | auto idx_version = schema_ptr->get_inverted_index_storage_format(); |
477 | 0 | for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); segment_id++) { |
478 | 0 | record_warmup_ed_finish_segment(job_id_str, rs_meta.segment_file_size(segment_id)); |
479 | |
|
480 | 0 | if (!has_inverted_index) { |
481 | 0 | continue; |
482 | 0 | } |
483 | 0 | auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id); |
484 | 0 | if (idx_version == InvertedIndexStorageFormatPB::V1) { |
485 | 0 | std::unordered_map<int64_t, int64_t> index_size_map; |
486 | 0 | for (const auto& info : inverted_index_info.index_info()) { |
487 | 0 | if (info.index_file_size() != -1) { |
488 | 0 | index_size_map[info.index_id()] = info.index_file_size(); |
489 | 0 | } else { |
490 | 0 | VLOG_DEBUG << "Invalid index_file_size for segment_id " << segment_id |
491 | 0 | << ", index_id " << info.index_id(); |
492 | 0 | } |
493 | 0 | } |
494 | 0 | for (const auto& index : schema_ptr->inverted_indexes()) { |
495 | 0 | record_warmup_ed_finish_index(job_id_str, index_size_map[index->index_id()]); |
496 | 0 | } |
497 | 0 | } else { // InvertedIndexStorageFormatPB::V2 |
498 | 0 | int64_t idx_size = 0; |
499 | 0 | if (inverted_index_info.has_index_size()) { |
500 | 0 | idx_size = inverted_index_info.index_size(); |
501 | 0 | } else { |
502 | 0 | VLOG_DEBUG << "index_size is not set for segment " << segment_id; |
503 | 0 | } |
504 | 0 | record_warmup_ed_finish_index(job_id_str, idx_size); |
505 | 0 | } |
506 | 0 | } |
507 | 0 | } |
508 | | |
509 | | void handle_segment_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id, |
510 | | int64_t segment_id, std::shared_ptr<CloudTablet> tablet, |
511 | | std::shared_ptr<bthread::CountdownEvent> wait, Version version, |
512 | | int64_t segment_size, int64_t request_ts, int64_t handle_ts, |
513 | 0 | std::string job_id_str, int64_t upstream_trigger_ts_ms) { |
514 | 0 | DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_segment", { |
515 | 0 | auto sleep_time = dp->param<int>("sleep", 3); |
516 | 0 | LOG_INFO("[verbose] block download for rowset={}, version={}, sleep={}", |
517 | 0 | rowset_id.to_string(), version.to_string(), sleep_time); |
518 | 0 | std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); |
519 | 0 | }); |
520 | 0 | DBUG_EXECUTE_IF( |
521 | 0 | "CloudInternalServiceImpl::warm_up_rowset.download_segment.inject_" |
522 | 0 | "error", |
523 | 0 | { |
524 | 0 | st = Status::InternalError("injected error"); |
525 | 0 | LOG_INFO("[verbose] inject error, tablet={}, rowset={}, st={}", tablet_id, |
526 | 0 | rowset_id.to_string(), st.to_string()); |
527 | 0 | }); |
528 | 0 | if (st.ok()) { |
529 | 0 | g_file_cache_event_driven_warm_up_finished_segment_num << 1; |
530 | 0 | g_file_cache_event_driven_warm_up_finished_segment_size << segment_size; |
531 | 0 | record_warmup_ed_finish_segment(job_id_str, segment_size); |
532 | 0 | int64_t now_ts = current_unix_time_us(); |
533 | 0 | g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts); |
534 | 0 | auto rowset_latency_us = warm_up_rowset_cross_host_latency_us(request_ts, now_ts); |
535 | 0 | if (rowset_latency_us.has_value()) { |
536 | 0 | g_file_cache_warm_up_rowset_latency << *rowset_latency_us; |
537 | 0 | } |
538 | 0 | g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts); |
539 | 0 | if (rowset_latency_us.has_value() && |
540 | 0 | *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) { |
541 | 0 | g_file_cache_warm_up_rowset_slow_count << 1; |
542 | 0 | LOG(INFO) << "warm up rowset took " << *rowset_latency_us |
543 | 0 | << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string() |
544 | 0 | << ", segment_id: " << segment_id; |
545 | 0 | } |
546 | 0 | if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) { |
547 | 0 | g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1; |
548 | 0 | LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts |
549 | 0 | << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string() |
550 | 0 | << ", segment_id: " << segment_id; |
551 | 0 | } |
552 | 0 | } else { |
553 | 0 | g_file_cache_event_driven_warm_up_failed_segment_num << 1; |
554 | 0 | g_file_cache_event_driven_warm_up_failed_segment_size << segment_size; |
555 | 0 | record_warmup_ed_fail_segment(job_id_str, segment_size); |
556 | 0 | LOG(WARNING) << "download segment failed, tablet_id: " << tablet_id |
557 | 0 | << " rowset_id: " << rowset_id.to_string() << ", error: " << st; |
558 | 0 | } |
559 | 0 | if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 1, |
560 | 0 | 0) |
561 | 0 | .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) { |
562 | 0 | VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string() |
563 | 0 | << ") completed"; |
564 | 0 | } |
565 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_done(job_id_str, upstream_trigger_ts_ms); |
566 | 0 | if (wait) { |
567 | 0 | wait->signal(); |
568 | 0 | } |
569 | 0 | } |
570 | | |
571 | | void handle_inverted_index_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id, |
572 | | int64_t segment_id, std::string index_path, |
573 | | std::shared_ptr<CloudTablet> tablet, |
574 | | std::shared_ptr<bthread::CountdownEvent> wait, |
575 | | Version version, uint64_t idx_size, int64_t request_ts, |
576 | | int64_t handle_ts, std::string job_id_str, |
577 | 0 | int64_t upstream_trigger_ts_ms) { |
578 | 0 | DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_inverted_idx", { |
579 | 0 | auto sleep_time = dp->param<int>("sleep", 3); |
580 | 0 | LOG_INFO( |
581 | 0 | "[verbose] block download for rowset={}, inverted index " |
582 | 0 | "file={}, sleep={}", |
583 | 0 | rowset_id.to_string(), index_path, sleep_time); |
584 | 0 | std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); |
585 | 0 | }); |
586 | 0 | if (st.ok()) { |
587 | 0 | g_file_cache_event_driven_warm_up_finished_index_num << 1; |
588 | 0 | g_file_cache_event_driven_warm_up_finished_index_size << idx_size; |
589 | 0 | record_warmup_ed_finish_index(job_id_str, static_cast<int64_t>(idx_size)); |
590 | 0 | int64_t now_ts = current_unix_time_us(); |
591 | 0 | g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts); |
592 | 0 | auto rowset_latency_us = warm_up_rowset_cross_host_latency_us(request_ts, now_ts); |
593 | 0 | if (rowset_latency_us.has_value()) { |
594 | 0 | g_file_cache_warm_up_rowset_latency << *rowset_latency_us; |
595 | 0 | } |
596 | 0 | g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts); |
597 | 0 | if (rowset_latency_us.has_value() && |
598 | 0 | *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) { |
599 | 0 | g_file_cache_warm_up_rowset_slow_count << 1; |
600 | 0 | LOG(INFO) << "warm up rowset took " << *rowset_latency_us |
601 | 0 | << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string() |
602 | 0 | << ", segment_id: " << segment_id; |
603 | 0 | } |
604 | 0 | if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) { |
605 | 0 | g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1; |
606 | 0 | LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts |
607 | 0 | << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string() |
608 | 0 | << ", segment_id: " << segment_id; |
609 | 0 | } |
610 | 0 | } else { |
611 | 0 | g_file_cache_event_driven_warm_up_failed_index_num << 1; |
612 | 0 | g_file_cache_event_driven_warm_up_failed_index_size << idx_size; |
613 | 0 | record_warmup_ed_fail_index(job_id_str, static_cast<int64_t>(idx_size)); |
614 | 0 | LOG(WARNING) << "download inverted index failed, tablet_id: " << tablet_id |
615 | 0 | << " rowset_id: " << rowset_id << ", error: " << st; |
616 | 0 | } |
617 | 0 | if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 0, |
618 | 0 | 1) |
619 | 0 | .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) { |
620 | 0 | VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string() |
621 | 0 | << ") completed"; |
622 | 0 | } |
623 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_done(job_id_str, upstream_trigger_ts_ms); |
624 | 0 | if (wait) { |
625 | 0 | wait->signal(); |
626 | 0 | } |
627 | 0 | } |
628 | | |
629 | | void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* controller |
630 | | [[maybe_unused]], |
631 | | const PWarmUpRowsetRequest* request, |
632 | | PWarmUpRowsetResponse* response, |
633 | 0 | google::protobuf::Closure* done) { |
634 | 0 | brpc::ClosureGuard closure_guard(done); |
635 | 0 | std::shared_ptr<bthread::CountdownEvent> wait = nullptr; |
636 | 0 | timespec due_time; |
637 | 0 | if (request->has_sync_wait_timeout_ms() && request->sync_wait_timeout_ms() > 0) { |
638 | 0 | g_file_cache_warm_up_rowset_wait_for_compaction_num << 1; |
639 | 0 | wait = std::make_shared<bthread::CountdownEvent>(0); |
640 | 0 | VLOG_DEBUG << "sync_wait_timeout: " << request->sync_wait_timeout_ms() << " ms"; |
641 | 0 | due_time = butil::milliseconds_from_now(request->sync_wait_timeout_ms()); |
642 | 0 | } |
643 | | |
644 | | // Extract job_id from request (0 if not set, for backward compatibility) |
645 | 0 | std::string job_id_str = std::to_string(request->has_job_id() ? request->job_id() : 0); |
646 | 0 | int64_t upstream_trigger_ts_ms = |
647 | 0 | request->has_upstream_trigger_ts_ms() ? request->upstream_trigger_ts_ms() : 0; |
648 | |
|
649 | 0 | for (auto& rs_meta_pb : request->rowset_metas()) { |
650 | 0 | RowsetMeta rs_meta; |
651 | 0 | rs_meta.init_from_pb(rs_meta_pb); |
652 | 0 | auto storage_resource = rs_meta.remote_storage_resource(); |
653 | 0 | if (!storage_resource) { |
654 | 0 | LOG(WARNING) << storage_resource.error(); |
655 | 0 | continue; |
656 | 0 | } |
657 | 0 | int64_t tablet_id = rs_meta.tablet_id(); |
658 | 0 | auto rowset_id = rs_meta.rowset_id(); |
659 | 0 | bool local_only = !(request->has_skip_existence_check() && request->skip_existence_check()); |
660 | 0 | auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data = */ false, |
661 | 0 | /* sync_delete_bitmap = */ true, |
662 | 0 | /* sync_stats = */ nullptr, |
663 | 0 | /* local_only = */ local_only); |
664 | 0 | if (!res.has_value()) { |
665 | 0 | LOG_WARNING("Warm up error ").tag("tablet_id", tablet_id).error(res.error()); |
666 | 0 | if (res.error().msg().find("local_only=true") != std::string::npos || |
667 | 0 | res.error().msg().find("force_use_only_cached=true") != std::string::npos) { |
668 | 0 | res.error().set_code(ErrorCode::TABLE_NOT_FOUND); |
669 | 0 | } |
670 | 0 | res.error().to_protobuf(response->mutable_status()); |
671 | 0 | continue; |
672 | 0 | } |
673 | 0 | auto tablet = res.value(); |
674 | 0 | auto tablet_meta = tablet->tablet_meta(); |
675 | |
|
676 | 0 | int64_t handle_ts = current_unix_time_us(); |
677 | 0 | g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts); |
678 | 0 | int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() : 0; |
679 | 0 | auto request_to_handle_latency_us = |
680 | 0 | warm_up_rowset_cross_host_latency_us(request_ts, handle_ts); |
681 | 0 | if (request_to_handle_latency_us.has_value()) { |
682 | 0 | g_file_cache_warm_up_rowset_request_to_handle_latency << *request_to_handle_latency_us; |
683 | 0 | } |
684 | 0 | if (request_to_handle_latency_us.has_value() && |
685 | 0 | *request_to_handle_latency_us > config::warm_up_rowset_slow_log_ms * 1000) { |
686 | 0 | g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1; |
687 | 0 | LOG(INFO) << "warm up rowset (request to handle) took " << *request_to_handle_latency_us |
688 | 0 | << " us, tablet_id: " << rs_meta.tablet_id() |
689 | 0 | << ", rowset_id: " << rowset_id.to_string(); |
690 | 0 | } |
691 | 0 | int64_t expiration_time = tablet_meta->ttl_seconds(); |
692 | |
|
693 | 0 | if (!tablet->add_rowset_warmup_state(rs_meta, WarmUpTriggerSource::EVENT_DRIVEN)) { |
694 | 0 | LOG(INFO) << "found duplicate warmup task for rowset " << rowset_id.to_string() |
695 | 0 | << ", skip it"; |
696 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_done(job_id_str, |
697 | 0 | upstream_trigger_ts_ms); |
698 | 0 | record_warmup_ed_skipped_rowset_as_finished(rs_meta, job_id_str); |
699 | 0 | continue; |
700 | 0 | } |
701 | 0 | if (rs_meta.num_segments() == 0) { |
702 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_done(job_id_str, |
703 | 0 | upstream_trigger_ts_ms); |
704 | 0 | } |
705 | |
|
706 | 0 | for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); segment_id++) { |
707 | 0 | if (!config::file_cache_enable_only_warm_up_idx) { |
708 | 0 | auto segment_size = rs_meta.segment_file_size(segment_id); |
709 | | |
710 | | // Use rs_meta.fs() instead of storage_resource.value()->fs to support packed files. |
711 | | // PackedFileSystem wrapper in rs_meta.fs() handles the index_map lookup and |
712 | | // reads from the correct packed file. |
713 | 0 | io::DownloadFileMeta download_meta { |
714 | 0 | .path = storage_resource.value()->remote_segment_path(rs_meta, segment_id), |
715 | 0 | .file_size = segment_size, |
716 | 0 | .offset = 0, |
717 | 0 | .download_size = segment_size, |
718 | 0 | .file_system = rs_meta.fs(), |
719 | 0 | .ctx = {.is_index_data = false, |
720 | 0 | .expiration_time = expiration_time, |
721 | 0 | .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, |
722 | 0 | .is_warmup = true}, |
723 | 0 | .download_done = |
724 | 0 | [=, version = rs_meta.version()](Status st) { |
725 | 0 | handle_segment_download_done( |
726 | 0 | st, tablet_id, rowset_id, segment_id, tablet, wait, |
727 | 0 | version, segment_size, request_ts, handle_ts, |
728 | 0 | job_id_str, upstream_trigger_ts_ms); |
729 | 0 | }, |
730 | 0 | .tablet_id = tablet_id}; |
731 | |
|
732 | 0 | g_file_cache_event_driven_warm_up_submitted_segment_num << 1; |
733 | 0 | g_file_cache_event_driven_warm_up_submitted_segment_size << segment_size; |
734 | 0 | if (wait) { |
735 | 0 | wait->add_count(); |
736 | 0 | } |
737 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_submit(job_id_str, |
738 | 0 | upstream_trigger_ts_ms); |
739 | |
|
740 | 0 | _engine.file_cache_block_downloader().submit_download_task(download_meta); |
741 | 0 | } |
742 | | |
743 | | // Use rs_meta.fs() to support packed files for inverted index download. |
744 | 0 | auto download_inverted_index = [&, tablet, job_id_str](std::string index_path, |
745 | 0 | uint64_t idx_size) { |
746 | 0 | io::DownloadFileMeta download_meta { |
747 | 0 | .path = io::Path(index_path), |
748 | 0 | .file_size = static_cast<int64_t>(idx_size), |
749 | 0 | .file_system = rs_meta.fs(), |
750 | 0 | .ctx = {.is_index_data = false, // DORIS-20877 |
751 | 0 | .expiration_time = expiration_time, |
752 | 0 | .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, |
753 | 0 | .is_warmup = true}, |
754 | 0 | .download_done = |
755 | 0 | [=, version = rs_meta.version()](Status st) { |
756 | 0 | handle_inverted_index_download_done( |
757 | 0 | st, tablet_id, rowset_id, segment_id, index_path, |
758 | 0 | tablet, wait, version, idx_size, request_ts, handle_ts, |
759 | 0 | job_id_str, upstream_trigger_ts_ms); |
760 | 0 | }, |
761 | 0 | .tablet_id = tablet_id, |
762 | 0 | }; |
763 | 0 | g_file_cache_event_driven_warm_up_submitted_index_num << 1; |
764 | 0 | g_file_cache_event_driven_warm_up_submitted_index_size << idx_size; |
765 | 0 | tablet->update_rowset_warmup_state_inverted_idx_num( |
766 | 0 | WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, 1); |
767 | 0 | if (wait) { |
768 | 0 | wait->add_count(); |
769 | 0 | } |
770 | 0 | g_warmup_ed_downstream_progress_tracker.record_task_submit(job_id_str, |
771 | 0 | upstream_trigger_ts_ms); |
772 | 0 | _engine.file_cache_block_downloader().submit_download_task(download_meta); |
773 | 0 | }; |
774 | | |
775 | | // inverted index |
776 | 0 | auto schema_ptr = rs_meta.tablet_schema(); |
777 | 0 | auto idx_version = schema_ptr->get_inverted_index_storage_format(); |
778 | |
|
779 | 0 | if (schema_ptr->has_inverted_index() || schema_ptr->has_ann_index()) { |
780 | 0 | if (idx_version == InvertedIndexStorageFormatPB::V1) { |
781 | 0 | auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id); |
782 | 0 | std::unordered_map<int64_t, int64_t> index_size_map; |
783 | 0 | for (const auto& info : inverted_index_info.index_info()) { |
784 | 0 | if (info.index_file_size() != -1) { |
785 | 0 | index_size_map[info.index_id()] = info.index_file_size(); |
786 | 0 | } else { |
787 | 0 | VLOG_DEBUG << "Invalid index_file_size for segment_id " << segment_id |
788 | 0 | << ", index_id " << info.index_id(); |
789 | 0 | } |
790 | 0 | } |
791 | 0 | for (const auto& index : schema_ptr->inverted_indexes()) { |
792 | 0 | auto idx_path = storage_resource.value()->remote_idx_v1_path( |
793 | 0 | rs_meta, segment_id, index->index_id(), index->get_index_suffix()); |
794 | 0 | download_inverted_index(idx_path, index_size_map[index->index_id()]); |
795 | 0 | } |
796 | 0 | } else { // InvertedIndexStorageFormatPB::V2 |
797 | 0 | auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id); |
798 | 0 | int64_t idx_size = 0; |
799 | 0 | if (inverted_index_info.has_index_size()) { |
800 | 0 | idx_size = inverted_index_info.index_size(); |
801 | 0 | } else { |
802 | 0 | VLOG_DEBUG << "index_size is not set for segment " << segment_id; |
803 | 0 | } |
804 | 0 | auto idx_path = |
805 | 0 | storage_resource.value()->remote_idx_v2_path(rs_meta, segment_id); |
806 | 0 | download_inverted_index(idx_path, idx_size); |
807 | 0 | } |
808 | 0 | } |
809 | 0 | } |
810 | 0 | } |
811 | 0 | if (wait && wait->timed_wait(due_time)) { |
812 | 0 | g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num << 1; |
813 | 0 | LOG_WARNING("the time spent warming up {} rowsets exceeded {} ms", |
814 | 0 | request->rowset_metas().size(), request->sync_wait_timeout_ms()); |
815 | 0 | } |
816 | 0 | } |
817 | | |
818 | | bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_segment_num( |
819 | | "file_cache_recycle_cache_finished_segment_num"); |
820 | | bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_index_num( |
821 | | "file_cache_recycle_cache_finished_index_num"); |
822 | | |
823 | | void CloudInternalServiceImpl::recycle_cache(google::protobuf::RpcController* controller |
824 | | [[maybe_unused]], |
825 | | const PRecycleCacheRequest* request, |
826 | | PRecycleCacheResponse* response, |
827 | 0 | google::protobuf::Closure* done) { |
828 | 0 | brpc::ClosureGuard closure_guard(done); |
829 | |
|
830 | 0 | if (!config::enable_file_cache) { |
831 | 0 | return; |
832 | 0 | } |
833 | 0 | for (const auto& meta : request->cache_metas()) { |
834 | 0 | for (int64_t segment_id = 0; segment_id < meta.num_segments(); segment_id++) { |
835 | 0 | auto file_key = Segment::file_cache_key(meta.rowset_id(), segment_id); |
836 | 0 | auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); |
837 | 0 | file_cache->remove_if_cached_async(file_key); |
838 | 0 | g_file_cache_recycle_cache_finished_segment_num << 1; |
839 | 0 | } |
840 | | |
841 | | // inverted index |
842 | 0 | for (const auto& file_name : meta.index_file_names()) { |
843 | 0 | auto file_key = io::BlockFileCache::hash(file_name); |
844 | 0 | auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); |
845 | 0 | file_cache->remove_if_cached_async(file_key); |
846 | 0 | g_file_cache_recycle_cache_finished_index_num << 1; |
847 | 0 | } |
848 | 0 | } |
849 | 0 | } |
850 | | |
851 | | #include "common/compile_check_avoid_end.h" |
852 | | } // namespace doris |