Coverage Report

Created: 2026-04-14 05:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/cloud/cloud_internal_service.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "cloud/cloud_internal_service.h"
19
20
#include <bthread/countdown_event.h>
21
22
#include <algorithm>
23
#include <thread>
24
25
#include "cloud/cloud_storage_engine.h"
26
#include "cloud/cloud_tablet.h"
27
#include "cloud/cloud_tablet_mgr.h"
28
#include "cloud/cloud_warm_up_manager.h"
29
#include "cloud/config.h"
30
#include "io/cache/block_file_cache.h"
31
#include "io/cache/block_file_cache_downloader.h"
32
#include "io/cache/block_file_cache_factory.h"
33
#include "runtime/thread_context.h"
34
#include "runtime/workload_management/io_throttle.h"
35
#include "util/async_io.h"
36
#include "util/debug_points.h"
37
38
namespace doris {
39
#include "common/compile_check_avoid_begin.h"
40
41
// Metrics for serving file cache data to peer BEs (see fetch_peer_data and its helpers).
// Bumped once for every fetch_peer_data task that starts running on the work pool.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_num("file_cache_get_by_peer_num");
42
// Bumped once per cache block that is about to be read for a peer.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_blocks_num("file_cache_get_by_peer_blocks_num");
43
// Bumped at the end of a fetch_peer_data task.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_success_num("file_cache_get_by_peer_success_num");
44
// Bumped when the cache instance is missing, a block is not DOWNLOADED, or a block read fails.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_failed_num("file_cache_get_by_peer_failed_num");
45
// Time spent inside a fetch_peer_data task, fed in microseconds.
bvar::LatencyRecorder g_file_cache_get_by_peer_server_latency(
46
        "file_cache_get_by_peer_server_latency");
47
// Time spent reading a single cache block in read_file_block, fed in microseconds.
bvar::LatencyRecorder g_file_cache_get_by_peer_read_cache_file_latency(
48
        "file_cache_get_by_peer_read_cache_file_latency");
49
// Time spent in get_file_cache_meta_by_tablet_id once the file cache is known to be enabled,
// fed in microseconds.
bvar::LatencyRecorder g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency(
50
        "cloud_internal_service_get_file_cache_meta_by_tablet_id_latency");
51
52
// Forwards `exec_env` to the PInternalService base and initializes `_engine` from `engine`.
// NOTE(review): `_engine` is presumably held by reference, so the engine would have to outlive
// this service - confirm against the header.
CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine, ExecEnv* exec_env)
53
1
        : PInternalService(exec_env), _engine(engine) {}
54
55
0
// Nothing to release explicitly; the compiler-generated destructor is used.
CloudInternalServiceImpl::~CloudInternalServiceImpl() = default;
56
57
void CloudInternalServiceImpl::alter_vault_sync(google::protobuf::RpcController* controller,
58
                                                const doris::PAlterVaultSyncRequest* request,
59
                                                PAlterVaultSyncResponse* response,
60
0
                                                google::protobuf::Closure* done) {
61
0
    LOG(INFO) << "alter be to sync vault info from Meta Service";
62
    // If the vaults containing hdfs vault then it would try to create hdfs connection using jni
63
    // which would acuiqre one thread local jniEnv. But bthread context can't guarantee that the brpc
64
    // worker thread wouldn't do bthread switch between worker threads.
65
0
    bool ret = _heavy_work_pool.try_offer([this, done]() {
66
0
        brpc::ClosureGuard closure_guard(done);
67
0
        _engine.sync_storage_vault();
68
0
    });
69
0
    if (!ret) {
70
0
        brpc::ClosureGuard closure_guard(done);
71
0
        LOG(WARNING) << "fail to offer alter_vault_sync request to the work pool, pool="
72
0
                     << _heavy_work_pool.get_info();
73
0
    }
74
0
}
75
76
0
/// Maps the in-process io::FileCacheType onto the protobuf FileCacheType.
///
/// TTL, INDEX and NORMAL map to the value of the same name. Any other value trips a DCHECK and
/// falls back to NORMAL.
FileCacheType cache_type_to_pb(io::FileCacheType type) {
    if (type == io::FileCacheType::TTL) {
        return FileCacheType::TTL;
    }
    if (type == io::FileCacheType::INDEX) {
        return FileCacheType::INDEX;
    }
    if (type == io::FileCacheType::NORMAL) {
        return FileCacheType::NORMAL;
    }
    // No protobuf counterpart for this cache type.
    DCHECK(false);
    return FileCacheType::NORMAL;
}
89
90
static void add_file_cache_block_meta_to_response(
91
        PGetFileCacheMetaResponse* resp, int64_t tablet_id, const std::string& rowset_id,
92
        int32_t segment_id, const std::string& file_name,
93
        const std::tuple<int64_t, int64_t, io::FileCacheType, int64_t>& tuple,
94
0
        const RowsetSharedPtr& rowset, bool is_index) {
95
0
    FileCacheBlockMeta* meta = resp->add_file_cache_block_metas();
96
0
    meta->set_tablet_id(tablet_id);
97
0
    meta->set_rowset_id(rowset_id);
98
0
    meta->set_segment_id(segment_id);
99
0
    meta->set_file_name(file_name);
100
101
0
    if (!is_index) {
102
        // .dat
103
0
        meta->set_file_size(rowset->rowset_meta()->segment_file_size(segment_id));
104
0
        meta->set_file_type(doris::FileType::SEGMENT_FILE);
105
0
    } else {
106
        // .idx
107
0
        const auto& idx_file_info = rowset->rowset_meta()->inverted_index_file_info(segment_id);
108
0
        meta->set_file_size(idx_file_info.has_index_size() ? idx_file_info.index_size() : -1);
109
0
        meta->set_file_type(doris::FileType::INVERTED_INDEX_FILE);
110
0
    }
111
112
0
    meta->set_offset(std::get<0>(tuple));
113
0
    meta->set_size(std::get<1>(tuple));
114
0
    meta->set_cache_type(cache_type_to_pb(std::get<2>(tuple)));
115
0
    meta->set_expiration_time(std::get<3>(tuple));
116
0
}
117
118
static void process_segment_file_cache_meta(PGetFileCacheMetaResponse* resp,
119
                                            const RowsetSharedPtr& rowset, int64_t tablet_id,
120
                                            const std::string& rowset_id, int32_t segment_id,
121
0
                                            bool is_index) {
122
0
    const char* extension = is_index ? ".idx" : ".dat";
123
0
    std::string file_name = fmt::format("{}_{}{}", rowset_id, segment_id, extension);
124
0
    auto cache_key = io::BlockFileCache::hash(file_name);
125
0
    auto* cache = io::FileCacheFactory::instance()->get_by_path(cache_key);
126
0
    if (!cache) return;
127
0
    auto segments_meta = cache->get_hot_blocks_meta(cache_key);
128
0
    for (const auto& tuple : segments_meta) {
129
0
        add_file_cache_block_meta_to_response(resp, tablet_id, rowset_id, segment_id, file_name,
130
0
                                              tuple, rowset, is_index);
131
0
    }
132
0
}
133
134
void CloudInternalServiceImpl::get_file_cache_meta_by_tablet_id(
135
        google::protobuf::RpcController* controller [[maybe_unused]],
136
        const PGetFileCacheMetaRequest* request, PGetFileCacheMetaResponse* response,
137
0
        google::protobuf::Closure* done) {
138
0
    brpc::ClosureGuard closure_guard(done);
139
0
    if (!config::enable_file_cache) {
140
0
        LOG_WARNING("try to access tablet file cache meta, but file cache not enabled");
141
0
        return;
142
0
    }
143
0
    auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
144
0
                            std::chrono::steady_clock::now().time_since_epoch())
145
0
                            .count();
146
0
    std::ostringstream tablet_ids_stream;
147
0
    int count = 0;
148
0
    for (const auto& tablet_id : request->tablet_ids()) {
149
0
        tablet_ids_stream << tablet_id << ", ";
150
0
        count++;
151
0
        if (count >= 10) {
152
0
            break;
153
0
        }
154
0
    }
155
0
    LOG(INFO) << "warm up get meta from this be, tablets num=" << request->tablet_ids().size()
156
0
              << ", first 10 tablet_ids=[ " << tablet_ids_stream.str() << " ]";
157
0
    for (const auto& tablet_id : request->tablet_ids()) {
158
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id);
159
0
        if (!res.has_value()) {
160
0
            LOG(ERROR) << "failed to get tablet: " << tablet_id
161
0
                       << " err msg: " << res.error().msg();
162
0
            continue;
163
0
        }
164
0
        CloudTabletSPtr tablet = std::move(res.value());
165
0
        auto st = tablet->sync_rowsets();
166
0
        if (!st) {
167
            // just log failed, try it best
168
0
            LOG(WARNING) << "failed to sync rowsets: " << tablet_id
169
0
                         << " err msg: " << st.to_string();
170
0
        }
171
0
        auto rowsets = tablet->get_snapshot_rowset();
172
173
0
        for (const RowsetSharedPtr& rowset : rowsets) {
174
0
            std::string rowset_id = rowset->rowset_id().to_string();
175
0
            for (int32_t segment_id = 0; segment_id < rowset->num_segments(); ++segment_id) {
176
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
177
0
                                                false);
178
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
179
0
                                                true);
180
0
            }
181
0
        }
182
0
    }
183
0
    auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
184
0
                          std::chrono::steady_clock::now().time_since_epoch())
185
0
                          .count();
186
0
    g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency << (end_ts - begin_ts);
187
0
    LOG(INFO) << "get file cache meta by tablet ids = [ " << tablet_ids_stream.str() << " ] took "
188
0
              << end_ts - begin_ts << " us";
189
0
    VLOG_DEBUG << "get file cache meta by tablet id request=" << request->DebugString()
190
0
               << ", response=" << response->DebugString();
191
0
}
192
193
namespace {
194
// Helper functions for fetch_peer_data
195
196
0
/// Serves a PEER_FILE_RANGE request: moves every cache data entry that the file cache factory
/// returns for `path` into `response->datas`.
///
/// No offset/size filtering is applied here; whatever is returned for the path is sent back.
/// Always returns OK.
Status handle_peer_file_range_request(const std::string& path, PFetchPeerDataResponse* response) {
    auto cached_blocks = io::FileCacheFactory::instance()->get_cache_data_by_path(path);
    for (auto& block : cached_blocks) {
        auto* slot = response->add_datas();
        *slot = std::move(block);
    }
    return Status::OK();
}
204
205
0
/// Marks `response` as failed: appends `error_msg` to the status error messages and sets the
/// status code to INTERNAL_ERROR. Messages from earlier calls are kept.
void set_error_response(PFetchPeerDataResponse* response, const std::string& error_msg) {
    auto* status_pb = response->mutable_status();
    status_pb->add_error_msgs(error_msg);
    status_pb->set_status_code(TStatusCode::INTERNAL_ERROR);
}
209
210
/// Reads one cached block from local file cache storage into `output`.
///
/// @param file_block  the cache block to read, always read from offset 0 of the block
/// @param file_size   total size of the remote file; used to clamp the read so that it never
///                    goes past the end of the file
/// @param output      receives block offset, the number of bytes actually read, and the data
/// @return OK on success; InternalError when `file_size` is smaller than the block offset;
///         otherwise the status returned by the block read.
///
/// Every failure path bumps g_file_cache_get_by_peer_failed_num, so callers must not count the
/// same failure again.
Status read_file_block(const std::shared_ptr<io::FileBlock>& file_block, size_t file_size,
                       doris::CacheBlockPB* output) {
    std::string data;
    // ATTN: calculate the rightmost boundary value of the block, due to inaccurate current block meta information.
    // see CachedRemoteFileReader::read_at_impl for more details.
    // Ensure file_size >= file_block->offset() to avoid underflow
    if (file_size < file_block->offset()) {
        // Count this rejection as a failed peer read, like all other failure paths do.
        g_file_cache_get_by_peer_failed_num << 1;
        LOG(WARNING) << "file_size (" << file_size << ") < file_block->offset("
                     << file_block->offset() << ")";
        return Status::InternalError<false>("file_size less than block offset");
    }
    size_t read_size = std::min(static_cast<size_t>(file_size - file_block->offset()),
                                file_block->range().size());
    data.resize(read_size);

    auto begin_read_file_ts = std::chrono::duration_cast<std::chrono::microseconds>(
                                      std::chrono::steady_clock::now().time_since_epoch())
                                      .count();

    SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->s3_file_buffer_tracker());
    Slice slice(data.data(), data.size());
    Status read_st = file_block->read(slice, /*read_offset=*/0);

    auto end_read_file_ts = std::chrono::duration_cast<std::chrono::microseconds>(
                                    std::chrono::steady_clock::now().time_since_epoch())
                                    .count();
    // The read latency is recorded for failed reads as well.
    g_file_cache_get_by_peer_read_cache_file_latency << (end_read_file_ts - begin_read_file_ts);

    if (!read_st.ok()) {
        g_file_cache_get_by_peer_failed_num << 1;
        LOG(WARNING) << "read cache block failed: " << read_st;
        return read_st;
    }

    output->set_block_offset(static_cast<int64_t>(file_block->offset()));
    output->set_block_size(static_cast<int64_t>(read_size));
    output->set_data(std::move(data));
    return Status::OK();
}
249
250
/// Serves a PEER_FILE_CACHE_BLOCK request: for every requested (offset, size) range, reads the
/// matching cache blocks of `request->path()` and appends them to `response->datas`.
///
/// Negative offsets or sizes in the request are clamped to 0. Processing stops at the first
/// block that is not in DOWNLOADED state or that fails to read; in that case the response is
/// marked as failed and a non-OK status is returned. Blocks appended before the failure stay in
/// the response.
Status handle_peer_file_cache_block_request(const PFetchPeerDataRequest* request,
                                            PFetchPeerDataResponse* response) {
    const auto& path = request->path();
    auto cache_key = io::BlockFileCache::hash(path);
    auto* file_cache = io::FileCacheFactory::instance()->get_by_path(cache_key);
    if (file_cache == nullptr) {
        g_file_cache_get_by_peer_failed_num << 1;
        set_error_response(response, "can't get file cache instance");
        return Status::InternalError<false>("can't get file cache instance");
    }

    io::ReadStatistics local_stats;
    io::CacheContext ctx {};
    ctx.stats = &local_stats;

    for (const auto& range_req : request->cache_req()) {
        const int64_t raw_offset = range_req.block_offset();
        const int64_t raw_size = range_req.block_size();
        const size_t offset = static_cast<size_t>(std::max<int64_t>(0, raw_offset));
        const size_t size = static_cast<size_t>(std::max<int64_t>(0, raw_size));
        auto holder = file_cache->get_or_set(cache_key, offset, size, ctx);

        for (auto& block : holder.file_blocks) {
            const bool downloaded = block->state() == io::FileBlock::State::DOWNLOADED;
            if (!downloaded) {
                g_file_cache_get_by_peer_failed_num << 1;
                LOG(WARNING) << "read cache block failed, state=" << block->state();
                set_error_response(response, "read cache file error");
                return Status::InternalError<false>("cache block not downloaded");
            }

            g_file_cache_get_by_peer_blocks_num << 1;
            doris::CacheBlockPB* out_block = response->add_datas();
            Status read_status = read_file_block(block, request->file_size(), out_block);
            if (read_status.ok()) {
                continue;
            }
            set_error_response(response, "read cache file error");
            return read_status;
        }
    }

    return Status::OK();
}
290
} // namespace
291
292
void CloudInternalServiceImpl::fetch_peer_data(google::protobuf::RpcController* controller
293
                                               [[maybe_unused]],
294
                                               const PFetchPeerDataRequest* request,
295
                                               PFetchPeerDataResponse* response,
296
0
                                               google::protobuf::Closure* done) {
297
0
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
298
0
        brpc::ClosureGuard closure_guard(done);
299
0
        g_file_cache_get_by_peer_num << 1;
300
301
0
        if (!config::enable_file_cache) {
302
0
            LOG_WARNING("try to access file cache data, but file cache not enabled");
303
0
            return;
304
0
        }
305
306
0
        auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
307
0
                                std::chrono::steady_clock::now().time_since_epoch())
308
0
                                .count();
309
310
0
        const auto type = request->type();
311
0
        const auto& path = request->path();
312
0
        response->mutable_status()->set_status_code(TStatusCode::OK);
313
314
0
        Status status = Status::OK();
315
0
        if (type == PFetchPeerDataRequest_Type_PEER_FILE_RANGE) {
316
0
            status = handle_peer_file_range_request(path, response);
317
0
        } else if (type == PFetchPeerDataRequest_Type_PEER_FILE_CACHE_BLOCK) {
318
0
            status = handle_peer_file_cache_block_request(request, response);
319
0
        }
320
321
0
        if (!status.ok()) {
322
0
            LOG(WARNING) << "fetch peer data failed: " << status.to_string();
323
0
            set_error_response(response, status.to_string());
324
0
        }
325
326
0
        DBUG_EXECUTE_IF("CloudInternalServiceImpl::fetch_peer_data_slower", {
327
0
            int st_us = dp->param<int>("sleep", 1000);
328
0
            LOG_WARNING("CloudInternalServiceImpl::fetch_peer_data_slower").tag("sleep", st_us);
329
0
            bthread_usleep(st_us);
330
0
        });
331
332
0
        auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
333
0
                              std::chrono::steady_clock::now().time_since_epoch())
334
0
                              .count();
335
0
        g_file_cache_get_by_peer_server_latency << (end_ts - begin_ts);
336
0
        g_file_cache_get_by_peer_success_num << 1;
337
338
0
        VLOG_DEBUG << "fetch cache request=" << request->DebugString()
339
0
                   << ", response=" << response->DebugString();
340
0
    });
341
342
0
    if (!ret) {
343
0
        brpc::ClosureGuard closure_guard(done);
344
0
        LOG(WARNING) << "fail to offer fetch peer data request to the work pool, pool="
345
0
                     << _heavy_work_pool.get_info();
346
0
    }
347
0
}
348
349
// Metrics for event-driven rowset warm up.
// The finished_* / failed_* counters are updated in handle_segment_download_done (segment
// files) and handle_inverted_index_download_done (inverted index files): *_num by 1 and *_size
// by the size passed to the callback.
// NOTE(review): the submitted_* counters are presumably bumped when a download is scheduled;
// that code is not part of this section - confirm.
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_num(
350
        "file_cache_event_driven_warm_up_submitted_segment_num");
351
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_num(
352
        "file_cache_event_driven_warm_up_finished_segment_num");
353
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_num(
354
        "file_cache_event_driven_warm_up_failed_segment_num");
355
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_size(
356
        "file_cache_event_driven_warm_up_submitted_segment_size");
357
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_size(
358
        "file_cache_event_driven_warm_up_finished_segment_size");
359
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_size(
360
        "file_cache_event_driven_warm_up_failed_segment_size");
361
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_num(
362
        "file_cache_event_driven_warm_up_submitted_index_num");
363
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_num(
364
        "file_cache_event_driven_warm_up_finished_index_num");
365
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_num(
366
        "file_cache_event_driven_warm_up_failed_index_num");
367
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_size(
368
        "file_cache_event_driven_warm_up_submitted_index_size");
369
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_size(
370
        "file_cache_event_driven_warm_up_finished_index_size");
371
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_size(
372
        "file_cache_event_driven_warm_up_failed_index_size");
373
// NOTE(review): presumably the wall-clock time at which the last warm up request was handled;
// it is not written in this section - confirm.
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_handle_unix_ts(
374
        "file_cache_warm_up_rowset_last_handle_unix_ts", 0);
375
// Wall-clock time (microseconds since the epoch) of the last successful download callback.
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_finish_unix_ts(
376
        "file_cache_warm_up_rowset_last_finish_unix_ts", 0);
377
// Request-to-finish time of a successful download, fed in microseconds.
bvar::LatencyRecorder g_file_cache_warm_up_rowset_latency("file_cache_warm_up_rowset_latency");
378
// NOTE(review): not written in this section; presumably request-to-handle time - confirm.
bvar::LatencyRecorder g_file_cache_warm_up_rowset_request_to_handle_latency(
379
        "file_cache_warm_up_rowset_request_to_handle_latency");
380
// Handle-to-finish time of a successful download, fed in microseconds.
bvar::LatencyRecorder g_file_cache_warm_up_rowset_handle_to_finish_latency(
381
        "file_cache_warm_up_rowset_handle_to_finish_latency");
382
// Number of successful downloads whose request-to-finish time exceeded
// config::warm_up_rowset_slow_log_ms.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_slow_count(
383
        "file_cache_warm_up_rowset_slow_count");
384
// NOTE(review): not written in this section - confirm where it is updated.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_request_to_handle_slow_count(
385
        "file_cache_warm_up_rowset_request_to_handle_slow_count");
386
// Number of successful downloads whose handle-to-finish time exceeded
// config::warm_up_rowset_slow_log_ms.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_handle_to_finish_slow_count(
387
        "file_cache_warm_up_rowset_handle_to_finish_slow_count");
388
// Bumped by warm_up_rowset for every request that carries a positive sync_wait_timeout_ms.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_num(
389
        "file_cache_warm_up_rowset_wait_for_compaction_num");
390
// NOTE(review): not written in this section; presumably counts sync waits that timed out -
// confirm.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num(
391
        "file_cache_warm_up_rowset_wait_for_compaction_timeout_num");
392
393
void handle_segment_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
394
                                  int64_t segment_id, std::shared_ptr<CloudTablet> tablet,
395
                                  std::shared_ptr<bthread::CountdownEvent> wait, Version version,
396
0
                                  int64_t segment_size, int64_t request_ts, int64_t handle_ts) {
397
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_segment", {
398
0
        auto sleep_time = dp->param<int>("sleep", 3);
399
0
        LOG_INFO("[verbose] block download for rowset={}, version={}, sleep={}",
400
0
                 rowset_id.to_string(), version.to_string(), sleep_time);
401
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
402
0
    });
403
0
    DBUG_EXECUTE_IF(
404
0
            "CloudInternalServiceImpl::warm_up_rowset.download_segment.inject_"
405
0
            "error",
406
0
            {
407
0
                st = Status::InternalError("injected error");
408
0
                LOG_INFO("[verbose] inject error, tablet={}, rowset={}, st={}", tablet_id,
409
0
                         rowset_id.to_string(), st.to_string());
410
0
            });
411
0
    if (st.ok()) {
412
0
        g_file_cache_event_driven_warm_up_finished_segment_num << 1;
413
0
        g_file_cache_event_driven_warm_up_finished_segment_size << segment_size;
414
0
        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
415
0
                                 std::chrono::system_clock::now().time_since_epoch())
416
0
                                 .count();
417
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
418
0
        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
419
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
420
0
        if (request_ts > 0 && now_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
421
0
            g_file_cache_warm_up_rowset_slow_count << 1;
422
0
            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
423
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
424
0
                      << ", segment_id: " << segment_id;
425
0
        }
426
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
427
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
428
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
429
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
430
0
                      << ", segment_id: " << segment_id;
431
0
        }
432
0
    } else {
433
0
        g_file_cache_event_driven_warm_up_failed_segment_num << 1;
434
0
        g_file_cache_event_driven_warm_up_failed_segment_size << segment_size;
435
0
        LOG(WARNING) << "download segment failed, tablet_id: " << tablet_id
436
0
                     << " rowset_id: " << rowset_id.to_string() << ", error: " << st;
437
0
    }
438
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 1,
439
0
                                               0)
440
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
441
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
442
0
                   << ") completed";
443
0
    }
444
0
    if (wait) {
445
0
        wait->signal();
446
0
    }
447
0
}
448
449
void handle_inverted_index_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
450
                                         int64_t segment_id, std::string index_path,
451
                                         std::shared_ptr<CloudTablet> tablet,
452
                                         std::shared_ptr<bthread::CountdownEvent> wait,
453
                                         Version version, uint64_t idx_size, int64_t request_ts,
454
0
                                         int64_t handle_ts) {
455
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_inverted_idx", {
456
0
        auto sleep_time = dp->param<int>("sleep", 3);
457
0
        LOG_INFO(
458
0
                "[verbose] block download for rowset={}, inverted index "
459
0
                "file={}, sleep={}",
460
0
                rowset_id.to_string(), index_path, sleep_time);
461
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
462
0
    });
463
0
    if (st.ok()) {
464
0
        g_file_cache_event_driven_warm_up_finished_index_num << 1;
465
0
        g_file_cache_event_driven_warm_up_finished_index_size << idx_size;
466
0
        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
467
0
                                 std::chrono::system_clock::now().time_since_epoch())
468
0
                                 .count();
469
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
470
0
        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
471
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
472
0
        if (request_ts > 0 && now_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
473
0
            g_file_cache_warm_up_rowset_slow_count << 1;
474
0
            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
475
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
476
0
                      << ", segment_id: " << segment_id;
477
0
        }
478
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
479
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
480
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
481
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
482
0
                      << ", segment_id: " << segment_id;
483
0
        }
484
0
    } else {
485
0
        g_file_cache_event_driven_warm_up_failed_index_num << 1;
486
0
        g_file_cache_event_driven_warm_up_failed_index_size << idx_size;
487
0
        LOG(WARNING) << "download inverted index failed, tablet_id: " << tablet_id
488
0
                     << " rowset_id: " << rowset_id << ", error: " << st;
489
0
    }
490
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 0,
491
0
                                               1)
492
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
493
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
494
0
                   << ") completed";
495
0
    }
496
0
    if (wait) {
497
0
        wait->signal();
498
0
    }
499
0
}
500
501
void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* controller
502
                                              [[maybe_unused]],
503
                                              const PWarmUpRowsetRequest* request,
504
                                              PWarmUpRowsetResponse* response,
505
0
                                              google::protobuf::Closure* done) {
506
0
    brpc::ClosureGuard closure_guard(done);
507
0
    std::shared_ptr<bthread::CountdownEvent> wait = nullptr;
508
0
    timespec due_time;
509
0
    if (request->has_sync_wait_timeout_ms() && request->sync_wait_timeout_ms() > 0) {
510
0
        g_file_cache_warm_up_rowset_wait_for_compaction_num << 1;
511
0
        wait = std::make_shared<bthread::CountdownEvent>(0);
512
0
        VLOG_DEBUG << "sync_wait_timeout: " << request->sync_wait_timeout_ms() << " ms";
513
0
        due_time = butil::milliseconds_from_now(request->sync_wait_timeout_ms());
514
0
    }
515
516
0
    for (auto& rs_meta_pb : request->rowset_metas()) {
517
0
        RowsetMeta rs_meta;
518
0
        rs_meta.init_from_pb(rs_meta_pb);
519
0
        auto storage_resource = rs_meta.remote_storage_resource();
520
0
        if (!storage_resource) {
521
0
            LOG(WARNING) << storage_resource.error();
522
0
            continue;
523
0
        }
524
0
        int64_t tablet_id = rs_meta.tablet_id();
525
0
        auto rowset_id = rs_meta.rowset_id();
526
0
        bool local_only = !(request->has_skip_existence_check() && request->skip_existence_check());
527
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data = */ false,
528
0
                                                   /* sync_delete_bitmap = */ true,
529
0
                                                   /* sync_stats = */ nullptr,
530
0
                                                   /* local_only = */ local_only);
531
0
        if (!res.has_value()) {
532
0
            LOG_WARNING("Warm up error ").tag("tablet_id", tablet_id).error(res.error());
533
0
            if (res.error().msg().find("local_only=true") != std::string::npos) {
534
0
                res.error().set_code(ErrorCode::TABLE_NOT_FOUND);
535
0
            }
536
0
            res.error().to_protobuf(response->mutable_status());
537
0
            continue;
538
0
        }
539
0
        auto tablet = res.value();
540
0
        auto tablet_meta = tablet->tablet_meta();
541
542
0
        int64_t handle_ts = std::chrono::duration_cast<std::chrono::microseconds>(
543
0
                                    std::chrono::system_clock::now().time_since_epoch())
544
0
                                    .count();
545
0
        g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts);
546
0
        int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() : 0;
547
0
        g_file_cache_warm_up_rowset_request_to_handle_latency << (handle_ts - request_ts);
548
0
        if (request_ts > 0 && handle_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
549
0
            g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
550
0
            LOG(INFO) << "warm up rowset (request to handle) took " << handle_ts - request_ts
551
0
                      << " us, tablet_id: " << rs_meta.tablet_id()
552
0
                      << ", rowset_id: " << rowset_id.to_string();
553
0
        }
554
0
        int64_t expiration_time = tablet_meta->ttl_seconds();
555
556
0
        if (!tablet->add_rowset_warmup_state(rs_meta, WarmUpTriggerSource::EVENT_DRIVEN)) {
557
0
            LOG(INFO) << "found duplicate warmup task for rowset " << rowset_id.to_string()
558
0
                      << ", skip it";
559
0
            continue;
560
0
        }
561
562
0
        for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); segment_id++) {
563
0
            if (!config::file_cache_enable_only_warm_up_idx) {
564
0
                auto segment_size = rs_meta.segment_file_size(segment_id);
565
566
                // Use rs_meta.fs() instead of storage_resource.value()->fs to support packed files.
567
                // PackedFileSystem wrapper in rs_meta.fs() handles the index_map lookup and
568
                // reads from the correct packed file.
569
0
                io::DownloadFileMeta download_meta {
570
0
                        .path = storage_resource.value()->remote_segment_path(rs_meta, segment_id),
571
0
                        .file_size = segment_size,
572
0
                        .offset = 0,
573
0
                        .download_size = segment_size,
574
0
                        .file_system = rs_meta.fs(),
575
0
                        .ctx = {.is_index_data = false,
576
0
                                .expiration_time = expiration_time,
577
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
578
0
                                .is_warmup = true},
579
0
                        .download_done =
580
0
                                [=, version = rs_meta.version()](Status st) {
581
0
                                    handle_segment_download_done(
582
0
                                            st, tablet_id, rowset_id, segment_id, tablet, wait,
583
0
                                            version, segment_size, request_ts, handle_ts);
584
0
                                },
585
0
                        .tablet_id = tablet_id};
586
587
0
                g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
588
0
                g_file_cache_event_driven_warm_up_submitted_segment_size << segment_size;
589
0
                if (wait) {
590
0
                    wait->add_count();
591
0
                }
592
593
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
594
0
            }
595
596
            // Use rs_meta.fs() to support packed files for inverted index download.
597
0
            auto download_inverted_index = [&, tablet](std::string index_path, uint64_t idx_size) {
598
0
                io::DownloadFileMeta download_meta {
599
0
                        .path = io::Path(index_path),
600
0
                        .file_size = static_cast<int64_t>(idx_size),
601
0
                        .file_system = rs_meta.fs(),
602
0
                        .ctx = {.is_index_data = false, // DORIS-20877
603
0
                                .expiration_time = expiration_time,
604
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
605
0
                                .is_warmup = true},
606
0
                        .download_done =
607
0
                                [=, version = rs_meta.version()](Status st) {
608
0
                                    handle_inverted_index_download_done(
609
0
                                            st, tablet_id, rowset_id, segment_id, index_path,
610
0
                                            tablet, wait, version, idx_size, request_ts, handle_ts);
611
0
                                },
612
0
                        .tablet_id = tablet_id};
613
0
                g_file_cache_event_driven_warm_up_submitted_index_num << 1;
614
0
                g_file_cache_event_driven_warm_up_submitted_index_size << idx_size;
615
0
                tablet->update_rowset_warmup_state_inverted_idx_num(
616
0
                        WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, 1);
617
0
                if (wait) {
618
0
                    wait->add_count();
619
0
                }
620
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
621
0
            };
622
623
            // inverted index
624
0
            auto schema_ptr = rs_meta.tablet_schema();
625
0
            auto idx_version = schema_ptr->get_inverted_index_storage_format();
626
627
0
            if (schema_ptr->has_inverted_index() || schema_ptr->has_ann_index()) {
628
0
                if (idx_version == InvertedIndexStorageFormatPB::V1) {
629
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
630
0
                    std::unordered_map<int64_t, int64_t> index_size_map;
631
0
                    for (const auto& info : inverted_index_info.index_info()) {
632
0
                        if (info.index_file_size() != -1) {
633
0
                            index_size_map[info.index_id()] = info.index_file_size();
634
0
                        } else {
635
0
                            VLOG_DEBUG << "Invalid index_file_size for segment_id " << segment_id
636
0
                                       << ", index_id " << info.index_id();
637
0
                        }
638
0
                    }
639
0
                    for (const auto& index : schema_ptr->inverted_indexes()) {
640
0
                        auto idx_path = storage_resource.value()->remote_idx_v1_path(
641
0
                                rs_meta, segment_id, index->index_id(), index->get_index_suffix());
642
0
                        download_inverted_index(idx_path, index_size_map[index->index_id()]);
643
0
                    }
644
0
                } else { // InvertedIndexStorageFormatPB::V2
645
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
646
0
                    int64_t idx_size = 0;
647
0
                    if (inverted_index_info.has_index_size()) {
648
0
                        idx_size = inverted_index_info.index_size();
649
0
                    } else {
650
0
                        VLOG_DEBUG << "index_size is not set for segment " << segment_id;
651
0
                    }
652
0
                    auto idx_path =
653
0
                            storage_resource.value()->remote_idx_v2_path(rs_meta, segment_id);
654
0
                    download_inverted_index(idx_path, idx_size);
655
0
                }
656
0
            }
657
0
        }
658
0
    }
659
0
    if (wait && wait->timed_wait(due_time)) {
660
0
        g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num << 1;
661
0
        LOG_WARNING("the time spent warming up {} rowsets exceeded {} ms",
662
0
                    request->rowset_metas().size(), request->sync_wait_timeout_ms());
663
0
    }
664
0
}
665
666
// Counters bumped by CloudInternalServiceImpl::recycle_cache():
// one increment per segment cache key handed to remove_if_cached_async ...
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_segment_num {
        "file_cache_recycle_cache_finished_segment_num"};
// ... and one increment per inverted-index file name handed to remove_if_cached_async.
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_index_num {
        "file_cache_recycle_cache_finished_index_num"};
670
671
void CloudInternalServiceImpl::recycle_cache(google::protobuf::RpcController* controller
672
                                             [[maybe_unused]],
673
                                             const PRecycleCacheRequest* request,
674
                                             PRecycleCacheResponse* response,
675
0
                                             google::protobuf::Closure* done) {
676
0
    brpc::ClosureGuard closure_guard(done);
677
678
0
    if (!config::enable_file_cache) {
679
0
        return;
680
0
    }
681
0
    for (const auto& meta : request->cache_metas()) {
682
0
        for (int64_t segment_id = 0; segment_id < meta.num_segments(); segment_id++) {
683
0
            auto file_key = Segment::file_cache_key(meta.rowset_id(), segment_id);
684
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
685
0
            file_cache->remove_if_cached_async(file_key);
686
0
            g_file_cache_recycle_cache_finished_segment_num << 1;
687
0
        }
688
689
        // inverted index
690
0
        for (const auto& file_name : meta.index_file_names()) {
691
0
            auto file_key = io::BlockFileCache::hash(file_name);
692
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
693
0
            file_cache->remove_if_cached_async(file_key);
694
0
            g_file_cache_recycle_cache_finished_index_num << 1;
695
0
        }
696
0
    }
697
0
}
698
699
#include "common/compile_check_avoid_end.h"
700
} // namespace doris