Coverage Report

Created: 2026-03-12 17:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/cloud/cloud_internal_service.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "cloud/cloud_internal_service.h"
19
20
#include <bthread/countdown_event.h>
21
22
#include <algorithm>
23
#include <thread>
24
25
#include "cloud/cloud_storage_engine.h"
26
#include "cloud/cloud_tablet.h"
27
#include "cloud/cloud_tablet_mgr.h"
28
#include "cloud/cloud_warm_up_manager.h"
29
#include "cloud/config.h"
30
#include "io/cache/block_file_cache.h"
31
#include "io/cache/block_file_cache_downloader.h"
32
#include "io/cache/block_file_cache_factory.h"
33
#include "runtime/thread_context.h"
34
#include "runtime/workload_management/io_throttle.h"
35
#include "util/async_io.h"
36
#include "util/debug_points.h"
37
38
namespace doris {
39
#include "common/compile_check_avoid_begin.h"
40
#include "common/compile_check_begin.h"
41
42
// bvar metrics for the peer file-cache RPC handlers defined in this file.
// The g_file_cache_get_by_peer_* counters and latency recorders are updated by
// fetch_peer_data and its helpers; the last recorder is fed by
// get_file_cache_meta_by_tablet_id. Latency values pushed in this file are in microseconds.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_num("file_cache_get_by_peer_num");
43
bvar::Adder<uint64_t> g_file_cache_get_by_peer_blocks_num("file_cache_get_by_peer_blocks_num");
44
bvar::Adder<uint64_t> g_file_cache_get_by_peer_success_num("file_cache_get_by_peer_success_num");
45
bvar::Adder<uint64_t> g_file_cache_get_by_peer_failed_num("file_cache_get_by_peer_failed_num");
46
bvar::LatencyRecorder g_file_cache_get_by_peer_server_latency(
47
        "file_cache_get_by_peer_server_latency");
48
bvar::LatencyRecorder g_file_cache_get_by_peer_read_cache_file_latency(
49
        "file_cache_get_by_peer_read_cache_file_latency");
50
bvar::LatencyRecorder g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency(
51
        "cloud_internal_service_get_file_cache_meta_by_tablet_id_latency");
52
53
// Constructs the service by forwarding exec_env to the PInternalService base and keeping
// a reference to the cloud storage engine in _engine.
CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine, ExecEnv* exec_env)
54
1
        : PInternalService(exec_env), _engine(engine) {}
55
56
0
// Defaulted destructor: this class adds no explicit cleanup of its own.
CloudInternalServiceImpl::~CloudInternalServiceImpl() = default;
57
58
void CloudInternalServiceImpl::alter_vault_sync(google::protobuf::RpcController* controller,
59
                                                const doris::PAlterVaultSyncRequest* request,
60
                                                PAlterVaultSyncResponse* response,
61
0
                                                google::protobuf::Closure* done) {
62
0
    LOG(INFO) << "alter be to sync vault info from Meta Service";
63
    // If the vaults containing hdfs vault then it would try to create hdfs connection using jni
64
    // which would acuiqre one thread local jniEnv. But bthread context can't guarantee that the brpc
65
    // worker thread wouldn't do bthread switch between worker threads.
66
0
    bool ret = _heavy_work_pool.try_offer([this, done]() {
67
0
        brpc::ClosureGuard closure_guard(done);
68
0
        _engine.sync_storage_vault();
69
0
    });
70
0
    if (!ret) {
71
0
        brpc::ClosureGuard closure_guard(done);
72
0
        LOG(WARNING) << "fail to offer alter_vault_sync request to the work pool, pool="
73
0
                     << _heavy_work_pool.get_info();
74
0
    }
75
0
}
76
77
0
// Translates the file cache's internal block type into the protobuf FileCacheType enum.
// TTL, INDEX and NORMAL map one-to-one. Any other value hits DCHECK(false); where that
// check does not abort, the value is reported as NORMAL.
FileCacheType cache_type_to_pb(io::FileCacheType type) {
    if (type == io::FileCacheType::TTL) {
        return FileCacheType::TTL;
    }
    if (type == io::FileCacheType::INDEX) {
        return FileCacheType::INDEX;
    }
    if (type == io::FileCacheType::NORMAL) {
        return FileCacheType::NORMAL;
    }
    DCHECK(false);
    return FileCacheType::NORMAL;
}
90
91
static void add_file_cache_block_meta_to_response(
92
        PGetFileCacheMetaResponse* resp, int64_t tablet_id, const std::string& rowset_id,
93
        int32_t segment_id, const std::string& file_name,
94
        const std::tuple<int64_t, int64_t, io::FileCacheType, int64_t>& tuple,
95
0
        const RowsetSharedPtr& rowset, bool is_index) {
96
0
    FileCacheBlockMeta* meta = resp->add_file_cache_block_metas();
97
0
    meta->set_tablet_id(tablet_id);
98
0
    meta->set_rowset_id(rowset_id);
99
0
    meta->set_segment_id(segment_id);
100
0
    meta->set_file_name(file_name);
101
102
0
    if (!is_index) {
103
        // .dat
104
0
        meta->set_file_size(rowset->rowset_meta()->segment_file_size(segment_id));
105
0
        meta->set_file_type(doris::FileType::SEGMENT_FILE);
106
0
    } else {
107
        // .idx
108
0
        const auto& idx_file_info = rowset->rowset_meta()->inverted_index_file_info(segment_id);
109
0
        meta->set_file_size(idx_file_info.has_index_size() ? idx_file_info.index_size() : -1);
110
0
        meta->set_file_type(doris::FileType::INVERTED_INDEX_FILE);
111
0
    }
112
113
0
    meta->set_offset(std::get<0>(tuple));
114
0
    meta->set_size(std::get<1>(tuple));
115
0
    meta->set_cache_type(cache_type_to_pb(std::get<2>(tuple)));
116
0
    meta->set_expiration_time(std::get<3>(tuple));
117
0
}
118
119
static void process_segment_file_cache_meta(PGetFileCacheMetaResponse* resp,
120
                                            const RowsetSharedPtr& rowset, int64_t tablet_id,
121
                                            const std::string& rowset_id, int32_t segment_id,
122
0
                                            bool is_index) {
123
0
    const char* extension = is_index ? ".idx" : ".dat";
124
0
    std::string file_name = fmt::format("{}_{}{}", rowset_id, segment_id, extension);
125
0
    auto cache_key = io::BlockFileCache::hash(file_name);
126
0
    auto* cache = io::FileCacheFactory::instance()->get_by_path(cache_key);
127
0
    if (!cache) return;
128
0
    auto segments_meta = cache->get_hot_blocks_meta(cache_key);
129
0
    for (const auto& tuple : segments_meta) {
130
0
        add_file_cache_block_meta_to_response(resp, tablet_id, rowset_id, segment_id, file_name,
131
0
                                              tuple, rowset, is_index);
132
0
    }
133
0
}
134
135
void CloudInternalServiceImpl::get_file_cache_meta_by_tablet_id(
136
        google::protobuf::RpcController* controller [[maybe_unused]],
137
        const PGetFileCacheMetaRequest* request, PGetFileCacheMetaResponse* response,
138
0
        google::protobuf::Closure* done) {
139
0
    brpc::ClosureGuard closure_guard(done);
140
0
    if (!config::enable_file_cache) {
141
0
        LOG_WARNING("try to access tablet file cache meta, but file cache not enabled");
142
0
        return;
143
0
    }
144
0
    auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
145
0
                            std::chrono::steady_clock::now().time_since_epoch())
146
0
                            .count();
147
0
    std::ostringstream tablet_ids_stream;
148
0
    int count = 0;
149
0
    for (const auto& tablet_id : request->tablet_ids()) {
150
0
        tablet_ids_stream << tablet_id << ", ";
151
0
        count++;
152
0
        if (count >= 10) {
153
0
            break;
154
0
        }
155
0
    }
156
0
    LOG(INFO) << "warm up get meta from this be, tablets num=" << request->tablet_ids().size()
157
0
              << ", first 10 tablet_ids=[ " << tablet_ids_stream.str() << " ]";
158
0
    for (const auto& tablet_id : request->tablet_ids()) {
159
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id);
160
0
        if (!res.has_value()) {
161
0
            LOG(ERROR) << "failed to get tablet: " << tablet_id
162
0
                       << " err msg: " << res.error().msg();
163
0
            continue;
164
0
        }
165
0
        CloudTabletSPtr tablet = std::move(res.value());
166
0
        auto st = tablet->sync_rowsets();
167
0
        if (!st) {
168
            // just log failed, try it best
169
0
            LOG(WARNING) << "failed to sync rowsets: " << tablet_id
170
0
                         << " err msg: " << st.to_string();
171
0
        }
172
0
        auto rowsets = tablet->get_snapshot_rowset();
173
174
0
        for (const RowsetSharedPtr& rowset : rowsets) {
175
0
            std::string rowset_id = rowset->rowset_id().to_string();
176
0
            for (int32_t segment_id = 0; segment_id < rowset->num_segments(); ++segment_id) {
177
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
178
0
                                                false);
179
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
180
0
                                                true);
181
0
            }
182
0
        }
183
0
    }
184
0
    auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
185
0
                          std::chrono::steady_clock::now().time_since_epoch())
186
0
                          .count();
187
0
    g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency << (end_ts - begin_ts);
188
0
    LOG(INFO) << "get file cache meta by tablet ids = [ " << tablet_ids_stream.str() << " ] took "
189
0
              << end_ts - begin_ts << " us";
190
0
    VLOG_DEBUG << "get file cache meta by tablet id request=" << request->DebugString()
191
0
               << ", response=" << response->DebugString();
192
0
}
193
194
namespace {
195
// Helper functions for fetch_peer_data
196
197
0
// Serves a PEER_FILE_RANGE request: fetches the cache data the file cache factory returns
// for `path` and moves every returned block into `response->datas`.
// No offset/size filtering is applied in this function. Always returns Status::OK().
Status handle_peer_file_range_request(const std::string& path, PFetchPeerDataResponse* response) {
    auto cached_blocks = io::FileCacheFactory::instance()->get_cache_data_by_path(path);
    for (auto& block : cached_blocks) {
        auto* slot = response->add_datas();
        *slot = std::move(block);
    }
    return Status::OK();
}
205
206
0
// Marks `response` as failed: appends `error_msg` to the status' error messages and sets
// the status code to INTERNAL_ERROR. Messages accumulate if this is called more than once.
void set_error_response(PFetchPeerDataResponse* response, const std::string& error_msg) {
    auto* status_pb = response->mutable_status();
    status_pb->add_error_msgs(error_msg);
    status_pb->set_status_code(TStatusCode::INTERNAL_ERROR);
}
210
211
// Reads one cached file block from its start and stores the bytes in `output`.
//
// file_block: the cache block to read.
// file_size:  total size of the underlying file; bounds how many bytes are read.
// output:     on success receives block offset, the number of bytes read, and the data.
//
// Returns an InternalError when `file_size` is smaller than the block offset, or the
// block's own read status when the read fails (the failed counter is bumped in that case).
// The time spent in the read is recorded in the read-cache-file latency metric either way.
Status read_file_block(const std::shared_ptr<io::FileBlock>& file_block, size_t file_size,
                       doris::CacheBlockPB* output) {
    // Ensure file_size >= file_block->offset() to avoid underflow
    if (file_size < file_block->offset()) {
        LOG(WARNING) << "file_size (" << file_size << ") < file_block->offset("
                     << file_block->offset() << ")";
        return Status::InternalError<false>("file_size less than block offset");
    }

    // ATTN: calculate the rightmost boundary value of the block, due to inaccurate current
    // block meta information. See CachedRemoteFileReader::read_at_impl for more details.
    const size_t bytes_to_read = std::min(static_cast<size_t>(file_size - file_block->offset()),
                                          file_block->range().size());
    std::string buffer(bytes_to_read, '\0');

    const auto steady_now_us = []() {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                       std::chrono::steady_clock::now().time_since_epoch())
                .count();
    };

    const auto read_begin_us = steady_now_us();

    SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->s3_file_buffer_tracker());
    Slice target(buffer.data(), buffer.size());
    Status read_st = file_block->read(target, /*read_offset=*/0);

    const auto read_end_us = steady_now_us();
    g_file_cache_get_by_peer_read_cache_file_latency << (read_end_us - read_begin_us);

    if (!read_st.ok()) {
        g_file_cache_get_by_peer_failed_num << 1;
        LOG(WARNING) << "read cache block failed: " << read_st;
        return read_st;
    }

    output->set_block_offset(static_cast<int64_t>(file_block->offset()));
    output->set_block_size(static_cast<int64_t>(bytes_to_read));
    output->set_data(std::move(buffer));
    return Status::OK();
}
250
251
// Serves a PEER_FILE_CACHE_BLOCK request: for every (block_offset, block_size) entry in
// `request->cache_req()`, looks up the matching cache blocks of `request->path()` and
// appends their contents to `response->datas`.
//
// Fails (sets an error on `response`, bumps the failed counter and returns a non-OK Status)
// when:
//   - no file cache instance exists for the path hash;
//   - a requested range is malformed (negative offset or non-positive size);
//   - a covered block is not in DOWNLOADED state;
//   - reading a block fails (see read_file_block).
// Blocks appended before a failure stay in the response; callers should check the status.
Status handle_peer_file_cache_block_request(const PFetchPeerDataRequest* request,
                                            PFetchPeerDataResponse* response) {
    const auto& path = request->path();
    auto hash = io::BlockFileCache::hash(path);
    auto* cache = io::FileCacheFactory::instance()->get_by_path(hash);
    if (cache == nullptr) {
        g_file_cache_get_by_peer_failed_num << 1;
        set_error_response(response, "can't get file cache instance");
        return Status::InternalError<false>("can't get file cache instance");
    }

    io::CacheContext ctx {};
    io::ReadStatistics local_stats;
    ctx.stats = &local_stats;

    for (const auto& cb_req : request->cache_req()) {
        // Reject malformed ranges instead of clamping them to 0: clamping would silently turn
        // the request into a lookup for a different (or zero-length) range than the peer
        // asked for.
        if (cb_req.block_offset() < 0 || cb_req.block_size() <= 0) {
            g_file_cache_get_by_peer_failed_num << 1;
            LOG(WARNING) << "invalid peer cache block request, offset=" << cb_req.block_offset()
                         << ", size=" << cb_req.block_size();
            set_error_response(response, "invalid cache block range");
            return Status::InternalError<false>("invalid cache block range");
        }
        const auto offset = static_cast<size_t>(cb_req.block_offset());
        const auto size = static_cast<size_t>(cb_req.block_size());
        auto holder = cache->get_or_set(hash, offset, size, ctx);

        for (auto& fb : holder.file_blocks) {
            // Only fully downloaded blocks can be served to a peer.
            if (fb->state() != io::FileBlock::State::DOWNLOADED) {
                g_file_cache_get_by_peer_failed_num << 1;
                LOG(WARNING) << "read cache block failed, state=" << fb->state();
                set_error_response(response, "read cache file error");
                return Status::InternalError<false>("cache block not downloaded");
            }

            g_file_cache_get_by_peer_blocks_num << 1;
            doris::CacheBlockPB* out = response->add_datas();
            Status read_status = read_file_block(fb, request->file_size(), out);
            if (!read_status.ok()) {
                set_error_response(response, "read cache file error");
                return read_status;
            }
        }
    }

    return Status::OK();
}
291
} // namespace
292
293
void CloudInternalServiceImpl::fetch_peer_data(google::protobuf::RpcController* controller
294
                                               [[maybe_unused]],
295
                                               const PFetchPeerDataRequest* request,
296
                                               PFetchPeerDataResponse* response,
297
0
                                               google::protobuf::Closure* done) {
298
0
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
299
0
        brpc::ClosureGuard closure_guard(done);
300
0
        g_file_cache_get_by_peer_num << 1;
301
302
0
        if (!config::enable_file_cache) {
303
0
            LOG_WARNING("try to access file cache data, but file cache not enabled");
304
0
            return;
305
0
        }
306
307
0
        auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
308
0
                                std::chrono::steady_clock::now().time_since_epoch())
309
0
                                .count();
310
311
0
        const auto type = request->type();
312
0
        const auto& path = request->path();
313
0
        response->mutable_status()->set_status_code(TStatusCode::OK);
314
315
0
        Status status = Status::OK();
316
0
        if (type == PFetchPeerDataRequest_Type_PEER_FILE_RANGE) {
317
0
            status = handle_peer_file_range_request(path, response);
318
0
        } else if (type == PFetchPeerDataRequest_Type_PEER_FILE_CACHE_BLOCK) {
319
0
            status = handle_peer_file_cache_block_request(request, response);
320
0
        }
321
322
0
        if (!status.ok()) {
323
0
            LOG(WARNING) << "fetch peer data failed: " << status.to_string();
324
0
            set_error_response(response, status.to_string());
325
0
        }
326
327
0
        DBUG_EXECUTE_IF("CloudInternalServiceImpl::fetch_peer_data_slower", {
328
0
            int st_us = dp->param<int>("sleep", 1000);
329
0
            LOG_WARNING("CloudInternalServiceImpl::fetch_peer_data_slower").tag("sleep", st_us);
330
0
            bthread_usleep(st_us);
331
0
        });
332
333
0
        auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
334
0
                              std::chrono::steady_clock::now().time_since_epoch())
335
0
                              .count();
336
0
        g_file_cache_get_by_peer_server_latency << (end_ts - begin_ts);
337
0
        g_file_cache_get_by_peer_success_num << 1;
338
339
0
        VLOG_DEBUG << "fetch cache request=" << request->DebugString()
340
0
                   << ", response=" << response->DebugString();
341
0
    });
342
343
0
    if (!ret) {
344
0
        brpc::ClosureGuard closure_guard(done);
345
0
        LOG(WARNING) << "fail to offer fetch peer data request to the work pool, pool="
346
0
                     << _heavy_work_pool.get_info();
347
0
    }
348
0
}
349
350
#include "common/compile_check_end.h"
351
352
// bvar metrics for event-driven rowset warm up.
// The finished/failed segment counters are updated by handle_segment_download_done and the
// finished/failed index counters by handle_inverted_index_download_done, together with the
// last-finish timestamp, the rowset latency recorders and the slow counters.
// NOTE(review): the submitted_*, last_handle, request_to_handle_* and wait_for_compaction_*
// metrics are not updated in the part of the file visible here; presumably warm_up_rowset
// maintains them - confirm.
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_num(
353
        "file_cache_event_driven_warm_up_submitted_segment_num");
354
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_num(
355
        "file_cache_event_driven_warm_up_finished_segment_num");
356
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_num(
357
        "file_cache_event_driven_warm_up_failed_segment_num");
358
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_size(
359
        "file_cache_event_driven_warm_up_submitted_segment_size");
360
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_size(
361
        "file_cache_event_driven_warm_up_finished_segment_size");
362
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_size(
363
        "file_cache_event_driven_warm_up_failed_segment_size");
364
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_num(
365
        "file_cache_event_driven_warm_up_submitted_index_num");
366
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_num(
367
        "file_cache_event_driven_warm_up_finished_index_num");
368
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_num(
369
        "file_cache_event_driven_warm_up_failed_index_num");
370
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_size(
371
        "file_cache_event_driven_warm_up_submitted_index_size");
372
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_size(
373
        "file_cache_event_driven_warm_up_finished_index_size");
374
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_size(
375
        "file_cache_event_driven_warm_up_failed_index_size");
376
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_handle_unix_ts(
377
        "file_cache_warm_up_rowset_last_handle_unix_ts", 0);
378
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_finish_unix_ts(
379
        "file_cache_warm_up_rowset_last_finish_unix_ts", 0);
380
bvar::LatencyRecorder g_file_cache_warm_up_rowset_latency("file_cache_warm_up_rowset_latency");
381
bvar::LatencyRecorder g_file_cache_warm_up_rowset_request_to_handle_latency(
382
        "file_cache_warm_up_rowset_request_to_handle_latency");
383
bvar::LatencyRecorder g_file_cache_warm_up_rowset_handle_to_finish_latency(
384
        "file_cache_warm_up_rowset_handle_to_finish_latency");
385
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_slow_count(
386
        "file_cache_warm_up_rowset_slow_count");
387
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_request_to_handle_slow_count(
388
        "file_cache_warm_up_rowset_request_to_handle_slow_count");
389
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_handle_to_finish_slow_count(
390
        "file_cache_warm_up_rowset_handle_to_finish_slow_count");
391
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_num(
392
        "file_cache_warm_up_rowset_wait_for_compaction_num");
393
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num(
394
        "file_cache_warm_up_rowset_wait_for_compaction_timeout_num");
395
396
void handle_segment_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
397
                                  int64_t segment_id, std::shared_ptr<CloudTablet> tablet,
398
                                  std::shared_ptr<bthread::CountdownEvent> wait, Version version,
399
0
                                  int64_t segment_size, int64_t request_ts, int64_t handle_ts) {
400
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_segment", {
401
0
        auto sleep_time = dp->param<int>("sleep", 3);
402
0
        LOG_INFO("[verbose] block download for rowset={}, version={}, sleep={}",
403
0
                 rowset_id.to_string(), version.to_string(), sleep_time);
404
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
405
0
    });
406
0
    DBUG_EXECUTE_IF(
407
0
            "CloudInternalServiceImpl::warm_up_rowset.download_segment.inject_"
408
0
            "error",
409
0
            {
410
0
                st = Status::InternalError("injected error");
411
0
                LOG_INFO("[verbose] inject error, tablet={}, rowset={}, st={}", tablet_id,
412
0
                         rowset_id.to_string(), st.to_string());
413
0
            });
414
0
    if (st.ok()) {
415
0
        g_file_cache_event_driven_warm_up_finished_segment_num << 1;
416
0
        g_file_cache_event_driven_warm_up_finished_segment_size << segment_size;
417
0
        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
418
0
                                 std::chrono::system_clock::now().time_since_epoch())
419
0
                                 .count();
420
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
421
0
        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
422
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
423
0
        if (request_ts > 0 && now_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
424
0
            g_file_cache_warm_up_rowset_slow_count << 1;
425
0
            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
426
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
427
0
                      << ", segment_id: " << segment_id;
428
0
        }
429
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
430
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
431
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
432
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
433
0
                      << ", segment_id: " << segment_id;
434
0
        }
435
0
    } else {
436
0
        g_file_cache_event_driven_warm_up_failed_segment_num << 1;
437
0
        g_file_cache_event_driven_warm_up_failed_segment_size << segment_size;
438
0
        LOG(WARNING) << "download segment failed, tablet_id: " << tablet_id
439
0
                     << " rowset_id: " << rowset_id.to_string() << ", error: " << st;
440
0
    }
441
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 1,
442
0
                                               0)
443
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
444
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
445
0
                   << ") completed";
446
0
    }
447
0
    if (wait) {
448
0
        wait->signal();
449
0
    }
450
0
}
451
452
void handle_inverted_index_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
453
                                         int64_t segment_id, std::string index_path,
454
                                         std::shared_ptr<CloudTablet> tablet,
455
                                         std::shared_ptr<bthread::CountdownEvent> wait,
456
                                         Version version, uint64_t idx_size, int64_t request_ts,
457
0
                                         int64_t handle_ts) {
458
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_inverted_idx", {
459
0
        auto sleep_time = dp->param<int>("sleep", 3);
460
0
        LOG_INFO(
461
0
                "[verbose] block download for rowset={}, inverted index "
462
0
                "file={}, sleep={}",
463
0
                rowset_id.to_string(), index_path, sleep_time);
464
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
465
0
    });
466
0
    if (st.ok()) {
467
0
        g_file_cache_event_driven_warm_up_finished_index_num << 1;
468
0
        g_file_cache_event_driven_warm_up_finished_index_size << idx_size;
469
0
        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
470
0
                                 std::chrono::system_clock::now().time_since_epoch())
471
0
                                 .count();
472
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
473
0
        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
474
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
475
0
        if (request_ts > 0 && now_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
476
0
            g_file_cache_warm_up_rowset_slow_count << 1;
477
0
            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
478
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
479
0
                      << ", segment_id: " << segment_id;
480
0
        }
481
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
482
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
483
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
484
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
485
0
                      << ", segment_id: " << segment_id;
486
0
        }
487
0
    } else {
488
0
        g_file_cache_event_driven_warm_up_failed_index_num << 1;
489
0
        g_file_cache_event_driven_warm_up_failed_index_size << idx_size;
490
0
        LOG(WARNING) << "download inverted index failed, tablet_id: " << tablet_id
491
0
                     << " rowset_id: " << rowset_id << ", error: " << st;
492
0
    }
493
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 0,
494
0
                                               1)
495
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
496
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
497
0
                   << ") completed";
498
0
    }
499
0
    if (wait) {
500
0
        wait->signal();
501
0
    }
502
0
}
503
504
void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* controller
505
                                              [[maybe_unused]],
506
                                              const PWarmUpRowsetRequest* request,
507
                                              PWarmUpRowsetResponse* response,
508
0
                                              google::protobuf::Closure* done) {
509
0
    brpc::ClosureGuard closure_guard(done);
510
0
    std::shared_ptr<bthread::CountdownEvent> wait = nullptr;
511
0
    timespec due_time;
512
0
    if (request->has_sync_wait_timeout_ms() && request->sync_wait_timeout_ms() > 0) {
513
0
        g_file_cache_warm_up_rowset_wait_for_compaction_num << 1;
514
0
        wait = std::make_shared<bthread::CountdownEvent>(0);
515
0
        VLOG_DEBUG << "sync_wait_timeout: " << request->sync_wait_timeout_ms() << " ms";
516
0
        due_time = butil::milliseconds_from_now(request->sync_wait_timeout_ms());
517
0
    }
518
519
0
    for (auto& rs_meta_pb : request->rowset_metas()) {
520
0
        RowsetMeta rs_meta;
521
0
        rs_meta.init_from_pb(rs_meta_pb);
522
0
        auto storage_resource = rs_meta.remote_storage_resource();
523
0
        if (!storage_resource) {
524
0
            LOG(WARNING) << storage_resource.error();
525
0
            continue;
526
0
        }
527
0
        int64_t tablet_id = rs_meta.tablet_id();
528
0
        auto rowset_id = rs_meta.rowset_id();
529
0
        bool local_only = !(request->has_skip_existence_check() && request->skip_existence_check());
530
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data = */ false,
531
0
                                                   /* sync_delete_bitmap = */ true,
532
0
                                                   /* sync_stats = */ nullptr,
533
0
                                                   /* local_only = */ local_only);
534
0
        if (!res.has_value()) {
535
0
            LOG_WARNING("Warm up error ").tag("tablet_id", tablet_id).error(res.error());
536
0
            if (res.error().msg().find("local_only=true") != std::string::npos) {
537
0
                res.error().set_code(ErrorCode::TABLE_NOT_FOUND);
538
0
            }
539
0
            res.error().to_protobuf(response->mutable_status());
540
0
            continue;
541
0
        }
542
0
        auto tablet = res.value();
543
0
        auto tablet_meta = tablet->tablet_meta();
544
545
0
        int64_t handle_ts = std::chrono::duration_cast<std::chrono::microseconds>(
546
0
                                    std::chrono::system_clock::now().time_since_epoch())
547
0
                                    .count();
548
0
        g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts);
549
0
        int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() : 0;
550
0
        g_file_cache_warm_up_rowset_request_to_handle_latency << (handle_ts - request_ts);
551
0
        if (request_ts > 0 && handle_ts - request_ts > config::warm_up_rowset_slow_log_ms * 1000) {
552
0
            g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
553
0
            LOG(INFO) << "warm up rowset (request to handle) took " << handle_ts - request_ts
554
0
                      << " us, tablet_id: " << rs_meta.tablet_id()
555
0
                      << ", rowset_id: " << rowset_id.to_string();
556
0
        }
557
0
        int64_t expiration_time = tablet_meta->ttl_seconds();
558
559
0
        if (!tablet->add_rowset_warmup_state(rs_meta, WarmUpTriggerSource::EVENT_DRIVEN)) {
560
0
            LOG(INFO) << "found duplicate warmup task for rowset " << rowset_id.to_string()
561
0
                      << ", skip it";
562
0
            continue;
563
0
        }
564
565
0
        for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); segment_id++) {
566
0
            if (!config::file_cache_enable_only_warm_up_idx) {
567
0
                auto segment_size = rs_meta.segment_file_size(segment_id);
568
569
                // Use rs_meta.fs() instead of storage_resource.value()->fs to support packed files.
570
                // PackedFileSystem wrapper in rs_meta.fs() handles the index_map lookup and
571
                // reads from the correct packed file.
572
0
                io::DownloadFileMeta download_meta {
573
0
                        .path = storage_resource.value()->remote_segment_path(rs_meta, segment_id),
574
0
                        .file_size = segment_size,
575
0
                        .offset = 0,
576
0
                        .download_size = segment_size,
577
0
                        .file_system = rs_meta.fs(),
578
0
                        .ctx = {.is_index_data = false,
579
0
                                .expiration_time = expiration_time,
580
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
581
0
                                .is_warmup = true},
582
0
                        .download_done = [=, version = rs_meta.version()](Status st) {
583
0
                            handle_segment_download_done(st, tablet_id, rowset_id, segment_id,
584
0
                                                         tablet, wait, version, segment_size,
585
0
                                                         request_ts, handle_ts);
586
0
                        }};
587
588
0
                g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
589
0
                g_file_cache_event_driven_warm_up_submitted_segment_size << segment_size;
590
0
                if (wait) {
591
0
                    wait->add_count();
592
0
                }
593
594
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
595
0
            }
596
597
            // Use rs_meta.fs() to support packed files for inverted index download.
598
0
            auto download_inverted_index = [&, tablet](std::string index_path, uint64_t idx_size) {
599
0
                io::DownloadFileMeta download_meta {
600
0
                        .path = io::Path(index_path),
601
0
                        .file_size = static_cast<int64_t>(idx_size),
602
0
                        .file_system = rs_meta.fs(),
603
0
                        .ctx = {.is_index_data = false, // DORIS-20877
604
0
                                .expiration_time = expiration_time,
605
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
606
0
                                .is_warmup = true},
607
0
                        .download_done = [=, version = rs_meta.version()](Status st) {
608
0
                            handle_inverted_index_download_done(
609
0
                                    st, tablet_id, rowset_id, segment_id, index_path, tablet, wait,
610
0
                                    version, idx_size, request_ts, handle_ts);
611
0
                        }};
612
0
                g_file_cache_event_driven_warm_up_submitted_index_num << 1;
613
0
                g_file_cache_event_driven_warm_up_submitted_index_size << idx_size;
614
0
                tablet->update_rowset_warmup_state_inverted_idx_num(
615
0
                        WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, 1);
616
0
                if (wait) {
617
0
                    wait->add_count();
618
0
                }
619
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
620
0
            };
621
622
            // inverted index
623
0
            auto schema_ptr = rs_meta.tablet_schema();
624
0
            auto idx_version = schema_ptr->get_inverted_index_storage_format();
625
626
0
            if (schema_ptr->has_inverted_index() || schema_ptr->has_ann_index()) {
627
0
                if (idx_version == InvertedIndexStorageFormatPB::V1) {
628
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
629
0
                    std::unordered_map<int64_t, int64_t> index_size_map;
630
0
                    for (const auto& info : inverted_index_info.index_info()) {
631
0
                        if (info.index_file_size() != -1) {
632
0
                            index_size_map[info.index_id()] = info.index_file_size();
633
0
                        } else {
634
0
                            VLOG_DEBUG << "Invalid index_file_size for segment_id " << segment_id
635
0
                                       << ", index_id " << info.index_id();
636
0
                        }
637
0
                    }
638
0
                    for (const auto& index : schema_ptr->inverted_indexes()) {
639
0
                        auto idx_path = storage_resource.value()->remote_idx_v1_path(
640
0
                                rs_meta, segment_id, index->index_id(), index->get_index_suffix());
641
0
                        download_inverted_index(idx_path, index_size_map[index->index_id()]);
642
0
                    }
643
0
                } else { // InvertedIndexStorageFormatPB::V2
644
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
645
0
                    int64_t idx_size = 0;
646
0
                    if (inverted_index_info.has_index_size()) {
647
0
                        idx_size = inverted_index_info.index_size();
648
0
                    } else {
649
0
                        VLOG_DEBUG << "index_size is not set for segment " << segment_id;
650
0
                    }
651
0
                    auto idx_path =
652
0
                            storage_resource.value()->remote_idx_v2_path(rs_meta, segment_id);
653
0
                    download_inverted_index(idx_path, idx_size);
654
0
                }
655
0
            }
656
0
        }
657
0
    }
658
0
    if (wait && wait->timed_wait(due_time)) {
659
0
        g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num << 1;
660
0
        LOG_WARNING("the time spent warming up {} rowsets exceeded {} ms",
661
0
                    request->rowset_metas().size(), request->sync_wait_timeout_ms());
662
0
    }
663
0
}
664
665
// Counters bumped by CloudInternalServiceImpl::recycle_cache.
// Incremented once per segment for which an asynchronous cache removal was requested.
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_segment_num(
        "file_cache_recycle_cache_finished_segment_num");
// Incremented once per index file for which an asynchronous cache removal was requested.
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_index_num(
        "file_cache_recycle_cache_finished_index_num");
669
670
void CloudInternalServiceImpl::recycle_cache(google::protobuf::RpcController* controller
671
                                             [[maybe_unused]],
672
                                             const PRecycleCacheRequest* request,
673
                                             PRecycleCacheResponse* response,
674
0
                                             google::protobuf::Closure* done) {
675
0
    brpc::ClosureGuard closure_guard(done);
676
677
0
    if (!config::enable_file_cache) {
678
0
        return;
679
0
    }
680
0
    for (const auto& meta : request->cache_metas()) {
681
0
        for (int64_t segment_id = 0; segment_id < meta.num_segments(); segment_id++) {
682
0
            auto file_key = Segment::file_cache_key(meta.rowset_id(), segment_id);
683
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
684
0
            file_cache->remove_if_cached_async(file_key);
685
0
            g_file_cache_recycle_cache_finished_segment_num << 1;
686
0
        }
687
688
        // inverted index
689
0
        for (const auto& file_name : meta.index_file_names()) {
690
0
            auto file_key = io::BlockFileCache::hash(file_name);
691
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
692
0
            file_cache->remove_if_cached_async(file_key);
693
0
            g_file_cache_recycle_cache_finished_index_num << 1;
694
0
        }
695
0
    }
696
0
}
697
698
#include "common/compile_check_avoid_end.h"
699
} // namespace doris