Coverage Report

Created: 2026-05-09 01:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/cloud/cloud_internal_service.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "cloud/cloud_internal_service.h"
19
20
#include <bthread/countdown_event.h>
21
22
#include <algorithm>
23
#include <optional>
24
#include <thread>
25
26
#include "cloud/cloud_storage_engine.h"
27
#include "cloud/cloud_tablet.h"
28
#include "cloud/cloud_tablet_mgr.h"
29
#include "cloud/cloud_warm_up_manager.h"
30
#include "cloud/config.h"
31
#include "io/cache/block_file_cache.h"
32
#include "io/cache/block_file_cache_downloader.h"
33
#include "io/cache/block_file_cache_factory.h"
34
#include "runtime/thread_context.h"
35
#include "runtime/workload_management/io_throttle.h"
36
#include "util/async_io.h"
37
#include "util/debug_points.h"
38
39
namespace doris {
40
#include "common/compile_check_avoid_begin.h"
41
42
// ---- bvar metrics for the peer file-cache read path (fetch_peer_data) ----

// Counters: tasks run, blocks handed out, and the success / failure tallies
// that fetch_peer_data and its helpers in this file bump.
bvar::Adder<uint64_t> g_file_cache_get_by_peer_num("file_cache_get_by_peer_num");
bvar::Adder<uint64_t> g_file_cache_get_by_peer_blocks_num("file_cache_get_by_peer_blocks_num");
bvar::Adder<uint64_t> g_file_cache_get_by_peer_success_num("file_cache_get_by_peer_success_num");
bvar::Adder<uint64_t> g_file_cache_get_by_peer_failed_num("file_cache_get_by_peer_failed_num");

// Latencies, fed with microsecond deltas taken from std::chrono::steady_clock:
// the whole fetch_peer_data task, and a single cache block read.
bvar::LatencyRecorder g_file_cache_get_by_peer_server_latency(
        "file_cache_get_by_peer_server_latency");
bvar::LatencyRecorder g_file_cache_get_by_peer_read_cache_file_latency(
        "file_cache_get_by_peer_read_cache_file_latency");

// Latency (microseconds) of one get_file_cache_meta_by_tablet_id call.
bvar::LatencyRecorder g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency(
        "cloud_internal_service_get_file_cache_meta_by_tablet_id_latency");
52
53
// Constructs the cloud-mode internal service: exec_env is forwarded to the
// PInternalService base and a reference to the cloud storage engine is kept
// in _engine for use by the RPC handlers.
CloudInternalServiceImpl::CloudInternalServiceImpl(CloudStorageEngine& engine, ExecEnv* exec_env)
        : PInternalService(exec_env), _engine(engine) {}
55
56
0
// Nothing to release explicitly; the compiler-generated destructor is used.
CloudInternalServiceImpl::~CloudInternalServiceImpl() = default;
57
58
void CloudInternalServiceImpl::alter_vault_sync(google::protobuf::RpcController* controller,
59
                                                const doris::PAlterVaultSyncRequest* request,
60
                                                PAlterVaultSyncResponse* response,
61
0
                                                google::protobuf::Closure* done) {
62
0
    LOG(INFO) << "alter be to sync vault info from Meta Service";
63
    // If the vaults containing hdfs vault then it would try to create hdfs connection using jni
64
    // which would acuiqre one thread local jniEnv. But bthread context can't guarantee that the brpc
65
    // worker thread wouldn't do bthread switch between worker threads.
66
0
    bool ret = _heavy_work_pool.try_offer([this, done]() {
67
0
        brpc::ClosureGuard closure_guard(done);
68
0
        _engine.sync_storage_vault();
69
0
    });
70
0
    if (!ret) {
71
0
        brpc::ClosureGuard closure_guard(done);
72
0
        LOG(WARNING) << "fail to offer alter_vault_sync request to the work pool, pool="
73
0
                     << _heavy_work_pool.get_info();
74
0
    }
75
0
}
76
77
0
// Translates the file cache's own block type into the protobuf FileCacheType
// enum. TTL, INDEX and NORMAL map one-to-one; any other value trips
// DCHECK(false) and, if execution continues past it, is reported as NORMAL.
FileCacheType cache_type_to_pb(io::FileCacheType type) {
    switch (type) {
    case io::FileCacheType::NORMAL:
        return FileCacheType::NORMAL;
    case io::FileCacheType::INDEX:
        return FileCacheType::INDEX;
    case io::FileCacheType::TTL:
        return FileCacheType::TTL;
    default:
        // No protobuf counterpart is mapped for this value.
        DCHECK(false);
        return FileCacheType::NORMAL;
    }
}
90
91
0
// Returns the current std::chrono::system_clock time as microseconds since
// the clock's epoch.
static int64_t current_unix_time_us() {
    using std::chrono::microseconds;
    const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
    return std::chrono::duration_cast<microseconds>(since_epoch).count();
}
96
97
// Computes `end_unix_ts_us - start_unix_ts_us` for timestamps taken on two
// different hosts.
//
// The start timestamp is generated by the caller BE. Mixed-version callers may
// omit it (seen here as a non-positive value), and system clocks across BEs are
// not guaranteed to be ordered, so std::nullopt is returned whenever the start
// is missing or the end lies before the start.
static std::optional<int64_t> warm_up_rowset_cross_host_latency_us(int64_t start_unix_ts_us,
                                                                   int64_t end_unix_ts_us) {
    const bool start_present = start_unix_ts_us > 0;
    const bool clocks_ordered = end_unix_ts_us >= start_unix_ts_us;
    if (start_present && clocks_ordered) {
        return end_unix_ts_us - start_unix_ts_us;
    }
    return std::nullopt;
}
106
107
static void add_file_cache_block_meta_to_response(
108
        PGetFileCacheMetaResponse* resp, int64_t tablet_id, const std::string& rowset_id,
109
        int32_t segment_id, const std::string& file_name,
110
        const std::tuple<int64_t, int64_t, io::FileCacheType, int64_t>& tuple,
111
0
        const RowsetSharedPtr& rowset, bool is_index) {
112
0
    FileCacheBlockMeta* meta = resp->add_file_cache_block_metas();
113
0
    meta->set_tablet_id(tablet_id);
114
0
    meta->set_rowset_id(rowset_id);
115
0
    meta->set_segment_id(segment_id);
116
0
    meta->set_file_name(file_name);
117
118
0
    if (!is_index) {
119
        // .dat
120
0
        meta->set_file_size(rowset->rowset_meta()->segment_file_size(segment_id));
121
0
        meta->set_file_type(doris::FileType::SEGMENT_FILE);
122
0
    } else {
123
        // .idx
124
0
        const auto& idx_file_info = rowset->rowset_meta()->inverted_index_file_info(segment_id);
125
0
        meta->set_file_size(idx_file_info.has_index_size() ? idx_file_info.index_size() : -1);
126
0
        meta->set_file_type(doris::FileType::INVERTED_INDEX_FILE);
127
0
    }
128
129
0
    meta->set_offset(std::get<0>(tuple));
130
0
    meta->set_size(std::get<1>(tuple));
131
0
    meta->set_cache_type(cache_type_to_pb(std::get<2>(tuple)));
132
0
    meta->set_expiration_time(std::get<3>(tuple));
133
0
}
134
135
static void process_segment_file_cache_meta(PGetFileCacheMetaResponse* resp,
136
                                            const RowsetSharedPtr& rowset, int64_t tablet_id,
137
                                            const std::string& rowset_id, int32_t segment_id,
138
0
                                            bool is_index) {
139
0
    const char* extension = is_index ? ".idx" : ".dat";
140
0
    std::string file_name = fmt::format("{}_{}{}", rowset_id, segment_id, extension);
141
0
    auto cache_key = io::BlockFileCache::hash(file_name);
142
0
    auto* cache = io::FileCacheFactory::instance()->get_by_path(cache_key);
143
0
    if (!cache) return;
144
0
    auto segments_meta = cache->get_hot_blocks_meta(cache_key);
145
0
    for (const auto& tuple : segments_meta) {
146
0
        add_file_cache_block_meta_to_response(resp, tablet_id, rowset_id, segment_id, file_name,
147
0
                                              tuple, rowset, is_index);
148
0
    }
149
0
}
150
151
void CloudInternalServiceImpl::get_file_cache_meta_by_tablet_id(
152
        google::protobuf::RpcController* controller [[maybe_unused]],
153
        const PGetFileCacheMetaRequest* request, PGetFileCacheMetaResponse* response,
154
0
        google::protobuf::Closure* done) {
155
0
    brpc::ClosureGuard closure_guard(done);
156
0
    if (!config::enable_file_cache) {
157
0
        LOG_WARNING("try to access tablet file cache meta, but file cache not enabled");
158
0
        return;
159
0
    }
160
0
    auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
161
0
                            std::chrono::steady_clock::now().time_since_epoch())
162
0
                            .count();
163
0
    std::ostringstream tablet_ids_stream;
164
0
    int count = 0;
165
0
    for (const auto& tablet_id : request->tablet_ids()) {
166
0
        tablet_ids_stream << tablet_id << ", ";
167
0
        count++;
168
0
        if (count >= 10) {
169
0
            break;
170
0
        }
171
0
    }
172
0
    LOG(INFO) << "warm up get meta from this be, tablets num=" << request->tablet_ids().size()
173
0
              << ", first 10 tablet_ids=[ " << tablet_ids_stream.str() << " ]";
174
0
    for (const auto& tablet_id : request->tablet_ids()) {
175
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id);
176
0
        if (!res.has_value()) {
177
0
            LOG(ERROR) << "failed to get tablet: " << tablet_id
178
0
                       << " err msg: " << res.error().msg();
179
0
            continue;
180
0
        }
181
0
        CloudTabletSPtr tablet = std::move(res.value());
182
0
        auto st = tablet->sync_rowsets();
183
0
        if (!st) {
184
            // just log failed, try it best
185
0
            LOG(WARNING) << "failed to sync rowsets: " << tablet_id
186
0
                         << " err msg: " << st.to_string();
187
0
        }
188
0
        auto rowsets = tablet->get_snapshot_rowset();
189
190
0
        for (const RowsetSharedPtr& rowset : rowsets) {
191
0
            std::string rowset_id = rowset->rowset_id().to_string();
192
0
            for (int32_t segment_id = 0; segment_id < rowset->num_segments(); ++segment_id) {
193
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
194
0
                                                false);
195
0
                process_segment_file_cache_meta(response, rowset, tablet_id, rowset_id, segment_id,
196
0
                                                true);
197
0
            }
198
0
        }
199
0
    }
200
0
    auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
201
0
                          std::chrono::steady_clock::now().time_since_epoch())
202
0
                          .count();
203
0
    g_cloud_internal_service_get_file_cache_meta_by_tablet_id_latency << (end_ts - begin_ts);
204
0
    LOG(INFO) << "get file cache meta by tablet ids = [ " << tablet_ids_stream.str() << " ] took "
205
0
              << end_ts - begin_ts << " us";
206
0
    VLOG_DEBUG << "get file cache meta by tablet id request=" << request->DebugString()
207
0
               << ", response=" << response->DebugString();
208
0
}
209
210
namespace {
211
// Helper functions for fetch_peer_data
212
213
0
// Serves a PEER_FILE_RANGE request: every cache block that the file cache
// factory returns for `path` is moved into `response->datas`.
// NOTE(review): no offset/size filtering happens here — whatever
// get_cache_data_by_path() yields is returned as-is. Always returns OK.
Status handle_peer_file_range_request(const std::string& path, PFetchPeerDataResponse* response) {
    auto cached_blocks = io::FileCacheFactory::instance()->get_cache_data_by_path(path);
    for (auto& block : cached_blocks) {
        auto* slot = response->add_datas();
        *slot = std::move(block);
    }
    return Status::OK();
}
221
222
0
// Marks `response` as failed: appends `error_msg` to the status' error messages
// and sets the status code to INTERNAL_ERROR. Repeated calls keep appending
// messages.
void set_error_response(PFetchPeerDataResponse* response, const std::string& error_msg) {
    auto* status = response->mutable_status();
    status->add_error_msgs(error_msg);
    status->set_status_code(TStatusCode::INTERNAL_ERROR);
}
226
227
// Reads the content of one cached file block into `output`.
//
// At most `file_size - file_block->offset()` bytes are read, capped at the
// block's range size. On success `output` receives the block offset, the number
// of bytes read and the data. On a read failure the peer "failed" counter is
// bumped and the read status is returned; `output` is left unfilled.
// The read time (microseconds) is always recorded in
// g_file_cache_get_by_peer_read_cache_file_latency.
Status read_file_block(const std::shared_ptr<io::FileBlock>& file_block, size_t file_size,
                       doris::CacheBlockPB* output) {
    // ATTN: calculate the rightmost boundary value of the block, due to inaccurate current block
    // meta information. See CachedRemoteFileReader::read_at_impl for more details.
    // file_size must not be smaller than the block offset, or the subtraction would underflow.
    if (file_size < file_block->offset()) {
        LOG(WARNING) << "file_size (" << file_size << ") < file_block->offset("
                     << file_block->offset() << ")";
        return Status::InternalError<false>("file_size less than block offset");
    }

    const size_t bytes_to_file_end = static_cast<size_t>(file_size - file_block->offset());
    const size_t read_size = std::min(bytes_to_file_end, file_block->range().size());

    std::string data;
    data.resize(read_size);

    const auto steady_now_us = []() {
        return std::chrono::duration_cast<std::chrono::microseconds>(
                       std::chrono::steady_clock::now().time_since_epoch())
                .count();
    };

    const auto begin_read_file_ts = steady_now_us();

    SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->s3_file_buffer_tracker());
    Slice slice(data.data(), data.size());
    Status read_st = file_block->read(slice, /*read_offset=*/0);

    const auto end_read_file_ts = steady_now_us();
    g_file_cache_get_by_peer_read_cache_file_latency << (end_read_file_ts - begin_read_file_ts);

    if (!read_st.ok()) {
        g_file_cache_get_by_peer_failed_num << 1;
        LOG(WARNING) << "read cache block failed: " << read_st;
        return read_st;
    }

    output->set_block_offset(static_cast<int64_t>(file_block->offset()));
    output->set_block_size(static_cast<int64_t>(read_size));
    output->set_data(std::move(data));
    return Status::OK();
}
266
267
// Serves a PEER_FILE_CACHE_BLOCK request: for each requested (offset, size)
// range, obtains the covering cache blocks via get_or_set() and copies every
// block into `response->datas`.
//
// Negative offsets / sizes in the request are clamped to 0. The request fails
// (error recorded in `response`, non-OK status returned) when no cache
// instance exists for the path, when a covering block is not in DOWNLOADED
// state, or when reading a block fails.
// NOTE(review): fetch_peer_data also calls set_error_response() on a non-OK
// return, so a failure ends up with two error messages in the response.
Status handle_peer_file_cache_block_request(const PFetchPeerDataRequest* request,
                                            PFetchPeerDataResponse* response) {
    const auto& path = request->path();
    auto hash = io::BlockFileCache::hash(path);
    auto* cache = io::FileCacheFactory::instance()->get_by_path(hash);
    if (cache == nullptr) {
        g_file_cache_get_by_peer_failed_num << 1;
        set_error_response(response, "can't get file cache instance");
        return Status::InternalError<false>("can't get file cache instance");
    }

    io::CacheContext ctx {};
    io::ReadStatistics local_stats;
    ctx.stats = &local_stats;

    // Clamp a signed request field to a non-negative size_t.
    const auto to_non_negative = [](int64_t value) {
        return static_cast<size_t>(std::max<int64_t>(0, value));
    };

    for (const auto& cb_req : request->cache_req()) {
        const size_t offset = to_non_negative(cb_req.block_offset());
        const size_t size = to_non_negative(cb_req.block_size());
        auto holder = cache->get_or_set(hash, offset, size, ctx);

        for (auto& fb : holder.file_blocks) {
            const bool downloaded = fb->state() == io::FileBlock::State::DOWNLOADED;
            if (!downloaded) {
                g_file_cache_get_by_peer_failed_num << 1;
                LOG(WARNING) << "read cache block failed, state=" << fb->state();
                set_error_response(response, "read cache file error");
                return Status::InternalError<false>("cache block not downloaded");
            }

            g_file_cache_get_by_peer_blocks_num << 1;
            doris::CacheBlockPB* out = response->add_datas();
            Status read_status = read_file_block(fb, request->file_size(), out);
            if (read_status.ok()) {
                continue;
            }
            set_error_response(response, "read cache file error");
            return read_status;
        }
    }

    return Status::OK();
}
307
} // namespace
308
309
void CloudInternalServiceImpl::fetch_peer_data(google::protobuf::RpcController* controller
310
                                               [[maybe_unused]],
311
                                               const PFetchPeerDataRequest* request,
312
                                               PFetchPeerDataResponse* response,
313
0
                                               google::protobuf::Closure* done) {
314
0
    bool ret = _heavy_work_pool.try_offer([request, response, done]() {
315
0
        brpc::ClosureGuard closure_guard(done);
316
0
        g_file_cache_get_by_peer_num << 1;
317
318
0
        if (!config::enable_file_cache) {
319
0
            LOG_WARNING("try to access file cache data, but file cache not enabled");
320
0
            return;
321
0
        }
322
323
0
        auto begin_ts = std::chrono::duration_cast<std::chrono::microseconds>(
324
0
                                std::chrono::steady_clock::now().time_since_epoch())
325
0
                                .count();
326
327
0
        const auto type = request->type();
328
0
        const auto& path = request->path();
329
0
        response->mutable_status()->set_status_code(TStatusCode::OK);
330
331
0
        Status status = Status::OK();
332
0
        if (type == PFetchPeerDataRequest_Type_PEER_FILE_RANGE) {
333
0
            status = handle_peer_file_range_request(path, response);
334
0
        } else if (type == PFetchPeerDataRequest_Type_PEER_FILE_CACHE_BLOCK) {
335
0
            status = handle_peer_file_cache_block_request(request, response);
336
0
        }
337
338
0
        if (!status.ok()) {
339
0
            LOG(WARNING) << "fetch peer data failed: " << status.to_string();
340
0
            set_error_response(response, status.to_string());
341
0
        }
342
343
0
        DBUG_EXECUTE_IF("CloudInternalServiceImpl::fetch_peer_data_slower", {
344
0
            int st_us = dp->param<int>("sleep", 1000);
345
0
            LOG_WARNING("CloudInternalServiceImpl::fetch_peer_data_slower").tag("sleep", st_us);
346
0
            bthread_usleep(st_us);
347
0
        });
348
349
0
        auto end_ts = std::chrono::duration_cast<std::chrono::microseconds>(
350
0
                              std::chrono::steady_clock::now().time_since_epoch())
351
0
                              .count();
352
0
        g_file_cache_get_by_peer_server_latency << (end_ts - begin_ts);
353
0
        g_file_cache_get_by_peer_success_num << 1;
354
355
0
        VLOG_DEBUG << "fetch cache request=" << request->DebugString()
356
0
                   << ", response=" << response->DebugString();
357
0
    });
358
359
0
    if (!ret) {
360
0
        brpc::ClosureGuard closure_guard(done);
361
0
        LOG(WARNING) << "fail to offer fetch peer data request to the work pool, pool="
362
0
                     << _heavy_work_pool.get_info();
363
0
    }
364
0
}
365
366
// ---- bvar metrics for event-driven rowset warm up ----

// Segment data files: submitted / finished / failed, by count and by size.
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_num(
        "file_cache_event_driven_warm_up_submitted_segment_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_num(
        "file_cache_event_driven_warm_up_finished_segment_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_num(
        "file_cache_event_driven_warm_up_failed_segment_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_segment_size(
        "file_cache_event_driven_warm_up_submitted_segment_size");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_segment_size(
        "file_cache_event_driven_warm_up_finished_segment_size");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_segment_size(
        "file_cache_event_driven_warm_up_failed_segment_size");

// Inverted index files: submitted / finished / failed, by count and by size.
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_num(
        "file_cache_event_driven_warm_up_submitted_index_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_num(
        "file_cache_event_driven_warm_up_finished_index_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_num(
        "file_cache_event_driven_warm_up_failed_index_num");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_submitted_index_size(
        "file_cache_event_driven_warm_up_submitted_index_size");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_finished_index_size(
        "file_cache_event_driven_warm_up_finished_index_size");
bvar::Adder<uint64_t> g_file_cache_event_driven_warm_up_failed_index_size(
        "file_cache_event_driven_warm_up_failed_index_size");

// Timestamps of the most recent warm up handling / completion, both starting at 0.
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_handle_unix_ts(
        "file_cache_warm_up_rowset_last_handle_unix_ts", 0);
bvar::Status<int64_t> g_file_cache_warm_up_rowset_last_finish_unix_ts(
        "file_cache_warm_up_rowset_last_finish_unix_ts", 0);

// Warm up latencies: request -> finish, request -> handle, handle -> finish.
bvar::LatencyRecorder g_file_cache_warm_up_rowset_latency("file_cache_warm_up_rowset_latency");
bvar::LatencyRecorder g_file_cache_warm_up_rowset_request_to_handle_latency(
        "file_cache_warm_up_rowset_request_to_handle_latency");
bvar::LatencyRecorder g_file_cache_warm_up_rowset_handle_to_finish_latency(
        "file_cache_warm_up_rowset_handle_to_finish_latency");

// Number of warm ups whose corresponding latency was flagged as slow.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_slow_count(
        "file_cache_warm_up_rowset_slow_count");
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_request_to_handle_slow_count(
        "file_cache_warm_up_rowset_request_to_handle_slow_count");
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_handle_to_finish_slow_count(
        "file_cache_warm_up_rowset_handle_to_finish_slow_count");

// Warm up requests that asked for a synchronous wait, and how many of those timed out.
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_num(
        "file_cache_warm_up_rowset_wait_for_compaction_num");
bvar::Adder<uint64_t> g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num(
        "file_cache_warm_up_rowset_wait_for_compaction_timeout_num");
409
410
void handle_segment_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
411
                                  int64_t segment_id, std::shared_ptr<CloudTablet> tablet,
412
                                  std::shared_ptr<bthread::CountdownEvent> wait, Version version,
413
0
                                  int64_t segment_size, int64_t request_ts, int64_t handle_ts) {
414
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_segment", {
415
0
        auto sleep_time = dp->param<int>("sleep", 3);
416
0
        LOG_INFO("[verbose] block download for rowset={}, version={}, sleep={}",
417
0
                 rowset_id.to_string(), version.to_string(), sleep_time);
418
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
419
0
    });
420
0
    DBUG_EXECUTE_IF(
421
0
            "CloudInternalServiceImpl::warm_up_rowset.download_segment.inject_"
422
0
            "error",
423
0
            {
424
0
                st = Status::InternalError("injected error");
425
0
                LOG_INFO("[verbose] inject error, tablet={}, rowset={}, st={}", tablet_id,
426
0
                         rowset_id.to_string(), st.to_string());
427
0
            });
428
0
    if (st.ok()) {
429
0
        g_file_cache_event_driven_warm_up_finished_segment_num << 1;
430
0
        g_file_cache_event_driven_warm_up_finished_segment_size << segment_size;
431
0
        int64_t now_ts = current_unix_time_us();
432
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
433
0
        auto rowset_latency_us = warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
434
0
        if (rowset_latency_us.has_value()) {
435
0
            g_file_cache_warm_up_rowset_latency << *rowset_latency_us;
436
0
        }
437
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
438
0
        if (rowset_latency_us.has_value() &&
439
0
            *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) {
440
0
            g_file_cache_warm_up_rowset_slow_count << 1;
441
0
            LOG(INFO) << "warm up rowset took " << *rowset_latency_us
442
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
443
0
                      << ", segment_id: " << segment_id;
444
0
        }
445
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
446
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
447
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
448
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
449
0
                      << ", segment_id: " << segment_id;
450
0
        }
451
0
    } else {
452
0
        g_file_cache_event_driven_warm_up_failed_segment_num << 1;
453
0
        g_file_cache_event_driven_warm_up_failed_segment_size << segment_size;
454
0
        LOG(WARNING) << "download segment failed, tablet_id: " << tablet_id
455
0
                     << " rowset_id: " << rowset_id.to_string() << ", error: " << st;
456
0
    }
457
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 1,
458
0
                                               0)
459
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
460
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
461
0
                   << ") completed";
462
0
    }
463
0
    if (wait) {
464
0
        wait->signal();
465
0
    }
466
0
}
467
468
void handle_inverted_index_download_done(Status st, int64_t tablet_id, const RowsetId& rowset_id,
469
                                         int64_t segment_id, std::string index_path,
470
                                         std::shared_ptr<CloudTablet> tablet,
471
                                         std::shared_ptr<bthread::CountdownEvent> wait,
472
                                         Version version, uint64_t idx_size, int64_t request_ts,
473
0
                                         int64_t handle_ts) {
474
0
    DBUG_EXECUTE_IF("CloudInternalServiceImpl::warm_up_rowset.download_inverted_idx", {
475
0
        auto sleep_time = dp->param<int>("sleep", 3);
476
0
        LOG_INFO(
477
0
                "[verbose] block download for rowset={}, inverted index "
478
0
                "file={}, sleep={}",
479
0
                rowset_id.to_string(), index_path, sleep_time);
480
0
        std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
481
0
    });
482
0
    if (st.ok()) {
483
0
        g_file_cache_event_driven_warm_up_finished_index_num << 1;
484
0
        g_file_cache_event_driven_warm_up_finished_index_size << idx_size;
485
0
        int64_t now_ts = current_unix_time_us();
486
0
        g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
487
0
        auto rowset_latency_us = warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
488
0
        if (rowset_latency_us.has_value()) {
489
0
            g_file_cache_warm_up_rowset_latency << *rowset_latency_us;
490
0
        }
491
0
        g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - handle_ts);
492
0
        if (rowset_latency_us.has_value() &&
493
0
            *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) {
494
0
            g_file_cache_warm_up_rowset_slow_count << 1;
495
0
            LOG(INFO) << "warm up rowset took " << *rowset_latency_us
496
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
497
0
                      << ", segment_id: " << segment_id;
498
0
        }
499
0
        if (now_ts - handle_ts > config::warm_up_rowset_slow_log_ms * 1000) {
500
0
            g_file_cache_warm_up_rowset_handle_to_finish_slow_count << 1;
501
0
            LOG(INFO) << "warm up rowset (handle to finish) took " << now_ts - handle_ts
502
0
                      << " us, tablet_id: " << tablet_id << ", rowset_id: " << rowset_id.to_string()
503
0
                      << ", segment_id: " << segment_id;
504
0
        }
505
0
    } else {
506
0
        g_file_cache_event_driven_warm_up_failed_index_num << 1;
507
0
        g_file_cache_event_driven_warm_up_failed_index_size << idx_size;
508
0
        LOG(WARNING) << "download inverted index failed, tablet_id: " << tablet_id
509
0
                     << " rowset_id: " << rowset_id << ", error: " << st;
510
0
    }
511
0
    if (tablet->complete_rowset_segment_warmup(WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, st, 0,
512
0
                                               1)
513
0
                .trigger_source == WarmUpTriggerSource::EVENT_DRIVEN) {
514
0
        VLOG_DEBUG << "warmup rowset " << version.to_string() << "(" << rowset_id.to_string()
515
0
                   << ") completed";
516
0
    }
517
0
    if (wait) {
518
0
        wait->signal();
519
0
    }
520
0
}
521
522
void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* controller
523
                                              [[maybe_unused]],
524
                                              const PWarmUpRowsetRequest* request,
525
                                              PWarmUpRowsetResponse* response,
526
0
                                              google::protobuf::Closure* done) {
527
0
    brpc::ClosureGuard closure_guard(done);
528
0
    std::shared_ptr<bthread::CountdownEvent> wait = nullptr;
529
0
    timespec due_time;
530
0
    if (request->has_sync_wait_timeout_ms() && request->sync_wait_timeout_ms() > 0) {
531
0
        g_file_cache_warm_up_rowset_wait_for_compaction_num << 1;
532
0
        wait = std::make_shared<bthread::CountdownEvent>(0);
533
0
        VLOG_DEBUG << "sync_wait_timeout: " << request->sync_wait_timeout_ms() << " ms";
534
0
        due_time = butil::milliseconds_from_now(request->sync_wait_timeout_ms());
535
0
    }
536
537
0
    for (auto& rs_meta_pb : request->rowset_metas()) {
538
0
        RowsetMeta rs_meta;
539
0
        rs_meta.init_from_pb(rs_meta_pb);
540
0
        auto storage_resource = rs_meta.remote_storage_resource();
541
0
        if (!storage_resource) {
542
0
            LOG(WARNING) << storage_resource.error();
543
0
            continue;
544
0
        }
545
0
        int64_t tablet_id = rs_meta.tablet_id();
546
0
        auto rowset_id = rs_meta.rowset_id();
547
0
        bool local_only = !(request->has_skip_existence_check() && request->skip_existence_check());
548
0
        auto res = _engine.tablet_mgr().get_tablet(tablet_id, /* warmup_data = */ false,
549
0
                                                   /* sync_delete_bitmap = */ true,
550
0
                                                   /* sync_stats = */ nullptr,
551
0
                                                   /* local_only = */ local_only);
552
0
        if (!res.has_value()) {
553
0
            LOG_WARNING("Warm up error ").tag("tablet_id", tablet_id).error(res.error());
554
0
            if (res.error().msg().find("local_only=true") != std::string::npos ||
555
0
                res.error().msg().find("force_use_only_cached=true") != std::string::npos) {
556
0
                res.error().set_code(ErrorCode::TABLE_NOT_FOUND);
557
0
            }
558
0
            res.error().to_protobuf(response->mutable_status());
559
0
            continue;
560
0
        }
561
0
        auto tablet = res.value();
562
0
        auto tablet_meta = tablet->tablet_meta();
563
564
0
        int64_t handle_ts = current_unix_time_us();
565
0
        g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts);
566
0
        int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() : 0;
567
0
        auto request_to_handle_latency_us =
568
0
                warm_up_rowset_cross_host_latency_us(request_ts, handle_ts);
569
0
        if (request_to_handle_latency_us.has_value()) {
570
0
            g_file_cache_warm_up_rowset_request_to_handle_latency << *request_to_handle_latency_us;
571
0
        }
572
0
        if (request_to_handle_latency_us.has_value() &&
573
0
            *request_to_handle_latency_us > config::warm_up_rowset_slow_log_ms * 1000) {
574
0
            g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
575
0
            LOG(INFO) << "warm up rowset (request to handle) took " << *request_to_handle_latency_us
576
0
                      << " us, tablet_id: " << rs_meta.tablet_id()
577
0
                      << ", rowset_id: " << rowset_id.to_string();
578
0
        }
579
0
        int64_t expiration_time = tablet_meta->ttl_seconds();
580
581
0
        if (!tablet->add_rowset_warmup_state(rs_meta, WarmUpTriggerSource::EVENT_DRIVEN)) {
582
0
            LOG(INFO) << "found duplicate warmup task for rowset " << rowset_id.to_string()
583
0
                      << ", skip it";
584
0
            continue;
585
0
        }
586
587
0
        for (int64_t segment_id = 0; segment_id < rs_meta.num_segments(); segment_id++) {
588
0
            if (!config::file_cache_enable_only_warm_up_idx) {
589
0
                auto segment_size = rs_meta.segment_file_size(segment_id);
590
591
                // Use rs_meta.fs() instead of storage_resource.value()->fs to support packed files.
592
                // PackedFileSystem wrapper in rs_meta.fs() handles the index_map lookup and
593
                // reads from the correct packed file.
594
0
                io::DownloadFileMeta download_meta {
595
0
                        .path = storage_resource.value()->remote_segment_path(rs_meta, segment_id),
596
0
                        .file_size = segment_size,
597
0
                        .offset = 0,
598
0
                        .download_size = segment_size,
599
0
                        .file_system = rs_meta.fs(),
600
0
                        .ctx = {.is_index_data = false,
601
0
                                .expiration_time = expiration_time,
602
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
603
0
                                .is_warmup = true},
604
0
                        .download_done =
605
0
                                [=, version = rs_meta.version()](Status st) {
606
0
                                    handle_segment_download_done(
607
0
                                            st, tablet_id, rowset_id, segment_id, tablet, wait,
608
0
                                            version, segment_size, request_ts, handle_ts);
609
0
                                },
610
0
                        .tablet_id = tablet_id};
611
612
0
                g_file_cache_event_driven_warm_up_submitted_segment_num << 1;
613
0
                g_file_cache_event_driven_warm_up_submitted_segment_size << segment_size;
614
0
                if (wait) {
615
0
                    wait->add_count();
616
0
                }
617
618
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
619
0
            }
620
621
            // Use rs_meta.fs() to support packed files for inverted index download.
622
0
            auto download_inverted_index = [&, tablet](std::string index_path, uint64_t idx_size) {
623
0
                io::DownloadFileMeta download_meta {
624
0
                        .path = io::Path(index_path),
625
0
                        .file_size = static_cast<int64_t>(idx_size),
626
0
                        .file_system = rs_meta.fs(),
627
0
                        .ctx = {.is_index_data = false, // DORIS-20877
628
0
                                .expiration_time = expiration_time,
629
0
                                .is_dryrun = config::enable_reader_dryrun_when_download_file_cache,
630
0
                                .is_warmup = true},
631
0
                        .download_done =
632
0
                                [=, version = rs_meta.version()](Status st) {
633
0
                                    handle_inverted_index_download_done(
634
0
                                            st, tablet_id, rowset_id, segment_id, index_path,
635
0
                                            tablet, wait, version, idx_size, request_ts, handle_ts);
636
0
                                },
637
0
                        .tablet_id = tablet_id};
638
0
                g_file_cache_event_driven_warm_up_submitted_index_num << 1;
639
0
                g_file_cache_event_driven_warm_up_submitted_index_size << idx_size;
640
0
                tablet->update_rowset_warmup_state_inverted_idx_num(
641
0
                        WarmUpTriggerSource::EVENT_DRIVEN, rowset_id, 1);
642
0
                if (wait) {
643
0
                    wait->add_count();
644
0
                }
645
0
                _engine.file_cache_block_downloader().submit_download_task(download_meta);
646
0
            };
647
648
            // inverted index
649
0
            auto schema_ptr = rs_meta.tablet_schema();
650
0
            auto idx_version = schema_ptr->get_inverted_index_storage_format();
651
652
0
            if (schema_ptr->has_inverted_index() || schema_ptr->has_ann_index()) {
653
0
                if (idx_version == InvertedIndexStorageFormatPB::V1) {
654
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
655
0
                    std::unordered_map<int64_t, int64_t> index_size_map;
656
0
                    for (const auto& info : inverted_index_info.index_info()) {
657
0
                        if (info.index_file_size() != -1) {
658
0
                            index_size_map[info.index_id()] = info.index_file_size();
659
0
                        } else {
660
0
                            VLOG_DEBUG << "Invalid index_file_size for segment_id " << segment_id
661
0
                                       << ", index_id " << info.index_id();
662
0
                        }
663
0
                    }
664
0
                    for (const auto& index : schema_ptr->inverted_indexes()) {
665
0
                        auto idx_path = storage_resource.value()->remote_idx_v1_path(
666
0
                                rs_meta, segment_id, index->index_id(), index->get_index_suffix());
667
0
                        download_inverted_index(idx_path, index_size_map[index->index_id()]);
668
0
                    }
669
0
                } else { // InvertedIndexStorageFormatPB::V2
670
0
                    auto&& inverted_index_info = rs_meta.inverted_index_file_info(segment_id);
671
0
                    int64_t idx_size = 0;
672
0
                    if (inverted_index_info.has_index_size()) {
673
0
                        idx_size = inverted_index_info.index_size();
674
0
                    } else {
675
0
                        VLOG_DEBUG << "index_size is not set for segment " << segment_id;
676
0
                    }
677
0
                    auto idx_path =
678
0
                            storage_resource.value()->remote_idx_v2_path(rs_meta, segment_id);
679
0
                    download_inverted_index(idx_path, idx_size);
680
0
                }
681
0
            }
682
0
        }
683
0
    }
684
0
    if (wait && wait->timed_wait(due_time)) {
685
0
        g_file_cache_warm_up_rowset_wait_for_compaction_timeout_num << 1;
686
0
        LOG_WARNING("the time spent warming up {} rowsets exceeded {} ms",
687
0
                    request->rowset_metas().size(), request->sync_wait_timeout_ms());
688
0
    }
689
0
}
690
691
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_segment_num(
692
        "file_cache_recycle_cache_finished_segment_num");
693
bvar::Adder<uint64_t> g_file_cache_recycle_cache_finished_index_num(
694
        "file_cache_recycle_cache_finished_index_num");
695
696
void CloudInternalServiceImpl::recycle_cache(google::protobuf::RpcController* controller
697
                                             [[maybe_unused]],
698
                                             const PRecycleCacheRequest* request,
699
                                             PRecycleCacheResponse* response,
700
0
                                             google::protobuf::Closure* done) {
701
0
    brpc::ClosureGuard closure_guard(done);
702
703
0
    if (!config::enable_file_cache) {
704
0
        return;
705
0
    }
706
0
    for (const auto& meta : request->cache_metas()) {
707
0
        for (int64_t segment_id = 0; segment_id < meta.num_segments(); segment_id++) {
708
0
            auto file_key = Segment::file_cache_key(meta.rowset_id(), segment_id);
709
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
710
0
            file_cache->remove_if_cached_async(file_key);
711
0
            g_file_cache_recycle_cache_finished_segment_num << 1;
712
0
        }
713
714
        // inverted index
715
0
        for (const auto& file_name : meta.index_file_names()) {
716
0
            auto file_key = io::BlockFileCache::hash(file_name);
717
0
            auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key);
718
0
            file_cache->remove_if_cached_async(file_key);
719
0
            g_file_cache_recycle_cache_finished_index_num << 1;
720
0
        }
721
0
    }
722
0
}
723
724
#include "common/compile_check_avoid_end.h"
725
} // namespace doris