Coverage Report

Created: 2025-05-22 09:45

/root/doris/cloud/src/recycler/checker.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// When building against libc++ with ABI version 1 or lower, define the libc++
// switch named below before any standard header is included by this file.
// NOTE(review): presumably required because Checker declares a
// std::deque<InstanceInfoPB> member while InstanceInfoPB is only
// forward-declared in this header -- confirm against the libc++ documentation.
#if defined(USE_LIBCPP) && _LIBCPP_ABI_VERSION <= 1
21
#define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE
22
#endif
23
#include <atomic>
24
#include <condition_variable>
25
#include <deque>
26
#include <functional>
27
#include <thread>
28
#include <unordered_map>
29
#include <unordered_set>
30
31
#include "recycler/storage_vault_accessor.h"
32
#include "recycler/white_black_list.h"
33
34
// Forward declaration only: in this header RowsetMetaCloudPB appears solely as
// a const-reference parameter type of a callback
// (see InstanceChecker::collect_tablet_rowsets).
namespace doris {
35
class RowsetMetaCloudPB;
36
} // namespace doris
37
38
namespace doris::cloud {
39
// Forward declarations of types referenced by the class declarations below.
// In this header they are used through std::shared_ptr, by const reference, or
// as a container element type (std::deque<InstanceInfoPB>).
class StorageVaultAccessor;
40
class InstanceChecker;
41
class TxnKv;
42
class InstanceInfoPB;
43
44
// Top-level checker object. It holds a TxnKv handle, a set of worker threads,
// a queue of pending instances and a map of per-instance InstanceChecker
// objects that are currently working. All non-inline members are defined
// outside this header.
class Checker {
45
public:
46
    // Constructs a checker that shares ownership of `txn_kv`.
    explicit Checker(std::shared_ptr<TxnKv> txn_kv);
47
    ~Checker();
48
49
    // NOTE(review): the return convention is not documented in this header;
    // presumably 0 on success like the other int-returning functions here --
    // confirm against the definition.
    int start();
50
51
    void stop();
52
326
    // Returns the current value of the stop flag (acquire load).
    bool stopped() const { return stopped_.load(std::memory_order_acquire); }
53
54
private:
55
    void lease_check_jobs();
56
    void inspect_instance_check_interval();
57
    void do_inspect(const InstanceInfoPB& instance);
58
59
private:
60
    // Grants RecyclerServiceImpl access to the private members of this class.
    friend class RecyclerServiceImpl;
61
62
    std::shared_ptr<TxnKv> txn_kv_;
63
    // Stop flag read by stopped(); initially false.
    std::atomic_bool stopped_ {false};
64
    std::string ip_port_;
65
    std::vector<std::thread> workers_;
66
67
    // NOTE(review): presumably guards the pending/working containers and is the
    // mutex used with the condition variables below -- confirm in the .cpp.
    std::mutex mtx_;
68
    // notify check workers
69
    std::condition_variable pending_instance_cond_;
70
    std::deque<InstanceInfoPB> pending_instance_queue_;
71
    // instance_id -> enqueue_timestamp
72
    std::unordered_map<std::string, long> pending_instance_map_;
73
    // NOTE(review): key is presumably the instance_id, matching
    // pending_instance_map_ -- confirm.
    std::unordered_map<std::string, std::shared_ptr<InstanceChecker>> working_instance_map_;
74
    // notify instance scanner and lease thread
75
    std::condition_variable notifier_;
76
77
    WhiteBlackList instance_filter_;
78
};
79
80
// Per-instance checker. It is constructed with a TxnKv handle and an instance
// id, and keeps a map of StorageVaultAccessor objects keyed by id. The do_*
// functions are the individual checks; their result conventions are given in
// the comments on each declaration.
class InstanceChecker {
81
public:
82
    // Constructs a checker for `instance_id` that shares ownership of `txn_kv`.
    explicit InstanceChecker(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id);
83
    // Return 0 if success, otherwise error
84
    int init(const InstanceInfoPB& instance);
85
    // Return 0 if success.
86
    // Return 1 if data leak is identified.
87
    // Return negative if a temporary error occurred during the check process.
88
    int do_inverted_check();
89
90
    // Return 0 if success.
91
    // Return 1 if data loss is identified.
92
    // Return negative if a temporary error occurred during the check process.
93
    int do_check();
94
95
    // Return 0 if success.
96
    // Return 1 if delete bitmap leak is identified.
97
    // Return negative if a temporary error occurred during the check process.
98
    int do_delete_bitmap_inverted_check();
99
100
    // checks if https://github.com/apache/doris/pull/40204 works as expected
101
    // the stale delete bitmap will be cleared in MS when BE delete expired stale rowsets
102
    // NOTE: stale rowsets will be lost after BE restarts, so there may be some stale delete bitmaps
103
    // which will not be cleared.
104
    int do_delete_bitmap_storage_optimize_check();
105
106
    // NOTE(review): the return convention is not documented in this header;
    // presumably it matches the other do_* checks above -- confirm against the
    // definition.
    int do_mow_compaction_key_check();
107
108
    // If there are multiple buckets, return the minimum lifecycle; if there are no buckets (i.e.
109
    // all accessors are HdfsAccessor), return INT64_MAX.
110
    // Return 0 if success, otherwise error
111
    // The lifecycle value is delivered through the `lifecycle_days` out-parameter;
    // the int return value is the status code.
    int get_bucket_lifecycle(int64_t* lifecycle_days);
112
0
    // Sets the stop flag to true (release store).
    void stop() { stopped_.store(true, std::memory_order_release); }
113
5.10k
    // Returns the current value of the stop flag (acquire load).
    bool stopped() const { return stopped_.load(std::memory_order_acquire); }
114
115
private:
116
    // returns 0 for success otherwise error
117
    int init_obj_store_accessors(const InstanceInfoPB& instance);
118
119
    // returns 0 for success otherwise error
120
    int init_storage_vault_accessors(const InstanceInfoPB& instance);
121
122
    // NOTE(review): the int64_t passed to `check_func` is presumably a tablet id
    // -- confirm against the definition.
    int traverse_mow_tablet(const std::function<int(int64_t)>& check_func);
123
    // NOTE(review): the meaning of the four `callback` arguments is not visible
    // in this header -- confirm against the definition before relying on them.
    int traverse_rowset_delete_bitmaps(
124
            int64_t tablet_id, std::string rowset_id,
125
            const std::function<int(int64_t, std::string_view, int64_t, int64_t)>& callback);
126
    // `collect_cb` receives a const reference to a doris::RowsetMetaCloudPB.
    int collect_tablet_rowsets(
127
            int64_t tablet_id,
128
            const std::function<void(const doris::RowsetMetaCloudPB&)>& collect_cb);
129
    int traverse_delete_bitmaps(const std::function<int(int64_t)>& check_func);
130
131
    int check_delete_bitmap_storage_optimize(int64_t tablet_id);
132
133
    // Stop flag written by stop() and read by stopped(); initially false.
    std::atomic_bool stopped_ {false};
134
    std::shared_ptr<TxnKv> txn_kv_;
135
    std::string instance_id_;
136
    // id -> accessor
137
    std::unordered_map<std::string, std::shared_ptr<StorageVaultAccessor>> accessor_map_;
138
};
139
140
} // namespace doris::cloud