/root/doris/cloud/src/recycler/checker.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #if defined(USE_LIBCPP) && _LIBCPP_ABI_VERSION <= 1 |
21 | | #define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE |
22 | | #endif |
23 | | #include <atomic> |
24 | | #include <condition_variable> |
25 | | #include <deque> |
26 | | #include <functional> |
27 | | #include <thread> |
28 | | #include <unordered_map> |
29 | | #include <unordered_set> |
30 | | |
31 | | #include "recycler/storage_vault_accessor.h" |
32 | | #include "recycler/white_black_list.h" |
33 | | |
34 | | namespace doris { |
35 | | class RowsetMetaCloudPB; |
36 | | } // namespace doris |
37 | | |
38 | | namespace doris::cloud { |
39 | | class StorageVaultAccessor; |
40 | | class InstanceChecker; |
41 | | class TxnKv; |
42 | | class InstanceInfoPB; |
43 | | |
44 | | class Checker { |
45 | | public: |
46 | | explicit Checker(std::shared_ptr<TxnKv> txn_kv); |
47 | | ~Checker(); |
48 | | |
49 | | int start(); |
50 | | |
51 | | void stop(); |
52 | 319 | bool stopped() const { return stopped_.load(std::memory_order_acquire); } |
53 | | |
54 | | private: |
55 | | void lease_check_jobs(); |
56 | | void inspect_instance_check_interval(); |
57 | | void do_inspect(const InstanceInfoPB& instance); |
58 | | |
59 | | private: |
60 | | friend class RecyclerServiceImpl; |
61 | | |
62 | | std::shared_ptr<TxnKv> txn_kv_; |
63 | | std::atomic_bool stopped_ {false}; |
64 | | std::string ip_port_; |
65 | | std::vector<std::thread> workers_; |
66 | | |
67 | | std::mutex mtx_; |
68 | | // notify check workers |
69 | | std::condition_variable pending_instance_cond_; |
70 | | std::deque<InstanceInfoPB> pending_instance_queue_; |
71 | | // instance_id -> enqueue_timestamp |
72 | | std::unordered_map<std::string, long> pending_instance_map_; |
73 | | std::unordered_map<std::string, std::shared_ptr<InstanceChecker>> working_instance_map_; |
74 | | // notify instance scanner and lease thread |
75 | | std::condition_variable notifier_; |
76 | | |
77 | | WhiteBlackList instance_filter_; |
78 | | }; |
79 | | |
80 | | class InstanceChecker { |
81 | | public: |
82 | | explicit InstanceChecker(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id); |
83 | | // Return 0 if success, otherwise error |
84 | | int init(const InstanceInfoPB& instance); |
85 | | // Return 0 if success. |
86 | | // Return 1 if data leak is identified. |
87 | | // Return negative if a temporary error occurred during the check process. |
88 | | int do_inverted_check(); |
89 | | |
90 | | // Return 0 if success. |
91 | | // Return 1 if data loss is identified. |
92 | | // Return negative if a temporary error occurred during the check process. |
93 | | int do_check(); |
94 | | |
95 | | // Return 0 if success. |
96 | | // Return 1 if delete bitmap leak is identified. |
97 | | // Return negative if a temporary error occurred during the check process. |
98 | | int do_delete_bitmap_inverted_check(); |
99 | | |
100 | | // checks if https://github.com/apache/doris/pull/40204 works as expected |
101 | | // the stale delete bitmap will be cleared in MS when BE delete expired stale rowsets |
102 | | // NOTE: stale rowsets will be lost after BE restarts, so there may be some stale delete bitmaps |
103 | | // which will not be cleared. |
104 | | int do_delete_bitmap_storage_optimize_check(); |
105 | | |
106 | | int do_mow_compaction_key_check(); |
107 | | |
108 | | // If there are multiple buckets, return the minimum lifecycle; if there are no buckets (i.e. |
109 | | // all accessors are HdfsAccessor), return INT64_MAX. |
110 | | // Return 0 if success, otherwise error |
111 | | int get_bucket_lifecycle(int64_t* lifecycle_days); |
112 | 0 | void stop() { stopped_.store(true, std::memory_order_release); } |
113 | 5.10k | bool stopped() const { return stopped_.load(std::memory_order_acquire); } |
114 | | |
115 | | private: |
116 | | // returns 0 for success otherwise error |
117 | | int init_obj_store_accessors(const InstanceInfoPB& instance); |
118 | | |
119 | | // returns 0 for success otherwise error |
120 | | int init_storage_vault_accessors(const InstanceInfoPB& instance); |
121 | | |
122 | | int traverse_mow_tablet(const std::function<int(int64_t)>& check_func); |
123 | | int traverse_rowset_delete_bitmaps( |
124 | | int64_t tablet_id, std::string rowset_id, |
125 | | const std::function<int(int64_t, std::string_view, int64_t, int64_t)>& callback); |
126 | | int collect_tablet_rowsets( |
127 | | int64_t tablet_id, |
128 | | const std::function<void(const doris::RowsetMetaCloudPB&)>& collect_cb); |
129 | | int traverse_delete_bitmaps(const std::function<int(int64_t)>& check_func); |
130 | | |
131 | | int check_delete_bitmap_storage_optimize(int64_t tablet_id); |
132 | | |
133 | | std::atomic_bool stopped_ {false}; |
134 | | std::shared_ptr<TxnKv> txn_kv_; |
135 | | std::string instance_id_; |
136 | | // id -> accessor |
137 | | std::unordered_map<std::string, std::shared_ptr<StorageVaultAccessor>> accessor_map_; |
138 | | }; |
139 | | |
140 | | } // namespace doris::cloud |