/root/doris/be/src/olap/data_dir.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/Types_types.h> |
21 | | #include <stddef.h> |
22 | | |
23 | | #include <atomic> |
24 | | #include <condition_variable> |
25 | | #include <cstdint> |
26 | | #include <memory> |
27 | | #include <mutex> |
28 | | #include <set> |
29 | | #include <shared_mutex> |
30 | | #include <string> |
31 | | #include <vector> |
32 | | |
33 | | #include "common/status.h" |
34 | | #include "io/fs/file_system.h" |
35 | | #include "olap/olap_common.h" |
36 | | #include "util/metrics.h" |
37 | | |
38 | | namespace doris { |
39 | | |
40 | | class Tablet; |
41 | | class TabletManager; |
42 | | class TxnManager; |
43 | | class OlapMeta; |
44 | | class RowsetIdGenerator; |
45 | | |
46 | | const char* const kTestFilePath = ".testfile"; /* const object at namespace scope, so it has internal linkage: every translation unit that includes this header gets its own copy */ |
47 | | |
48 | | // A DataDir manages the data stored under a single storage path. |
49 | | // To keep the implementation simple, a DataDir is never deleted once it has been created. |
50 | | class DataDir { |
51 | | public: |
52 | | DataDir(const std::string& path, int64_t capacity_bytes = -1, |
53 | | TStorageMedium::type storage_medium = TStorageMedium::HDD, |
54 | | TabletManager* tablet_manager = nullptr, TxnManager* txn_manager = nullptr); |
55 | | ~DataDir(); |
56 | | |
57 | | Status init(bool init_meta = true); |
58 | | void stop_bg_worker(); |
59 | | |
60 | 425 | const std::string& path() const { return _path; } |
61 | 10 | size_t path_hash() const { return _path_hash; } |
62 | | |
63 | 36 | const io::FileSystemSPtr& fs() const { return _fs; } |
64 | | |
65 | 449 | bool is_used() const { return _is_used; } |
66 | 34 | int32_t cluster_id() const { return _cluster_id; } |
67 | 34 | bool cluster_id_incomplete() const { return _cluster_id_incomplete; } |
68 | | /* Builds a DataDirInfo by copying _path, _path_hash, _disk_capacity_bytes, _available_bytes, _trash_used_bytes, _is_used and _storage_medium; no lock is taken here. */ |
69 | 18 | DataDirInfo get_dir_info() { |
70 | 18 | DataDirInfo info; |
71 | 18 | info.path = _path; |
72 | 18 | info.path_hash = _path_hash; |
73 | 18 | info.disk_capacity = _disk_capacity_bytes; |
74 | 18 | info.available = _available_bytes; |
75 | 18 | info.trash_used_capacity = _trash_used_bytes; |
76 | 18 | info.is_used = _is_used; |
77 | 18 | info.storage_medium = _storage_medium; |
78 | 18 | return info; |
79 | 18 | } |
80 | | |
81 | | // Save a cluster_id file under the data path to guard against an |
82 | | // invalid BE configuration, for example two BEs configured to use the same |
83 | | // data path. |
84 | | Status set_cluster_id(int32_t cluster_id); |
85 | | void health_check(); |
86 | | |
87 | | uint64_t get_shard(); |
88 | | |
89 | 2.71k | OlapMeta* get_meta() { return _meta; } |
90 | | |
91 | 41 | bool is_ssd_disk() const { return _storage_medium == TStorageMedium::SSD; } |
92 | | |
93 | 60 | TStorageMedium::type storage_medium() const { return _storage_medium; } |
94 | | |
95 | | void register_tablet(Tablet* tablet); |
96 | | void deregister_tablet(Tablet* tablet); |
97 | | void clear_tablets(std::vector<TabletInfo>* tablet_infos); |
98 | | |
99 | | std::string get_absolute_shard_path(int64_t shard_id); |
100 | | std::string get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, int32_t schema_hash); |
101 | | |
102 | | void find_tablet_in_trash(int64_t tablet_id, std::vector<std::string>* paths); |
103 | | |
104 | | static std::string get_root_path_from_schema_hash_path_in_trash( |
105 | | const std::string& schema_hash_dir_in_trash); |
106 | | |
107 | | // Load data from meta and data files. |
108 | | Status load(); |
109 | | |
110 | | void perform_path_gc(); |
111 | | |
112 | | void perform_remote_rowset_gc(); |
113 | | |
114 | | void perform_remote_tablet_gc(); |
115 | | |
116 | | // Check whether the capacity limit would be reached after adding the incoming data. |
117 | | // Returns true if the limit is reached, otherwise false. |
118 | | // TODO(cmy): for now we cannot precisely calculate the capacity Doris uses, |
119 | | // so in order to avoid running out of disk capacity, we currently use the actual |
120 | | // available capacity and total capacity of the disk to do the calculation. |
121 | | // As a result, the capacity Doris actually uses may exceed the user-specified capacity. |
122 | | bool reach_capacity_limit(int64_t incoming_data_size); |
123 | | |
124 | | Status update_capacity(); |
125 | | |
126 | | void update_trash_capacity(); |
127 | | |
128 | | void update_local_data_size(int64_t size); |
129 | | |
130 | | void update_remote_data_size(int64_t size); |
131 | | |
132 | | size_t tablet_size() const; |
133 | | |
134 | | void disks_compaction_score_increment(int64_t delta); |
135 | | |
136 | | void disks_compaction_num_increment(int64_t delta); |
137 | | /* Returns (_disk_capacity_bytes - _available_bytes + incoming_data_size) / _disk_capacity_bytes as a double, or 0 when _disk_capacity_bytes is 0. NOTE(review): the two members are size_t, so where size_t is 64 bits wide the numerator is evaluated in unsigned arithmetic and would wrap instead of going negative - confirm callers cannot trigger that. */ |
138 | 469 | double get_usage(int64_t incoming_data_size) const { |
139 | 469 | return _disk_capacity_bytes == 0 |
140 | 469 | ? 0 |
141 | 469 | : (_disk_capacity_bytes - _available_bytes + incoming_data_size) / |
142 | 469 | (double)_disk_capacity_bytes; |
143 | 469 | } |
144 | | |
145 | | // Move tablet to trash. |
146 | | Status move_to_trash(const std::string& tablet_path); |
147 | | |
148 | | static Status delete_tablet_parent_path_if_empty(const std::string& tablet_path); |
149 | | |
150 | | private: |
151 | | Status _init_cluster_id(); |
152 | | Status _init_capacity_and_create_shards(); |
153 | | Status _init_meta(); |
154 | | |
155 | | Status _check_disk(); |
156 | | Status _read_and_write_test_file(); |
157 | | Status read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id); |
158 | | Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id); |
159 | | // Check whether data in the old format (starting with hdr_) exists in olap. Upgrading Doris to the |
160 | | // current version with such data present may lead to missing data. When |
161 | | // conf::storage_strict_check_incompatible_old_format is true, the process will log fatal. |
162 | | Status _check_incompatible_old_format_tablet(); |
163 | | |
164 | | int _path_gc_step {0}; |
165 | | |
166 | | void _perform_tablet_gc(const std::string& tablet_schema_hash_path, int16_t shard_name); |
167 | | |
168 | | void _perform_rowset_gc(const std::string& tablet_schema_hash_path); |
169 | | |
170 | | private: |
171 | | std::atomic<bool> _stop_bg_worker = false; |
172 | | |
173 | | std::string _path; |
174 | | size_t _path_hash; |
175 | | |
176 | | io::FileSystemSPtr _fs; |
177 | | // The actual available capacity of the disk of this data dir. |
178 | | size_t _available_bytes; |
179 | | // The actual total capacity of the disk of this data dir. |
180 | | size_t _disk_capacity_bytes; |
181 | | size_t _trash_used_bytes; |
182 | | TStorageMedium::type _storage_medium; |
183 | | bool _is_used; |
184 | | |
185 | | TabletManager* _tablet_manager = nullptr; |
186 | | TxnManager* _txn_manager = nullptr; |
187 | | int32_t _cluster_id; |
188 | | bool _cluster_id_incomplete = false; |
189 | | // Set to true if this store was not in the root path when reloading. |
190 | | bool _to_be_deleted; |
191 | | |
192 | | // Protects _current_shard and _tablet_set. |
193 | | mutable std::mutex _mutex; |
194 | | uint64_t _current_shard; |
195 | | std::set<TabletInfo> _tablet_set; |
196 | | |
197 | | static const uint32_t MAX_SHARD_NUM = 1024; |
198 | | |
199 | | OlapMeta* _meta = nullptr; |
200 | | |
201 | | std::shared_ptr<MetricEntity> _data_dir_metric_entity; |
202 | | IntGauge* disks_total_capacity = nullptr; |
203 | | IntGauge* disks_avail_capacity = nullptr; |
204 | | IntGauge* disks_local_used_capacity = nullptr; |
205 | | IntGauge* disks_remote_used_capacity = nullptr; |
206 | | IntGauge* disks_trash_used_capacity = nullptr; |
207 | | IntGauge* disks_state = nullptr; |
208 | | IntGauge* disks_compaction_score = nullptr; |
209 | | IntGauge* disks_compaction_num = nullptr; |
210 | | }; |
211 | | |
212 | | } // namespace doris |