Coverage Report

Created: 2024-11-20 21:05

/root/doris/be/src/olap/data_dir.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/Types_types.h>
21
#include <stddef.h>
22
23
#include <atomic>
24
#include <condition_variable>
25
#include <cstdint>
26
#include <memory>
27
#include <mutex>
28
#include <set>
29
#include <shared_mutex>
30
#include <string>
31
#include <vector>
32
33
#include "common/status.h"
34
#include "olap/olap_common.h"
35
#include "util/metrics.h"
36
37
namespace doris {
38
39
class Tablet;
40
class TabletManager;
41
class TxnManager;
42
class OlapMeta;
43
class RowsetIdGenerator;
44
class StorageEngine;
45
46
// File name ".testfile"; presumably the scratch file used by DataDir's disk
// read/write check (see DataDir::_read_and_write_test_file) -- TODO confirm in data_dir.cpp.
// `inline constexpr` gives a single definition shared by every translation unit that
// includes this header and makes the value usable in constant expressions.
inline constexpr const char* kTestFilePath = ".testfile";
47
48
// A DataDir manages the data stored under a single path.
49
// For now, once a DataDir has been created it is never deleted, to keep the implementation simple.
50
class DataDir {
51
public:
52
    DataDir(StorageEngine& engine, const std::string& path, int64_t capacity_bytes = -1,
53
            TStorageMedium::type storage_medium = TStorageMedium::HDD);
54
    ~DataDir();
55
56
    Status init(bool init_meta = true);
57
    void stop_bg_worker();
58
59
378
    const std::string& path() const { return _path; }
60
110
    size_t path_hash() const { return _path_hash; }
61
62
288
    bool is_used() const { return _is_used; }
63
41
    int32_t cluster_id() const { return _cluster_id; }
64
41
    bool cluster_id_incomplete() const { return _cluster_id_incomplete; }
65
66
0
    // Returns a DataDirInfo filled by copying this DataDir's path, path hash, capacity,
    // available bytes, trash usage, used flag and storage medium. No lock is taken here.
    DataDirInfo get_dir_info() {
67
0
        DataDirInfo info;
68
0
        info.path = _path;
69
0
        info.path_hash = _path_hash;
70
0
        info.disk_capacity = _disk_capacity_bytes;
71
0
        info.available = _available_bytes;
72
0
        info.trash_used_capacity = _trash_used_bytes;
73
0
        info.is_used = _is_used;
74
0
        info.storage_medium = _storage_medium;
75
0
        return info;
76
0
    }
77
78
    // Save a cluster_id file under the data path to guard against an
79
    // invalid BE config, for example two BEs using the same
80
    // data path.
81
    Status set_cluster_id(int32_t cluster_id);
82
    void health_check();
83
84
93
    // Atomically increments the shard counter (relaxed ordering) and returns its
    // previous value modulo MAX_SHARD_NUM, so successive calls cycle through shard ids.
    uint64_t get_shard() {
85
93
        return _current_shard.fetch_add(1, std::memory_order_relaxed) % MAX_SHARD_NUM;
86
93
    }
87
88
2.71k
    OlapMeta* get_meta() { return _meta; }
89
90
0
    bool is_ssd_disk() const { return _storage_medium == TStorageMedium::SSD; }
91
92
41
    TStorageMedium::type storage_medium() const { return _storage_medium; }
93
94
    void register_tablet(Tablet* tablet);
95
    void deregister_tablet(Tablet* tablet);
96
    void clear_tablets(std::vector<TabletInfo>* tablet_infos);
97
98
    std::string get_absolute_shard_path(int64_t shard_id);
99
    std::string get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, int32_t schema_hash);
100
101
    void find_tablet_in_trash(int64_t tablet_id, std::vector<std::string>* paths);
102
103
    static std::string get_root_path_from_schema_hash_path_in_trash(
104
            const std::string& schema_hash_dir_in_trash);
105
106
    // Load data from meta and data files.
107
    Status load();
108
109
    void perform_path_gc();
110
111
    void perform_remote_rowset_gc();
112
113
    void perform_remote_tablet_gc();
114
115
    // Check whether the capacity limit would be reached after adding the incoming data.
116
    // Returns true if the limit is reached, otherwise returns false.
117
    // TODO(cmy): for now we cannot precisely calculate the capacity Doris has used,
118
    // so in order to avoid running out of disk capacity, we currently use the actual
119
    // available disk capacity and total capacity to do the calculation.
120
    // As a result, the capacity Doris actually uses may exceed the user-specified capacity.
121
    bool reach_capacity_limit(int64_t incoming_data_size);
122
123
    Status update_capacity();
124
125
    void update_trash_capacity();
126
127
    void update_local_data_size(int64_t size);
128
129
    void update_remote_data_size(int64_t size);
130
131
    size_t tablet_size() const;
132
133
    void disks_compaction_score_increment(int64_t delta);
134
135
    void disks_compaction_num_increment(int64_t delta);
136
137
320
    // Returns the fraction of the disk capacity that would be in use after adding
    // incoming_data_size bytes: (capacity - available + incoming) / capacity,
    // or 0 when the recorded capacity is 0 (avoids dividing by zero).
    // NOTE(review): the numerator mixes size_t members with a signed int64_t argument
    // before the division; confirm callers never pass a value that makes it negative.
    double get_usage(int64_t incoming_data_size) const {
138
320
        return _disk_capacity_bytes == 0
139
320
                       ? 0
140
320
                       : (_disk_capacity_bytes - _available_bytes + incoming_data_size) /
141
320
                                 (double)_disk_capacity_bytes;
142
320
    }
143
144
    // Move tablet to trash.
145
    Status move_to_trash(const std::string& tablet_path);
146
147
    static Status delete_tablet_parent_path_if_empty(const std::string& tablet_path);
148
149
private:
150
    Status _init_cluster_id();
151
    Status _init_capacity_and_create_shards();
152
    Status _init_meta();
153
154
    Status _check_disk();
155
    Status _read_and_write_test_file();
156
    // Check whether an old-format tablet (hdr_ start) exists in olap. When Doris is upgraded to
157
    // the current version, it may lead to missing data. When
158
    // conf::storage_strict_check_incompatible_old_format is true, the process will log fatal.
159
    Status _check_incompatible_old_format_tablet();
160
161
    int _path_gc_step {0};
162
163
    void _perform_tablet_gc(const std::string& tablet_schema_hash_path, int16_t shard_name);
164
165
    void _perform_rowset_gc(const std::string& tablet_schema_hash_path);
166
167
private:
168
    std::atomic<bool> _stop_bg_worker = false;
169
170
    StorageEngine& _engine;
171
    std::string _path;
172
    size_t _path_hash;
173
174
    // The actual available capacity of the disk of this data dir.
175
    size_t _available_bytes;
176
    // The actual capacity of the disk of this data dir.
177
    size_t _disk_capacity_bytes;
178
    size_t _trash_used_bytes;
179
    TStorageMedium::type _storage_medium;
180
    bool _is_used;
181
182
    int32_t _cluster_id;
183
    bool _cluster_id_incomplete = false;
184
    // This flag will be set to true if this store was not in the root path when reloading.
    // NOTE(review): no in-class initializer here; confirm the constructor sets it.
185
    bool _to_be_deleted;
186
187
    static constexpr uint64_t MAX_SHARD_NUM = 1024;
188
    std::atomic<uint64_t> _current_shard {0};
189
    // Used to protect _tablet_set.
190
    mutable std::mutex _mutex;
191
    std::set<TabletInfo> _tablet_set;
192
193
    OlapMeta* _meta = nullptr;
194
195
    std::shared_ptr<MetricEntity> _data_dir_metric_entity;
196
    IntGauge* disks_total_capacity = nullptr;
197
    IntGauge* disks_avail_capacity = nullptr;
198
    IntGauge* disks_local_used_capacity = nullptr;
199
    IntGauge* disks_remote_used_capacity = nullptr;
200
    IntGauge* disks_trash_used_capacity = nullptr;
201
    IntGauge* disks_state = nullptr;
202
    IntGauge* disks_compaction_score = nullptr;
203
    IntGauge* disks_compaction_num = nullptr;
204
};
205
206
} // namespace doris