Coverage Report

Created: 2025-03-10 19:30

/root/doris/be/src/util/s3_util.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/s3_util.h"
19
20
#include <aws/core/auth/AWSAuthSigner.h>
21
#include <aws/core/auth/AWSCredentials.h>
22
#include <aws/core/auth/AWSCredentialsProviderChain.h>
23
#include <aws/core/client/DefaultRetryStrategy.h>
24
#include <aws/core/utils/logging/LogLevel.h>
25
#include <aws/core/utils/logging/LogSystemInterface.h>
26
#include <aws/core/utils/memory/stl/AWSStringStream.h>
27
#include <aws/s3/S3Client.h>
28
#include <bvar/reducer.h>
29
#include <util/string_util.h>
30
31
#include <atomic>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
38
39
#include "common/config.h"
40
#include "common/logging.h"
41
#include "runtime/exec_env.h"
42
#include "s3_uri.h"
43
#include "vec/exec/scan/scanner_scheduler.h"
44
45
namespace doris {
46
47
// bvar latency recorders, one per kind of S3 request. Each recorder is constructed with the
// name string shown on its line; nothing in this file records into them, so the call sites
// are presumably elsewhere (verify against the S3 file system / client wrappers).
namespace s3_bvar {
48
bvar::LatencyRecorder s3_get_latency("s3_get");
49
bvar::LatencyRecorder s3_put_latency("s3_put");
50
bvar::LatencyRecorder s3_delete_latency("s3_delete");
51
bvar::LatencyRecorder s3_head_latency("s3_head");
52
bvar::LatencyRecorder s3_multi_part_upload_latency("s3_multi_part_upload");
53
bvar::LatencyRecorder s3_list_latency("s3_list");
54
bvar::LatencyRecorder s3_list_object_versions_latency("s3_list_object_versions");
55
bvar::LatencyRecorder s3_get_bucket_version_latency("s3_get_bucket_version");
56
bvar::LatencyRecorder s3_copy_object_latency("s3_copy_object");
57
}; // namespace s3_bvar
58
59
class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface {
60
public:
61
0
    DorisAWSLogger() : _log_level(Aws::Utils::Logging::LogLevel::Info) {}
62
0
    DorisAWSLogger(Aws::Utils::Logging::LogLevel log_level) : _log_level(log_level) {}
63
0
    ~DorisAWSLogger() final = default;
64
0
    Aws::Utils::Logging::LogLevel GetLogLevel() const final { return _log_level; }
65
    void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* format_str,
66
0
             ...) final {
67
0
        _log_impl(log_level, tag, format_str);
68
0
    }
69
    void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag,
70
0
                   const Aws::OStringStream& message_stream) final {
71
0
        _log_impl(log_level, tag, message_stream.str().c_str());
72
0
    }
73
74
0
    void Flush() final {}
75
76
private:
77
0
    void _log_impl(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* message) {
78
0
        switch (log_level) {
79
0
        case Aws::Utils::Logging::LogLevel::Off:
80
0
            break;
81
0
        case Aws::Utils::Logging::LogLevel::Fatal:
82
0
            LOG(FATAL) << "[" << tag << "] " << message;
83
0
            break;
84
0
        case Aws::Utils::Logging::LogLevel::Error:
85
0
            LOG(ERROR) << "[" << tag << "] " << message;
86
0
            break;
87
0
        case Aws::Utils::Logging::LogLevel::Warn:
88
0
            LOG(WARNING) << "[" << tag << "] " << message;
89
0
            break;
90
0
        case Aws::Utils::Logging::LogLevel::Info:
91
0
            LOG(INFO) << "[" << tag << "] " << message;
92
0
            break;
93
0
        case Aws::Utils::Logging::LogLevel::Debug:
94
0
            LOG(INFO) << "[" << tag << "] " << message;
95
0
            break;
96
0
        case Aws::Utils::Logging::LogLevel::Trace:
97
0
            LOG(INFO) << "[" << tag << "] " << message;
98
0
            break;
99
0
        default:
100
0
            break;
101
0
        }
102
0
    }
103
104
    std::atomic<Aws::Utils::Logging::LogLevel> _log_level;
105
};
106
107
// Property key read by S3ClientFactory::convert_properties_to_s3_conf(): when the property
// is present with the exact value "true", virtual addressing is turned off for the client.
const static std::string USE_PATH_STYLE = "use_path_style";
108
109
1
// Initializes the AWS SDK for this factory.
//
// The SDK log level is taken from config::aws_log_level and the SDK is told to create a
// DorisAWSLogger at that level. After Aws::InitAPI() the first existing CA certificate path
// from the configuration is cached in _ca_cert_file_path.
S3ClientFactory::S3ClientFactory() {
    auto sdk_log_level = static_cast<Aws::Utils::Logging::LogLevel>(config::aws_log_level);

    _aws_options = Aws::SDKOptions {};
    auto& logging_options = _aws_options.loggingOptions;
    logging_options.logLevel = sdk_log_level;
    logging_options.logger_create_fn = [sdk_log_level]() {
        return std::make_shared<DorisAWSLogger>(sdk_log_level);
    };

    Aws::InitAPI(_aws_options);

    _ca_cert_file_path = get_valid_ca_cert_path();
}
120
121
2
string S3ClientFactory::get_valid_ca_cert_path() {
122
2
    vector<std::string> vec_ca_file_path = doris::split(config::ca_cert_file_paths, ";");
123
2
    vector<std::string>::iterator it = vec_ca_file_path.begin();
124
2
    for (; it != vec_ca_file_path.end(); ++it) {
125
2
        if (std::filesystem::exists(*it)) {
126
2
            return *it;
127
2
        }
128
2
    }
129
0
    return "";
130
2
}
131
132
1
// Shuts the AWS SDK down, passing the same options object the constructor handed to
// Aws::InitAPI().
S3ClientFactory::~S3ClientFactory() {
133
1
    Aws::ShutdownAPI(_aws_options);
134
1
}
135
136
1
// Returns the shared factory. It is a function-local static, so it is constructed (and the
// AWS SDK initialized by the constructor) the first time this function is called.
S3ClientFactory& S3ClientFactory::instance() {
137
1
    static S3ClientFactory ret;
138
1
    return ret;
139
1
}
140
141
0
bool S3ClientFactory::is_s3_conf_valid(const std::map<std::string, std::string>& prop) {
142
0
    StringCaseMap<std::string> properties(prop.begin(), prop.end());
143
0
    if (properties.find(S3_ENDPOINT) == properties.end() ||
144
0
        properties.find(S3_REGION) == properties.end()) {
145
0
        DCHECK(false) << "aws properties is incorrect.";
146
0
        LOG(ERROR) << "aws properties is incorrect.";
147
0
        return false;
148
0
    }
149
0
    return true;
150
0
}
151
152
1
// A parsed S3Conf is accepted when its endpoint is non-empty; no other field is checked here.
bool S3ClientFactory::is_s3_conf_valid(const S3Conf& s3_conf) {
153
1
    return !s3_conf.endpoint.empty();
154
1
}
155
156
1
std::shared_ptr<Aws::S3::S3Client> S3ClientFactory::create(const S3Conf& s3_conf) {
157
1
    if (!is_s3_conf_valid(s3_conf)) {
158
0
        return nullptr;
159
0
    }
160
161
1
    uint64_t hash = s3_conf.get_hash();
162
1
    {
163
1
        std::lock_guard l(_lock);
164
1
        auto it = _cache.find(hash);
165
1
        if (it != _cache.end()) {
166
0
            return it->second;
167
0
        }
168
1
    }
169
170
1
    Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration();
171
1
    aws_config.endpointOverride = s3_conf.endpoint;
172
1
    aws_config.region = s3_conf.region;
173
1
    std::string ca_cert = get_valid_ca_cert_path();
174
1
    if ("" != _ca_cert_file_path) {
175
1
        aws_config.caFile = _ca_cert_file_path;
176
1
    } else {
177
        // config::ca_cert_file_paths is valmutable,get newest value if file path invaild
178
0
        _ca_cert_file_path = get_valid_ca_cert_path();
179
0
        if ("" != _ca_cert_file_path) {
180
0
            aws_config.caFile = _ca_cert_file_path;
181
0
        }
182
0
    }
183
1
    if (s3_conf.max_connections > 0) {
184
0
        aws_config.maxConnections = s3_conf.max_connections;
185
1
    } else {
186
1
#ifdef BE_TEST
187
        // the S3Client may shared by many threads.
188
        // So need to set the number of connections large enough.
189
1
        aws_config.maxConnections = config::doris_scanner_thread_pool_thread_num;
190
#else
191
        aws_config.maxConnections =
192
                ExecEnv::GetInstance()->scanner_scheduler()->remote_thread_pool_max_thread_num();
193
#endif
194
1
    }
195
196
1
    if (s3_conf.request_timeout_ms > 0) {
197
0
        aws_config.requestTimeoutMs = s3_conf.request_timeout_ms;
198
0
    }
199
1
    if (s3_conf.connect_timeout_ms > 0) {
200
0
        aws_config.connectTimeoutMs = s3_conf.connect_timeout_ms;
201
0
    }
202
1
    aws_config.retryStrategy =
203
1
            std::make_shared<Aws::Client::DefaultRetryStrategy>(config::max_s3_client_retry);
204
1
    std::shared_ptr<Aws::S3::S3Client> new_client;
205
1
    if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
206
1
        Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
207
1
        DCHECK(!aws_cred.IsExpiredOrEmpty());
208
1
        if (!s3_conf.token.empty()) {
209
0
            aws_cred.SetSessionToken(s3_conf.token);
210
0
        }
211
1
        new_client = std::make_shared<Aws::S3::S3Client>(
212
1
                std::move(aws_cred), std::move(aws_config),
213
1
                Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
214
1
                s3_conf.use_virtual_addressing);
215
1
    } else {
216
0
        std::shared_ptr<Aws::Auth::AWSCredentialsProvider> aws_provider_chain =
217
0
                std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
218
0
        new_client = std::make_shared<Aws::S3::S3Client>(
219
0
                std::move(aws_provider_chain), std::move(aws_config),
220
0
                Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
221
0
                s3_conf.use_virtual_addressing);
222
0
    }
223
224
1
    {
225
1
        std::lock_guard l(_lock);
226
1
        _cache[hash] = new_client;
227
1
    }
228
1
    return new_client;
229
1
}
230
231
// Fills `*s3_conf` from a raw property map plus an already-parsed S3 URI.
//
// prop    - user supplied properties; must contain S3_ENDPOINT and S3_REGION.
// s3_uri  - source of the bucket name.
// s3_conf - output; fields are written in place as they are parsed.
//
// Returns InvalidArgument when the mandatory properties are missing or when the URI has no
// bucket, otherwise OK. In the missing-bucket case the credential, endpoint, region,
// connection and timeout fields have already been written.
//
// Numeric properties are parsed with std::atoi. Credentials are only copied when both
// S3_AK and S3_SK are present. Virtual addressing is on unless the USE_PATH_STYLE property
// equals "true".
Status S3ClientFactory::convert_properties_to_s3_conf(
        const std::map<std::string, std::string>& prop, const S3URI& s3_uri, S3Conf* s3_conf) {
    if (!is_s3_conf_valid(prop)) {
        return Status::InvalidArgument("S3 properties are incorrect, please check properties.");
    }

    StringCaseMap<std::string> properties(prop.begin(), prop.end());
    const auto not_found = properties.end();
    const auto parse_int = [](const std::string& text) { return std::atoi(text.c_str()); };

    // Credentials: access key and secret key are taken only as a pair.
    const auto ak_it = properties.find(S3_AK);
    const auto sk_it = properties.find(S3_SK);
    if (ak_it != not_found && sk_it != not_found) {
        s3_conf->ak = ak_it->second;
        s3_conf->sk = sk_it->second;
    }
    if (const auto token_it = properties.find(S3_TOKEN); token_it != not_found) {
        s3_conf->token = token_it->second;
    }

    // Presence of these two keys was established by is_s3_conf_valid() above.
    s3_conf->endpoint = properties.find(S3_ENDPOINT)->second;
    s3_conf->region = properties.find(S3_REGION)->second;

    if (const auto conn_it = properties.find(S3_MAX_CONN_SIZE); conn_it != not_found) {
        s3_conf->max_connections = parse_int(conn_it->second);
    }
    if (const auto req_it = properties.find(S3_REQUEST_TIMEOUT_MS); req_it != not_found) {
        s3_conf->request_timeout_ms = parse_int(req_it->second);
    }
    if (const auto connect_it = properties.find(S3_CONN_TIMEOUT_MS); connect_it != not_found) {
        s3_conf->connect_timeout_ms = parse_int(connect_it->second);
    }

    if (s3_uri.get_bucket() == "") {
        return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified",
                                       s3_uri.to_string());
    }
    s3_conf->bucket = s3_uri.get_bucket();
    s3_conf->prefix = "";

    // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html
    const auto path_style_it = properties.find(USE_PATH_STYLE);
    s3_conf->use_virtual_addressing =
            path_style_it == not_found || path_style_it->second != "true";

    return Status::OK();
}
273
274
} // end namespace doris