/root/doris/be/src/util/s3_util.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/s3_util.h" |
19 | | |
20 | | #include <aws/core/auth/AWSAuthSigner.h> |
21 | | #include <aws/core/auth/AWSCredentials.h> |
22 | | #include <aws/core/auth/AWSCredentialsProviderChain.h> |
23 | | #include <aws/core/client/DefaultRetryStrategy.h> |
24 | | #include <aws/core/utils/logging/LogLevel.h> |
25 | | #include <aws/core/utils/logging/LogSystemInterface.h> |
26 | | #include <aws/core/utils/memory/stl/AWSStringStream.h> |
27 | | #include <aws/s3/S3Client.h> |
28 | | #include <bvar/reducer.h> |
29 | | #include <util/string_util.h> |
30 | | |
#include <atomic>
#include <cerrno>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <functional>
#include <limits>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
38 | | |
39 | | #include "common/config.h" |
40 | | #include "common/logging.h" |
41 | | #include "runtime/exec_env.h" |
42 | | #include "s3_uri.h" |
43 | | #include "vec/exec/scan/scanner_scheduler.h" |
44 | | |
45 | | namespace doris { |
46 | | |
47 | | namespace s3_bvar { |
48 | | bvar::LatencyRecorder s3_get_latency("s3_get"); |
49 | | bvar::LatencyRecorder s3_put_latency("s3_put"); |
50 | | bvar::LatencyRecorder s3_delete_latency("s3_delete"); |
51 | | bvar::LatencyRecorder s3_head_latency("s3_head"); |
52 | | bvar::LatencyRecorder s3_multi_part_upload_latency("s3_multi_part_upload"); |
53 | | bvar::LatencyRecorder s3_list_latency("s3_list"); |
54 | | bvar::LatencyRecorder s3_list_object_versions_latency("s3_list_object_versions"); |
55 | | bvar::LatencyRecorder s3_get_bucket_version_latency("s3_get_bucket_version"); |
56 | | bvar::LatencyRecorder s3_copy_object_latency("s3_copy_object"); |
57 | | }; // namespace s3_bvar |
58 | | |
59 | | class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface { |
60 | | public: |
61 | 0 | DorisAWSLogger() : _log_level(Aws::Utils::Logging::LogLevel::Info) {} |
62 | 0 | DorisAWSLogger(Aws::Utils::Logging::LogLevel log_level) : _log_level(log_level) {} |
63 | 0 | ~DorisAWSLogger() final = default; |
64 | 0 | Aws::Utils::Logging::LogLevel GetLogLevel() const final { return _log_level; } |
65 | | void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* format_str, |
66 | 0 | ...) final { |
67 | 0 | _log_impl(log_level, tag, format_str); |
68 | 0 | } |
69 | | void LogStream(Aws::Utils::Logging::LogLevel log_level, const char* tag, |
70 | 0 | const Aws::OStringStream& message_stream) final { |
71 | 0 | _log_impl(log_level, tag, message_stream.str().c_str()); |
72 | 0 | } |
73 | | |
74 | 0 | void Flush() final {} |
75 | | |
76 | | private: |
77 | 0 | void _log_impl(Aws::Utils::Logging::LogLevel log_level, const char* tag, const char* message) { |
78 | 0 | switch (log_level) { |
79 | 0 | case Aws::Utils::Logging::LogLevel::Off: |
80 | 0 | break; |
81 | 0 | case Aws::Utils::Logging::LogLevel::Fatal: |
82 | 0 | LOG(FATAL) << "[" << tag << "] " << message; |
83 | 0 | break; |
84 | 0 | case Aws::Utils::Logging::LogLevel::Error: |
85 | 0 | LOG(ERROR) << "[" << tag << "] " << message; |
86 | 0 | break; |
87 | 0 | case Aws::Utils::Logging::LogLevel::Warn: |
88 | 0 | LOG(WARNING) << "[" << tag << "] " << message; |
89 | 0 | break; |
90 | 0 | case Aws::Utils::Logging::LogLevel::Info: |
91 | 0 | LOG(INFO) << "[" << tag << "] " << message; |
92 | 0 | break; |
93 | 0 | case Aws::Utils::Logging::LogLevel::Debug: |
94 | 0 | LOG(INFO) << "[" << tag << "] " << message; |
95 | 0 | break; |
96 | 0 | case Aws::Utils::Logging::LogLevel::Trace: |
97 | 0 | LOG(INFO) << "[" << tag << "] " << message; |
98 | 0 | break; |
99 | 0 | default: |
100 | 0 | break; |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | | std::atomic<Aws::Utils::Logging::LogLevel> _log_level; |
105 | | }; |
106 | | |
// Property key looked up by convert_properties_to_s3_conf() to choose between
// path-style and virtual addressing.
static const std::string USE_PATH_STYLE {"use_path_style"};
108 | | |
109 | 1 | S3ClientFactory::S3ClientFactory() { |
110 | 1 | _aws_options = Aws::SDKOptions {}; |
111 | 1 | Aws::Utils::Logging::LogLevel logLevel = |
112 | 1 | static_cast<Aws::Utils::Logging::LogLevel>(config::aws_log_level); |
113 | 1 | _aws_options.loggingOptions.logLevel = logLevel; |
114 | 1 | _aws_options.loggingOptions.logger_create_fn = [logLevel] { |
115 | 0 | return std::make_shared<DorisAWSLogger>(logLevel); |
116 | 0 | }; |
117 | 1 | Aws::InitAPI(_aws_options); |
118 | 1 | _ca_cert_file_path = get_valid_ca_cert_path(); |
119 | 1 | } |
120 | | |
121 | 2 | string S3ClientFactory::get_valid_ca_cert_path() { |
122 | 2 | vector<std::string> vec_ca_file_path = doris::split(config::ca_cert_file_paths, ";"); |
123 | 2 | vector<std::string>::iterator it = vec_ca_file_path.begin(); |
124 | 2 | for (; it != vec_ca_file_path.end(); ++it) { |
125 | 2 | if (std::filesystem::exists(*it)) { |
126 | 2 | return *it; |
127 | 2 | } |
128 | 2 | } |
129 | 0 | return ""; |
130 | 2 | } |
131 | | |
132 | 1 | S3ClientFactory::~S3ClientFactory() { |
133 | 1 | Aws::ShutdownAPI(_aws_options); |
134 | 1 | } |
135 | | |
136 | 1 | S3ClientFactory& S3ClientFactory::instance() { |
137 | 1 | static S3ClientFactory ret; |
138 | 1 | return ret; |
139 | 1 | } |
140 | | |
141 | 0 | bool S3ClientFactory::is_s3_conf_valid(const std::map<std::string, std::string>& prop) { |
142 | 0 | StringCaseMap<std::string> properties(prop.begin(), prop.end()); |
143 | 0 | if (properties.find(S3_ENDPOINT) == properties.end() || |
144 | 0 | properties.find(S3_REGION) == properties.end()) { |
145 | 0 | DCHECK(false) << "aws properties is incorrect."; |
146 | 0 | LOG(ERROR) << "aws properties is incorrect."; |
147 | 0 | return false; |
148 | 0 | } |
149 | 0 | return true; |
150 | 0 | } |
151 | | |
152 | 1 | bool S3ClientFactory::is_s3_conf_valid(const S3Conf& s3_conf) { |
153 | 1 | return !s3_conf.endpoint.empty(); |
154 | 1 | } |
155 | | |
156 | 1 | std::shared_ptr<Aws::S3::S3Client> S3ClientFactory::create(const S3Conf& s3_conf) { |
157 | 1 | if (!is_s3_conf_valid(s3_conf)) { |
158 | 0 | return nullptr; |
159 | 0 | } |
160 | | |
161 | 1 | uint64_t hash = s3_conf.get_hash(); |
162 | 1 | { |
163 | 1 | std::lock_guard l(_lock); |
164 | 1 | auto it = _cache.find(hash); |
165 | 1 | if (it != _cache.end()) { |
166 | 0 | return it->second; |
167 | 0 | } |
168 | 1 | } |
169 | | |
170 | 1 | Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration(); |
171 | 1 | aws_config.endpointOverride = s3_conf.endpoint; |
172 | 1 | aws_config.region = s3_conf.region; |
173 | 1 | std::string ca_cert = get_valid_ca_cert_path(); |
174 | 1 | if ("" != _ca_cert_file_path) { |
175 | 1 | aws_config.caFile = _ca_cert_file_path; |
176 | 1 | } else { |
177 | | // config::ca_cert_file_paths is valmutable,get newest value if file path invaild |
178 | 0 | _ca_cert_file_path = get_valid_ca_cert_path(); |
179 | 0 | if ("" != _ca_cert_file_path) { |
180 | 0 | aws_config.caFile = _ca_cert_file_path; |
181 | 0 | } |
182 | 0 | } |
183 | 1 | if (s3_conf.max_connections > 0) { |
184 | 0 | aws_config.maxConnections = s3_conf.max_connections; |
185 | 1 | } else { |
186 | 1 | #ifdef BE_TEST |
187 | | // the S3Client may shared by many threads. |
188 | | // So need to set the number of connections large enough. |
189 | 1 | aws_config.maxConnections = config::doris_scanner_thread_pool_thread_num; |
190 | | #else |
191 | | aws_config.maxConnections = |
192 | | ExecEnv::GetInstance()->scanner_scheduler()->remote_thread_pool_max_thread_num(); |
193 | | #endif |
194 | 1 | } |
195 | | |
196 | 1 | if (s3_conf.request_timeout_ms > 0) { |
197 | 0 | aws_config.requestTimeoutMs = s3_conf.request_timeout_ms; |
198 | 0 | } |
199 | 1 | if (s3_conf.connect_timeout_ms > 0) { |
200 | 0 | aws_config.connectTimeoutMs = s3_conf.connect_timeout_ms; |
201 | 0 | } |
202 | 1 | aws_config.retryStrategy = |
203 | 1 | std::make_shared<Aws::Client::DefaultRetryStrategy>(config::max_s3_client_retry); |
204 | 1 | std::shared_ptr<Aws::S3::S3Client> new_client; |
205 | 1 | if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) { |
206 | 1 | Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk); |
207 | 1 | DCHECK(!aws_cred.IsExpiredOrEmpty()); |
208 | 1 | if (!s3_conf.token.empty()) { |
209 | 0 | aws_cred.SetSessionToken(s3_conf.token); |
210 | 0 | } |
211 | 1 | new_client = std::make_shared<Aws::S3::S3Client>( |
212 | 1 | std::move(aws_cred), std::move(aws_config), |
213 | 1 | Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, |
214 | 1 | s3_conf.use_virtual_addressing); |
215 | 1 | } else { |
216 | 0 | std::shared_ptr<Aws::Auth::AWSCredentialsProvider> aws_provider_chain = |
217 | 0 | std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>(); |
218 | 0 | new_client = std::make_shared<Aws::S3::S3Client>( |
219 | 0 | std::move(aws_provider_chain), std::move(aws_config), |
220 | 0 | Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, |
221 | 0 | s3_conf.use_virtual_addressing); |
222 | 0 | } |
223 | | |
224 | 1 | { |
225 | 1 | std::lock_guard l(_lock); |
226 | 1 | _cache[hash] = new_client; |
227 | 1 | } |
228 | 1 | return new_client; |
229 | 1 | } |
230 | | |
231 | | Status S3ClientFactory::convert_properties_to_s3_conf( |
232 | 0 | const std::map<std::string, std::string>& prop, const S3URI& s3_uri, S3Conf* s3_conf) { |
233 | 0 | if (!is_s3_conf_valid(prop)) { |
234 | 0 | return Status::InvalidArgument("S3 properties are incorrect, please check properties."); |
235 | 0 | } |
236 | 0 | StringCaseMap<std::string> properties(prop.begin(), prop.end()); |
237 | 0 | if (properties.find(S3_AK) != properties.end() && properties.find(S3_SK) != properties.end()) { |
238 | 0 | s3_conf->ak = properties.find(S3_AK)->second; |
239 | 0 | s3_conf->sk = properties.find(S3_SK)->second; |
240 | 0 | } |
241 | 0 | if (properties.find(S3_TOKEN) != properties.end()) { |
242 | 0 | s3_conf->token = properties.find(S3_TOKEN)->second; |
243 | 0 | } |
244 | 0 | s3_conf->endpoint = properties.find(S3_ENDPOINT)->second; |
245 | 0 | s3_conf->region = properties.find(S3_REGION)->second; |
246 | |
|
247 | 0 | if (properties.find(S3_MAX_CONN_SIZE) != properties.end()) { |
248 | 0 | s3_conf->max_connections = std::atoi(properties.find(S3_MAX_CONN_SIZE)->second.c_str()); |
249 | 0 | } |
250 | 0 | if (properties.find(S3_REQUEST_TIMEOUT_MS) != properties.end()) { |
251 | 0 | s3_conf->request_timeout_ms = |
252 | 0 | std::atoi(properties.find(S3_REQUEST_TIMEOUT_MS)->second.c_str()); |
253 | 0 | } |
254 | 0 | if (properties.find(S3_CONN_TIMEOUT_MS) != properties.end()) { |
255 | 0 | s3_conf->connect_timeout_ms = |
256 | 0 | std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); |
257 | 0 | } |
258 | 0 | if (s3_uri.get_bucket() == "") { |
259 | 0 | return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified", |
260 | 0 | s3_uri.to_string()); |
261 | 0 | } |
262 | 0 | s3_conf->bucket = s3_uri.get_bucket(); |
263 | 0 | s3_conf->prefix = ""; |
264 | | |
265 | | // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html |
266 | 0 | s3_conf->use_virtual_addressing = true; |
267 | 0 | if (properties.find(USE_PATH_STYLE) != properties.end()) { |
268 | 0 | s3_conf->use_virtual_addressing = |
269 | 0 | properties.find(USE_PATH_STYLE)->second == "true" ? false : true; |
270 | 0 | } |
271 | 0 | return Status::OK(); |
272 | 0 | } |
273 | | |
274 | | } // end namespace doris |