Coverage Report

Created: 2026-03-11 12:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/s3_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/s3_util.h"
19
20
#include <aws/core/auth/AWSAuthSigner.h>
21
#include <aws/core/auth/AWSCredentials.h>
22
#include <aws/core/auth/AWSCredentialsProviderChain.h>
23
#include <aws/core/auth/STSCredentialsProvider.h>
24
#include <aws/core/client/DefaultRetryStrategy.h>
25
#include <aws/core/platform/Environment.h>
26
#include <aws/core/utils/logging/LogLevel.h>
27
#include <aws/core/utils/logging/LogSystemInterface.h>
28
#include <aws/core/utils/memory/stl/AWSStringStream.h>
29
#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
30
#include <aws/s3/S3Client.h>
31
#include <aws/sts/STSClient.h>
32
#include <bvar/reducer.h>
33
#include <cpp/s3_rate_limiter.h>
34
35
#include <atomic>
36
37
#include "util/string_util.h"
38
39
#ifdef USE_AZURE
40
#include <azure/core/diagnostics/logger.hpp>
41
#include <azure/storage/blobs/blob_container_client.hpp>
42
#endif
43
#include <cstdlib>
44
#include <filesystem>
45
#include <functional>
46
#include <memory>
47
#include <ostream>
48
#include <utility>
49
50
#include "common/config.h"
51
#include "common/logging.h"
52
#include "common/status.h"
53
#include "cpp/aws_logger.h"
54
#include "cpp/custom_aws_credentials_provider_chain.h"
55
#include "cpp/obj_retry_strategy.h"
56
#include "cpp/sync_point.h"
57
#include "cpp/util.h"
58
#ifdef USE_AZURE
59
#include "io/fs/azure_obj_storage_client.h"
60
#endif
61
#include "exec/scan/scanner_scheduler.h"
62
#include "io/fs/obj_storage_client.h"
63
#include "io/fs/s3_obj_storage_client.h"
64
#include "runtime/exec_env.h"
65
#include "util/s3_uri.h"
66
67
namespace doris {
68
// Latency recorders for object-storage requests, one per operation kind. Each recorder is
// constructed with the metric name given as its string argument.
namespace s3_bvar {
69
bvar::LatencyRecorder s3_get_latency("s3_get");
70
bvar::LatencyRecorder s3_put_latency("s3_put");
71
bvar::LatencyRecorder s3_delete_object_latency("s3_delete_object");
72
bvar::LatencyRecorder s3_delete_objects_latency("s3_delete_objects");
73
bvar::LatencyRecorder s3_head_latency("s3_head");
74
bvar::LatencyRecorder s3_multi_part_upload_latency("s3_multi_part_upload");
75
bvar::LatencyRecorder s3_list_latency("s3_list");
76
bvar::LatencyRecorder s3_list_object_versions_latency("s3_list_object_versions");
77
bvar::LatencyRecorder s3_get_bucket_version_latency("s3_get_bucket_version");
78
bvar::LatencyRecorder s3_copy_object_latency("s3_copy_object");
79
}; // namespace s3_bvar
80
81
namespace {
82
83
22
// Validates the fields of `conf` needed to build an object storage client.
//
// Returns InvalidArgument when:
//   - the endpoint is empty, or
//   - the region is empty, or
//   - no role ARN is configured and exactly one of ak / sk is set.
// An empty ak together with an empty sk is accepted (anonymous access). When a role ARN is
// present the ak / sk pair is not inspected at all.
doris::Status is_s3_conf_valid(const S3ClientConf& conf) {
    if (conf.endpoint.empty()) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty endpoint");
    }
    if (conf.region.empty()) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty region");
    }

    // Role-based configurations skip the key pair check.
    if (!conf.role_arn.empty()) {
        return Status::OK();
    }

    const bool ak_missing = conf.ak.empty();
    const bool sk_missing = conf.sk.empty();

    // The key pair must be either complete or completely absent.
    if (!ak_missing && sk_missing) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty sk");
    }
    if (ak_missing && !sk_missing) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty ak");
    }
    return Status::OK();
}
106
107
// Parses `str` as a base-10 integer.
//
// Returns true and stores the value in `res` only when the conversion succeeds AND consumes
// every character of `str`. Inputs with trailing characters (e.g. "12abc"), empty input,
// non-numeric input and out-of-range values all return false and leave `res` untouched.
bool to_int(std::string_view str, int& res) {
    const char* const first = str.data();
    const char* const last = first + str.size();

    // Parse into a temporary so a rejected input never clobbers the caller's value.
    int parsed = 0;
    const auto [ptr, ec] = std::from_chars(first, last, parsed);
    // from_chars stops at the first non-digit and still reports success; require that it
    // reached the end so that "100ms" is not silently accepted as 100.
    if (ec != std::errc {} || ptr != last) {
        return false;
    }
    res = parsed;
    return true;
}
112
113
// Property keys read by S3ClientFactory::convert_properties_to_s3_conf when it fills an S3Conf
// from a string-to-string property map.
constexpr char USE_PATH_STYLE[] = "use_path_style";
114
115
// Value of the S3_PROVIDER property that selects the Azure backend (compared with strcasecmp).
constexpr char AZURE_PROVIDER_STRING[] = "AZURE";
116
constexpr char S3_PROVIDER[] = "provider";
117
constexpr char S3_AK[] = "AWS_ACCESS_KEY";
118
constexpr char S3_SK[] = "AWS_SECRET_KEY";
119
constexpr char S3_ENDPOINT[] = "AWS_ENDPOINT";
120
constexpr char S3_REGION[] = "AWS_REGION";
121
constexpr char S3_TOKEN[] = "AWS_TOKEN";
122
constexpr char S3_MAX_CONN_SIZE[] = "AWS_MAX_CONNECTIONS";
123
constexpr char S3_REQUEST_TIMEOUT_MS[] = "AWS_REQUEST_TIMEOUT_MS";
124
constexpr char S3_CONN_TIMEOUT_MS[] = "AWS_CONNECTION_TIMEOUT_MS";
125
constexpr char S3_NEED_OVERRIDE_ENDPOINT[] = "AWS_NEED_OVERRIDE_ENDPOINT";
126
127
// Keys for role-based and provider-type-based credential selection.
constexpr char S3_ROLE_ARN[] = "AWS_ROLE_ARN";
128
constexpr char S3_EXTERNAL_ID[] = "AWS_EXTERNAL_ID";
129
constexpr char S3_CREDENTIALS_PROVIDER_TYPE[] = "AWS_CREDENTIALS_PROVIDER_TYPE";
130
} // namespace
131
132
// Counters handed to metric_func_factory when the GET / PUT rate limiters are created in the
// S3ClientFactory constructor.
bvar::Adder<int64_t> get_rate_limit_ns("get_rate_limit_ns");
133
bvar::Adder<int64_t> get_rate_limit_exceed_req_num("get_rate_limit_exceed_req_num");
134
bvar::Adder<int64_t> put_rate_limit_ns("put_rate_limit_ns");
135
bvar::Adder<int64_t> put_rate_limit_exceed_req_num("put_rate_limit_exceed_req_num");
136
137
// Last rate limiter settings that were successfully applied through reset_s3_rate_limiter.
// They start at 0 and are compared against the current config values in
// check_s3_rate_limiter_config_changed.
static std::atomic<int64_t> last_s3_get_token_bucket_tokens {0};
138
static std::atomic<int64_t> last_s3_get_token_limit {0};
139
static std::atomic<int64_t> last_s3_get_token_per_second {0};
140
static std::atomic<int64_t> last_s3_put_token_per_second {0};
141
static std::atomic<int64_t> last_s3_put_token_bucket_tokens {0};
142
static std::atomic<int64_t> last_s3_put_token_limit {0};
143
144
// Set while one caller is applying new settings to the corresponding limiter; other callers
// that observe the flag set return without updating.
static std::atomic<bool> updating_get_limiter {false};
145
static std::atomic<bool> updating_put_limiter {false};
146
147
2
// Returns the rate limiter holder stored for `type`.
// Only GET and PUT are accepted; any other value fails the CHECK.
S3RateLimiterHolder* S3ClientFactory::rate_limiter(S3RateLimitType type) {
    CHECK(type == S3RateLimitType::GET || type == S3RateLimitType::PUT) << to_string(type);
    // The enum value doubles as the index into the holder array.
    const auto slot = static_cast<size_t>(type);
    auto& holder = _rate_limiters[slot];
    return holder.get();
}
151
152
template <S3RateLimitType LimiterType>
153
void update_rate_limiter_if_changed(int64_t current_tps, int64_t current_bucket,
154
                                    int64_t current_limit, std::atomic<int64_t>& last_tps,
155
                                    std::atomic<int64_t>& last_bucket,
156
                                    std::atomic<int64_t>& last_limit,
157
14
                                    std::atomic<bool>& updating_flag, const char* limiter_name) {
158
14
    if (last_tps.load(std::memory_order_relaxed) != current_tps ||
159
14
        last_bucket.load(std::memory_order_relaxed) != current_bucket ||
160
14
        last_limit.load(std::memory_order_relaxed) != current_limit) {
161
2
        bool expected = false;
162
2
        if (!updating_flag.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) {
163
0
            return;
164
0
        }
165
2
        if (last_tps.load(std::memory_order_acquire) != current_tps ||
166
2
            last_bucket.load(std::memory_order_acquire) != current_bucket ||
167
2
            last_limit.load(std::memory_order_acquire) != current_limit) {
168
2
            int ret =
169
2
                    reset_s3_rate_limiter(LimiterType, current_tps, current_bucket, current_limit);
170
171
2
            if (ret == 0) {
172
2
                last_tps.store(current_tps, std::memory_order_release);
173
2
                last_bucket.store(current_bucket, std::memory_order_release);
174
2
                last_limit.store(current_limit, std::memory_order_release);
175
2
            } else {
176
0
                LOG(WARNING) << "Failed to reset S3 " << limiter_name
177
0
                             << " rate limiter, error code: " << ret;
178
0
            }
179
2
        }
180
181
2
        updating_flag.store(false, std::memory_order_release);
182
2
    }
183
14
}
_ZN5doris30update_rate_limiter_if_changedILNS_15S3RateLimitTypeE0EEEvlllRSt6atomicIlES4_S4_RS2_IbEPKc
Line
Count
Source
157
7
                                    std::atomic<bool>& updating_flag, const char* limiter_name) {
158
7
    if (last_tps.load(std::memory_order_relaxed) != current_tps ||
159
7
        last_bucket.load(std::memory_order_relaxed) != current_bucket ||
160
7
        last_limit.load(std::memory_order_relaxed) != current_limit) {
161
1
        bool expected = false;
162
1
        if (!updating_flag.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) {
163
0
            return;
164
0
        }
165
1
        if (last_tps.load(std::memory_order_acquire) != current_tps ||
166
1
            last_bucket.load(std::memory_order_acquire) != current_bucket ||
167
1
            last_limit.load(std::memory_order_acquire) != current_limit) {
168
1
            int ret =
169
1
                    reset_s3_rate_limiter(LimiterType, current_tps, current_bucket, current_limit);
170
171
1
            if (ret == 0) {
172
1
                last_tps.store(current_tps, std::memory_order_release);
173
1
                last_bucket.store(current_bucket, std::memory_order_release);
174
1
                last_limit.store(current_limit, std::memory_order_release);
175
1
            } else {
176
0
                LOG(WARNING) << "Failed to reset S3 " << limiter_name
177
0
                             << " rate limiter, error code: " << ret;
178
0
            }
179
1
        }
180
181
1
        updating_flag.store(false, std::memory_order_release);
182
1
    }
183
7
}
_ZN5doris30update_rate_limiter_if_changedILNS_15S3RateLimitTypeE1EEEvlllRSt6atomicIlES4_S4_RS2_IbEPKc
Line
Count
Source
157
7
                                    std::atomic<bool>& updating_flag, const char* limiter_name) {
158
7
    if (last_tps.load(std::memory_order_relaxed) != current_tps ||
159
7
        last_bucket.load(std::memory_order_relaxed) != current_bucket ||
160
7
        last_limit.load(std::memory_order_relaxed) != current_limit) {
161
1
        bool expected = false;
162
1
        if (!updating_flag.compare_exchange_strong(expected, true, std::memory_order_acq_rel)) {
163
0
            return;
164
0
        }
165
1
        if (last_tps.load(std::memory_order_acquire) != current_tps ||
166
1
            last_bucket.load(std::memory_order_acquire) != current_bucket ||
167
1
            last_limit.load(std::memory_order_acquire) != current_limit) {
168
1
            int ret =
169
1
                    reset_s3_rate_limiter(LimiterType, current_tps, current_bucket, current_limit);
170
171
1
            if (ret == 0) {
172
1
                last_tps.store(current_tps, std::memory_order_release);
173
1
                last_bucket.store(current_bucket, std::memory_order_release);
174
1
                last_limit.store(current_limit, std::memory_order_release);
175
1
            } else {
176
0
                LOG(WARNING) << "Failed to reset S3 " << limiter_name
177
0
                             << " rate limiter, error code: " << ret;
178
0
            }
179
1
        }
180
181
1
        updating_flag.store(false, std::memory_order_release);
182
1
    }
183
7
}
184
185
7
void check_s3_rate_limiter_config_changed() {
186
7
    update_rate_limiter_if_changed<S3RateLimitType::GET>(
187
7
            config::s3_get_token_per_second, config::s3_get_bucket_tokens,
188
7
            config::s3_get_token_limit, last_s3_get_token_per_second,
189
7
            last_s3_get_token_bucket_tokens, last_s3_get_token_limit, updating_get_limiter, "GET");
190
191
7
    update_rate_limiter_if_changed<S3RateLimitType::PUT>(
192
7
            config::s3_put_token_per_second, config::s3_put_bucket_tokens,
193
7
            config::s3_put_token_limit, last_s3_put_token_per_second,
194
7
            last_s3_put_token_bucket_tokens, last_s3_put_token_limit, updating_put_limiter, "PUT");
195
7
}
196
197
2
// Resets the rate limiter for `type` with the given speed, burst and limit.
// Returns -1 for S3RateLimitType::UNKNOWN, otherwise whatever the holder's reset() returns.
int reset_s3_rate_limiter(S3RateLimitType type, size_t max_speed, size_t max_burst, size_t limit) {
    if (type != S3RateLimitType::UNKNOWN) {
        auto* holder = S3ClientFactory::instance().rate_limiter(type);
        return holder->reset(max_speed, max_burst, limit);
    }
    return -1;
}
203
204
1
// Sets up process-wide object storage state:
//   - initializes the AWS SDK with a DorisAWSLogger at the level from config::aws_log_level,
//   - resolves the CA certificate path from config::ca_cert_file_paths (';'-separated),
//   - creates the GET and PUT rate limiter holders from the current config values,
//   - when built with USE_AZURE, sets the Azure SDK log level and forwards its messages to
//     the LOG macros.
S3ClientFactory::S3ClientFactory() {
    _aws_options = Aws::SDKOptions {};
    auto logLevel = static_cast<Aws::Utils::Logging::LogLevel>(config::aws_log_level);
    _aws_options.loggingOptions.logLevel = logLevel;
    _aws_options.loggingOptions.logger_create_fn = [logLevel] {
        return std::make_shared<DorisAWSLogger>(logLevel);
    };
    Aws::InitAPI(_aws_options);
    _ca_cert_file_path = get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
    // rate_limiter() indexes this array with static_cast<size_t>(type), so the GET holder has
    // to stay first and the PUT holder second.
    // NOTE(review): presumably GET == 0 and PUT == 1 in S3RateLimitType — confirm.
    _rate_limiters = {
            std::make_unique<S3RateLimiterHolder>(
                    config::s3_get_token_per_second, config::s3_get_bucket_tokens,
                    config::s3_get_token_limit,
                    metric_func_factory(get_rate_limit_ns, get_rate_limit_exceed_req_num)),
            std::make_unique<S3RateLimiterHolder>(
                    config::s3_put_token_per_second, config::s3_put_bucket_tokens,
                    config::s3_put_token_limit,
                    metric_func_factory(put_rate_limit_ns, put_rate_limit_exceed_req_num))};

#ifdef USE_AZURE
    auto azureLogLevel =
            static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
    Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
    // The listener is stored by the Azure SDK and invoked after this constructor has returned,
    // so it must not capture locals by reference. It needs no captures at all.
    Azure::Core::Diagnostics::Logger::SetListener(
            [](Azure::Core::Diagnostics::Logger::Level level, const std::string& message) {
                switch (level) {
                case Azure::Core::Diagnostics::Logger::Level::Verbose:
                    LOG(INFO) << message;
                    break;
                case Azure::Core::Diagnostics::Logger::Level::Informational:
                    LOG(INFO) << message;
                    break;
                case Azure::Core::Diagnostics::Logger::Level::Warning:
                    LOG(WARNING) << message;
                    break;
                case Azure::Core::Diagnostics::Logger::Level::Error:
                    LOG(ERROR) << message;
                    break;
                default:
                    LOG(WARNING) << "Unknown level: " << static_cast<int>(level)
                                 << ", message: " << message;
                    break;
                }
            });
#endif
}
250
251
1
// Shuts down the AWS SDK using the same options object that was passed to Aws::InitAPI in the
// constructor.
S3ClientFactory::~S3ClientFactory() {
252
1
    Aws::ShutdownAPI(_aws_options);
253
1
}
254
255
17
// Returns the process-wide factory, constructed on first use as a function-local static.
S3ClientFactory& S3ClientFactory::instance() {
    static S3ClientFactory factory;
    return factory;
}
259
260
7
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::create(const S3ClientConf& s3_conf) {
261
7
    if (!is_s3_conf_valid(s3_conf).ok()) {
262
0
        return nullptr;
263
0
    }
264
265
7
    check_s3_rate_limiter_config_changed();
266
267
7
#ifdef BE_TEST
268
7
    {
269
7
        std::lock_guard l(_lock);
270
7
        if (_test_client_creator) {
271
1
            return _test_client_creator(s3_conf);
272
1
        }
273
7
    }
274
6
#endif
275
276
6
    {
277
6
        uint64_t hash = s3_conf.get_hash();
278
6
        std::lock_guard l(_lock);
279
6
        auto it = _cache.find(hash);
280
6
        if (it != _cache.end()) {
281
3
            return it->second;
282
3
        }
283
6
    }
284
285
3
    auto obj_client = (s3_conf.provider == io::ObjStorageType::AZURE)
286
3
                              ? _create_azure_client(s3_conf)
287
3
                              : _create_s3_client(s3_conf);
288
289
3
    {
290
3
        uint64_t hash = s3_conf.get_hash();
291
3
        std::lock_guard l(_lock);
292
3
        _cache[hash] = obj_client;
293
3
    }
294
3
    return obj_client;
295
6
}
296
297
#ifdef BE_TEST
298
void S3ClientFactory::set_client_creator_for_test(
299
1
        std::function<std::shared_ptr<io::ObjStorageClient>(const S3ClientConf&)> creator) {
300
1
    std::lock_guard l(_lock);
301
1
    _test_client_creator = std::move(creator);
302
1
}
303
304
1
void S3ClientFactory::clear_client_creator_for_test() {
305
1
    std::lock_guard l(_lock);
306
1
    _test_client_creator = nullptr;
307
1
}
308
#endif
309
310
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::_create_azure_client(
311
0
        const S3ClientConf& s3_conf) {
312
0
#ifdef USE_AZURE
313
0
    auto cred =
314
0
            std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak, s3_conf.sk);
315
316
0
    const std::string container_name = s3_conf.bucket;
317
0
    std::string uri;
318
0
    if (config::force_azure_blob_global_endpoint) {
319
0
        uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak, container_name);
320
0
    } else {
321
0
        uri = fmt::format("{}/{}", s3_conf.endpoint, container_name);
322
0
        if (s3_conf.endpoint.find("://") == std::string::npos) {
323
0
            uri = "https://" + uri;
324
0
        }
325
0
    }
326
327
0
    Azure::Storage::Blobs::BlobClientOptions options;
328
0
    options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
329
0
    options.Retry.MaxRetries = config::max_s3_client_retry;
330
0
    options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
331
332
0
    std::string normalized_uri = normalize_http_uri(uri);
333
0
    VLOG_DEBUG << "uri:" << uri << ", normalized_uri:" << normalized_uri;
334
335
0
    auto containerClient = std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
336
0
            uri, cred, std::move(options));
337
0
    LOG_INFO("create one azure client with {}", s3_conf.to_string());
338
0
    return std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
339
#else
340
    LOG_FATAL("BE is not compiled with azure support, export BUILD_AZURE=ON before building");
341
    return nullptr;
342
#endif
343
0
}
344
345
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
346
6
S3ClientFactory::_get_aws_credentials_provider_v1(const S3ClientConf& s3_conf) {
347
6
    if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
348
2
        Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
349
2
        DCHECK(!aws_cred.IsExpiredOrEmpty());
350
2
        if (!s3_conf.token.empty()) {
351
0
            aws_cred.SetSessionToken(s3_conf.token);
352
0
        }
353
2
        return std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
354
2
    }
355
356
4
    if (s3_conf.cred_provider_type == CredProviderType::InstanceProfile) {
357
2
        if (s3_conf.role_arn.empty()) {
358
1
            return std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
359
1
        }
360
361
1
        Aws::Client::ClientConfiguration clientConfiguration =
362
1
                S3ClientFactory::getClientConfiguration();
363
364
1
        if (_ca_cert_file_path.empty()) {
365
0
            _ca_cert_file_path =
366
0
                    get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
367
0
        }
368
1
        if (!_ca_cert_file_path.empty()) {
369
1
            clientConfiguration.caFile = _ca_cert_file_path;
370
1
        }
371
372
1
        auto stsClient = std::make_shared<Aws::STS::STSClient>(
373
1
                std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>(),
374
1
                clientConfiguration);
375
376
1
        return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
377
1
                s3_conf.role_arn, Aws::String(), s3_conf.external_id,
378
1
                Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
379
2
    }
380
381
    // Support anonymous access for public datasets when no credentials are provided
382
2
    if (s3_conf.ak.empty() && s3_conf.sk.empty()) {
383
2
        return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
384
2
    }
385
386
0
    return std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
387
2
}
388
389
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> S3ClientFactory::_create_credentials_provider(
390
18
        CredProviderType type) {
391
18
    switch (type) {
392
2
    case CredProviderType::Env:
393
2
        return std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>();
394
2
    case CredProviderType::SystemProperties:
395
2
        return std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>();
396
3
    case CredProviderType::WebIdentity:
397
3
        return std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>();
398
2
    case CredProviderType::Container:
399
2
        return std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(
400
2
                Aws::Environment::GetEnv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI").c_str());
401
4
    case CredProviderType::InstanceProfile:
402
4
        return std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
403
2
    case CredProviderType::Anonymous:
404
2
        return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
405
3
    case CredProviderType::Default:
406
3
    default:
407
3
        return std::make_shared<CustomAwsCredentialsProviderChain>();
408
18
    }
409
18
}
410
411
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
412
22
S3ClientFactory::_get_aws_credentials_provider_v2(const S3ClientConf& s3_conf) {
413
22
    if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
414
4
        Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
415
4
        DCHECK(!aws_cred.IsExpiredOrEmpty());
416
4
        if (!s3_conf.token.empty()) {
417
0
            aws_cred.SetSessionToken(s3_conf.token);
418
0
        }
419
4
        return std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
420
4
    }
421
422
    // Handle role_arn for assume role scenario
423
18
    if (!s3_conf.role_arn.empty()) {
424
8
        Aws::Client::ClientConfiguration clientConfiguration =
425
8
                S3ClientFactory::getClientConfiguration();
426
427
8
        if (_ca_cert_file_path.empty()) {
428
0
            _ca_cert_file_path =
429
0
                    get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
430
0
        }
431
8
        if (!_ca_cert_file_path.empty()) {
432
8
            clientConfiguration.caFile = _ca_cert_file_path;
433
8
        }
434
435
8
        auto baseProvider = _create_credentials_provider(s3_conf.cred_provider_type);
436
8
        auto stsClient = std::make_shared<Aws::STS::STSClient>(baseProvider, clientConfiguration);
437
438
8
        return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
439
8
                s3_conf.role_arn, Aws::String(), s3_conf.external_id,
440
8
                Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
441
8
    }
442
443
    // Return provider based on cred_provider_type
444
10
    return _create_credentials_provider(s3_conf.cred_provider_type);
445
18
}
446
447
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> S3ClientFactory::get_aws_credentials_provider(
448
28
        const S3ClientConf& s3_conf) {
449
28
    if (config::aws_credentials_provider_version == "v2") {
450
22
        return _get_aws_credentials_provider_v2(s3_conf);
451
22
    }
452
6
    return _get_aws_credentials_provider_v1(s3_conf);
453
28
}
454
455
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::_create_s3_client(
456
3
        const S3ClientConf& s3_conf) {
457
3
    TEST_SYNC_POINT_RETURN_WITH_VALUE(
458
2
            "s3_client_factory::create",
459
2
            std::make_shared<io::S3ObjStorageClient>(std::make_shared<Aws::S3::S3Client>()));
460
2
    Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration();
461
2
    if (s3_conf.need_override_endpoint) {
462
2
        aws_config.endpointOverride = s3_conf.endpoint;
463
2
    }
464
2
    aws_config.region = s3_conf.region;
465
466
2
    if (_ca_cert_file_path.empty()) {
467
0
        _ca_cert_file_path = get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
468
0
    }
469
470
2
    if (!_ca_cert_file_path.empty()) {
471
2
        aws_config.caFile = _ca_cert_file_path;
472
2
    }
473
474
2
    if (s3_conf.max_connections > 0) {
475
0
        aws_config.maxConnections = s3_conf.max_connections;
476
2
    } else {
477
2
        aws_config.maxConnections = 102400;
478
2
    }
479
480
2
    aws_config.requestTimeoutMs = 30000;
481
2
    if (s3_conf.request_timeout_ms > 0) {
482
0
        aws_config.requestTimeoutMs = s3_conf.request_timeout_ms;
483
0
    }
484
485
2
    if (s3_conf.connect_timeout_ms > 0) {
486
0
        aws_config.connectTimeoutMs = s3_conf.connect_timeout_ms;
487
0
    }
488
489
2
    if (config::s3_client_http_scheme == "http") {
490
2
        aws_config.scheme = Aws::Http::Scheme::HTTP;
491
2
    }
492
493
2
    aws_config.retryStrategy = std::make_shared<S3CustomRetryStrategy>(
494
2
            config::max_s3_client_retry /*scaleFactor = 25*/);
495
496
2
    std::shared_ptr<Aws::S3::S3Client> new_client = std::make_shared<Aws::S3::S3Client>(
497
2
            get_aws_credentials_provider(s3_conf), std::move(aws_config),
498
2
            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
499
2
            s3_conf.use_virtual_addressing);
500
501
2
    auto obj_client = std::make_shared<io::S3ObjStorageClient>(std::move(new_client));
502
2
    LOG_INFO("create one s3 client with {}", s3_conf.to_string());
503
2
    return obj_client;
504
3
}
505
506
// Fills `*s3_conf` from the property map `prop` and the bucket of `s3_uri`.
//
// Properties are copied into a StringCaseMap before lookup. Absent properties leave the
// corresponding field untouched, except `use_virtual_addressing`, which is reset to true before
// the USE_PATH_STYLE property is applied, and `prefix`, which is always set to "".
//
// Returns InvalidArgument when a numeric property cannot be parsed by to_int, when the URI has
// no bucket, or when the resulting client conf fails is_s3_conf_valid. Fields assigned before
// the failing step keep their new values.
Status S3ClientFactory::convert_properties_to_s3_conf(
        const std::map<std::string, std::string>& prop, const S3URI& s3_uri, S3Conf* s3_conf) {
    StringCaseMap<std::string> properties(prop.begin(), prop.end());
    auto& client_conf = s3_conf->client_conf;

    // Returns the value stored under `key`, or nullptr when the property is absent.
    const auto lookup = [&properties](const char* key) -> const std::string* {
        auto it = properties.find(key);
        return it == properties.end() ? nullptr : &it->second;
    };

    if (const auto* ak = lookup(S3_AK)) {
        client_conf.ak = *ak;
    }
    if (const auto* sk = lookup(S3_SK)) {
        client_conf.sk = *sk;
    }
    if (const auto* token = lookup(S3_TOKEN)) {
        client_conf.token = *token;
    }
    if (const auto* endpoint = lookup(S3_ENDPOINT)) {
        client_conf.endpoint = *endpoint;
    }
    if (const auto* need_override = lookup(S3_NEED_OVERRIDE_ENDPOINT)) {
        client_conf.need_override_endpoint = (*need_override == "true");
    }
    if (const auto* region = lookup(S3_REGION)) {
        client_conf.region = *region;
    }

    // Numeric properties: reject the whole conversion on the first unparsable value.
    if (const auto* raw = lookup(S3_MAX_CONN_SIZE);
        raw != nullptr && !to_int(*raw, client_conf.max_connections)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_MAX_CONN_SIZE, *raw);
    }
    if (const auto* raw = lookup(S3_REQUEST_TIMEOUT_MS);
        raw != nullptr && !to_int(*raw, client_conf.request_timeout_ms)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_REQUEST_TIMEOUT_MS, *raw);
    }
    if (const auto* raw = lookup(S3_CONN_TIMEOUT_MS);
        raw != nullptr && !to_int(*raw, client_conf.connect_timeout_ms)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_CONN_TIMEOUT_MS, *raw);
    }

    // S3 Provider properties should be case insensitive.
    if (const auto* provider = lookup(S3_PROVIDER);
        provider != nullptr && 0 == strcasecmp(provider->c_str(), AZURE_PROVIDER_STRING)) {
        client_conf.provider = io::ObjStorageType::AZURE;
    }

    if (s3_uri.get_bucket().empty()) {
        return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified",
                                       s3_uri.to_string());
    }
    s3_conf->bucket = s3_uri.get_bucket();
    // For azure's compatibility
    client_conf.bucket = s3_uri.get_bucket();
    s3_conf->prefix = "";

    // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html
    client_conf.use_virtual_addressing = true;
    if (const auto* path_style = lookup(USE_PATH_STYLE)) {
        client_conf.use_virtual_addressing = *path_style != "true";
    }

    // Keep provider type as Default unless explicitly configured by
    // AWS_CREDENTIALS_PROVIDER_TYPE, consistent with FE behavior.
    if (const auto* role_arn = lookup(S3_ROLE_ARN)) {
        client_conf.role_arn = *role_arn;
    }
    if (const auto* external_id = lookup(S3_EXTERNAL_ID)) {
        client_conf.external_id = *external_id;
    }
    if (const auto* provider_type = lookup(S3_CREDENTIALS_PROVIDER_TYPE)) {
        client_conf.cred_provider_type = cred_provider_type_from_string(*provider_type);
    }

    if (auto validation = is_s3_conf_valid(client_conf); !validation.ok()) {
        return validation;
    }
    return Status::OK();
}
585
586
0
// Translates the Thrift credentials-provider enum into the BE-side
// CredProviderType.
//
// Returns the matching CredProviderType for DEFAULT, SIMPLE and
// INSTANCE_PROFILE. Any other value is logged at WARNING level and mapped to
// CredProviderType::Default.
static CredProviderType cred_provider_type_from_thrift(TCredProviderType::type cred_provider_type) {
    switch (cred_provider_type) {
    case TCredProviderType::DEFAULT:
        return CredProviderType::Default;
    case TCredProviderType::SIMPLE:
        return CredProviderType::Simple;
    case TCredProviderType::INSTANCE_PROFILE:
        return CredProviderType::InstanceProfile;
    default:
        // The value is read out of a Thrift struct, so it is not guaranteed to
        // be one of the enumerators handled above. This branch must stay
        // reachable: marking it with __builtin_unreachable() would turn an
        // unexpected value into undefined behavior and discard the fallback
        // below.
        LOG(WARNING) << "Invalid TCredProviderType value: " << cred_provider_type
                     << ", use default instead.";
        return CredProviderType::Default;
    }
}
601
602
0
// Builds an S3Conf from a cloud ObjectStoreInfoPB.
//
// Bucket, prefix, endpoint, region, ak/sk, role_arn, external_id and
// sse_enabled are copied from `info`; the token is left empty. Virtual-hosted
// addressing is enabled unless `info` explicitly requests path style. The
// credentials provider type is only overridden when `info` carries one, and
// the protobuf provider enum is mapped onto io::ObjStorageType.
//
// An unrecognized provider value is reported through LOG_FATAL.
S3Conf S3Conf::get_s3_conf(const cloud::ObjectStoreInfoPB& info) {
    S3Conf ret {
            .bucket = info.bucket(),
            .prefix = info.prefix(),
            .client_conf {
                    .endpoint = info.endpoint(),
                    .region = info.region(),
                    .ak = info.ak(),
                    .sk = info.sk(),
                    .token {},
                    .bucket = info.bucket(),
                    // Placeholder; replaced by the mapped provider below.
                    .provider = io::ObjStorageType::AWS,
                    // use_virtual_addressing is the inverse of path style and
                    // defaults to true when the field is absent.
                    .use_virtual_addressing =
                            info.has_use_path_style() ? !info.use_path_style() : true,

                    .role_arn = info.role_arn(),
                    .external_id = info.external_id(),
            },
            .sse_enabled = info.sse_enabled(),
    };

    if (info.has_cred_provider_type()) {
        ret.client_conf.cred_provider_type = cred_provider_type_from_pb(info.cred_provider_type());
    }

    io::ObjStorageType type = io::ObjStorageType::AWS;
    switch (info.provider()) {
    case cloud::ObjectStoreInfoPB_Provider_OSS:
        type = io::ObjStorageType::OSS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_S3:
        type = io::ObjStorageType::AWS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_COS:
        type = io::ObjStorageType::COS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_OBS:
        type = io::ObjStorageType::OBS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_BOS:
        type = io::ObjStorageType::BOS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_GCP:
        type = io::ObjStorageType::GCP;
        break;
    case cloud::ObjectStoreInfoPB_Provider_AZURE:
        type = io::ObjStorageType::AZURE;
        break;
    case cloud::ObjectStoreInfoPB_Provider_TOS:
        type = io::ObjStorageType::TOS;
        break;
    default:
        // Emit the diagnostic first, then mark the path unreachable, in the
        // same order as the TS3StorageParam overload. With the marker placed
        // before LOG_FATAL the message could never be produced.
        LOG_FATAL("unknown provider type {}, info {}", info.provider(), ret.to_string());
        __builtin_unreachable();
    }
    ret.client_conf.provider = type;
    return ret;
}
660
661
0
// Builds an S3Conf from the Thrift storage parameters.
//
// Connection settings (endpoint, region, credentials, token, connection and
// timeout limits, role_arn, external_id) are copied from `param`; the prefix
// is taken from `param.root_path`. The credentials provider type is only
// overridden when the optional Thrift field is set. TObjStorageType::UNKNOWN
// is logged and treated as AWS; any value without a case is reported through
// LOG_FATAL.
S3Conf S3Conf::get_s3_conf(const TS3StorageParam& param) {
    S3Conf ret {
            .bucket = param.bucket,
            .prefix = param.root_path,
            .client_conf = {
                    .endpoint = param.endpoint,
                    .region = param.region,
                    .ak = param.ak,
                    .sk = param.sk,
                    .token = param.token,
                    .bucket = param.bucket,
                    // Starts as AWS; the switch below stores the real provider.
                    .provider = io::ObjStorageType::AWS,
                    .max_connections = param.max_conn,
                    .request_timeout_ms = param.request_timeout_ms,
                    .connect_timeout_ms = param.conn_timeout_ms,
                    // Virtual-hosted addressing is the inverse of the path-style
                    // flag (relevant e.g. for cold/hot separation on MinIO
                    // addressed by IP).
                    .use_virtual_addressing = !param.use_path_style,
                    .role_arn = param.role_arn,
                    .external_id = param.external_id,
            }};

    if (param.__isset.cred_provider_type) {
        ret.client_conf.cred_provider_type =
                cred_provider_type_from_thrift(param.cred_provider_type);
    }

    // Write the mapped provider straight into the config. Both logging branches
    // run before any assignment, so ret.to_string() still sees the initial AWS
    // value.
    auto& provider = ret.client_conf.provider;
    switch (param.provider) {
    case TObjStorageType::UNKNOWN:
        LOG_INFO("Receive one legal storage resource, set provider type to aws, param detail {}",
                 ret.to_string());
        [[fallthrough]];
    case TObjStorageType::AWS:
        provider = io::ObjStorageType::AWS;
        break;
    case TObjStorageType::AZURE:
        provider = io::ObjStorageType::AZURE;
        break;
    case TObjStorageType::BOS:
        provider = io::ObjStorageType::BOS;
        break;
    case TObjStorageType::COS:
        provider = io::ObjStorageType::COS;
        break;
    case TObjStorageType::OBS:
        provider = io::ObjStorageType::OBS;
        break;
    case TObjStorageType::OSS:
        provider = io::ObjStorageType::OSS;
        break;
    case TObjStorageType::GCP:
        provider = io::ObjStorageType::GCP;
        break;
    case TObjStorageType::TOS:
        provider = io::ObjStorageType::TOS;
        break;
    default:
        LOG_FATAL("unknown provider type {}, info {}", param.provider, ret.to_string());
        __builtin_unreachable();
    }
    return ret;
}
726
727
13
// Returns a copy of `ak` with its leading and trailing characters replaced by
// 'x', leaving only a short run in the middle readable.
//
// Number of characters left readable:
//   - length > 7 : 6
//   - length 3..7: length - 2
//   - length <= 2: 0 (everything is masked)
// The masked characters are split between both ends; when their count is odd
// the extra one goes to the front.
std::string hide_access_key(const std::string& ak) {
    const size_t total = ak.size();

    size_t visible = 0;
    if (total > 7) {
        visible = 6;
    } else if (total > 2) {
        visible = total - 2;
    }

    const size_t masked = total - visible;
    const size_t head = (masked + 1) / 2; // rounds up, so the front gets the odd one
    const size_t tail = masked - head;

    // Assemble "x...x" + readable middle + "x...x" instead of patching a copy.
    std::string result;
    result.reserve(total);
    result.append(head, 'x');
    result.append(ak, head, visible);
    result.append(tail, 'x');
    return result;
}
752
753
} // end namespace doris