Coverage Report

Created: 2025-12-30 16:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/s3_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/s3_util.h"
19
20
#include <aws/core/auth/AWSAuthSigner.h>
21
#include <aws/core/auth/AWSCredentials.h>
22
#include <aws/core/auth/AWSCredentialsProviderChain.h>
23
#include <aws/core/auth/STSCredentialsProvider.h>
24
#include <aws/core/client/DefaultRetryStrategy.h>
25
#include <aws/core/platform/Environment.h>
26
#include <aws/core/utils/logging/LogLevel.h>
27
#include <aws/core/utils/logging/LogSystemInterface.h>
28
#include <aws/core/utils/memory/stl/AWSStringStream.h>
29
#include <aws/identity-management/auth/STSAssumeRoleCredentialsProvider.h>
30
#include <aws/s3/S3Client.h>
31
#include <aws/sts/STSClient.h>
32
#include <bvar/reducer.h>
33
#include <util/string_util.h>
34
35
#include <atomic>
36
#ifdef USE_AZURE
37
#include <azure/core/diagnostics/logger.hpp>
38
#include <azure/storage/blobs/blob_container_client.hpp>
39
#endif
40
#include <cstdlib>
41
#include <filesystem>
42
#include <functional>
43
#include <memory>
44
#include <ostream>
45
#include <utility>
46
47
#include "common/config.h"
48
#include "common/logging.h"
49
#include "common/status.h"
50
#include "cpp/aws_logger.h"
51
#include "cpp/custom_aws_credentials_provider_chain.h"
52
#include "cpp/obj_retry_strategy.h"
53
#include "cpp/sync_point.h"
54
#include "cpp/util.h"
55
#ifdef USE_AZURE
56
#include "io/fs/azure_obj_storage_client.h"
57
#endif
58
#include "io/fs/obj_storage_client.h"
59
#include "io/fs/s3_obj_storage_client.h"
60
#include "runtime/exec_env.h"
61
#include "s3_uri.h"
62
#include "vec/exec/scan/scanner_scheduler.h"
63
64
namespace doris {
65
// bvar latency recorders, one per kind of object-storage request. Each recorder is
// exposed under the metric name passed to its constructor.
// NOTE(review): the code that feeds these recorders is not in this file — presumably
// the S3 client wrappers; confirm against callers.
namespace s3_bvar {
66
bvar::LatencyRecorder s3_get_latency("s3_get");
67
bvar::LatencyRecorder s3_put_latency("s3_put");
68
bvar::LatencyRecorder s3_delete_object_latency("s3_delete_object");
69
bvar::LatencyRecorder s3_delete_objects_latency("s3_delete_objects");
70
bvar::LatencyRecorder s3_head_latency("s3_head");
71
bvar::LatencyRecorder s3_multi_part_upload_latency("s3_multi_part_upload");
72
bvar::LatencyRecorder s3_list_latency("s3_list");
73
bvar::LatencyRecorder s3_list_object_versions_latency("s3_list_object_versions");
74
bvar::LatencyRecorder s3_get_bucket_version_latency("s3_get_bucket_version");
75
bvar::LatencyRecorder s3_copy_object_latency("s3_copy_object");
76
}; // namespace s3_bvar
77
78
namespace {
79
80
6
// Checks that `conf` carries the minimum information needed to build a client.
//
// Rules:
//   * endpoint and region must both be non-empty;
//   * when no role ARN is configured, ak and sk must be either both set or both
//     empty (both empty means anonymous access).
//
// Returns Status::OK() when valid, otherwise InvalidArgument naming the missing field.
doris::Status is_s3_conf_valid(const S3ClientConf& conf) {
    if (conf.endpoint.empty()) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty endpoint");
    }
    if (conf.region.empty()) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty region");
    }

    // With a role ARN present the static key pair is not validated here.
    if (!conf.role_arn.empty()) {
        return Status::OK();
    }

    const bool ak_missing = conf.ak.empty();
    const bool sk_missing = conf.sk.empty();

    // A half-specified key pair is rejected; the absent half is reported.
    if (!ak_missing && sk_missing) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty sk");
    }
    if (ak_missing && !sk_missing) {
        return Status::InvalidArgument<false>("Invalid s3 conf, empty ak");
    }
    return Status::OK();
}
103
104
// Parses `str` as a base-10 `int`.
//
// Returns true only if the WHOLE string is a valid integer that fits in `int`;
// in that case the parsed value is stored in `res`. On any failure (empty input,
// non-numeric text, trailing characters such as "100ms", or out-of-range values)
// returns false and leaves `res` untouched.
bool to_int(std::string_view str, int& res) {
    const char* const first = str.data();
    const char* const last = first + str.size();

    int parsed = 0;
    const auto [ptr, ec] = std::from_chars(first, last, parsed);
    // std::from_chars stops at the first non-digit and still reports success for
    // the prefix, so also require that it consumed every character.
    if (ec != std::errc {} || ptr != last) {
        return false;
    }
    res = parsed;
    return true;
}
109
110
// Property keys read by S3ClientFactory::convert_properties_to_s3_conf().
// The lookup there goes through a StringCaseMap built from the caller's properties.

// "true" selects path-style addressing (i.e. disables virtual addressing).
constexpr char USE_PATH_STYLE[] = "use_path_style";
111
112
// Value of the `provider` property that selects Azure; compared with strcasecmp.
constexpr char AZURE_PROVIDER_STRING[] = "AZURE";
113
constexpr char S3_PROVIDER[] = "provider";
114
constexpr char S3_AK[] = "AWS_ACCESS_KEY";
115
constexpr char S3_SK[] = "AWS_SECRET_KEY";
116
constexpr char S3_ENDPOINT[] = "AWS_ENDPOINT";
117
constexpr char S3_REGION[] = "AWS_REGION";
118
constexpr char S3_TOKEN[] = "AWS_TOKEN";
119
// The three keys below carry integer values parsed with to_int().
constexpr char S3_MAX_CONN_SIZE[] = "AWS_MAX_CONNECTIONS";
120
constexpr char S3_REQUEST_TIMEOUT_MS[] = "AWS_REQUEST_TIMEOUT_MS";
121
constexpr char S3_CONN_TIMEOUT_MS[] = "AWS_CONNECTION_TIMEOUT_MS";
122
// Only the exact value "true" enables the endpoint override.
constexpr char S3_NEED_OVERRIDE_ENDPOINT[] = "AWS_NEED_OVERRIDE_ENDPOINT";
123
124
// Keys for assume-role and credentials-provider selection.
constexpr char S3_ROLE_ARN[] = "AWS_ROLE_ARN";
125
constexpr char S3_EXTERNAL_ID[] = "AWS_EXTERNAL_ID";
126
constexpr char S3_CREDENTIALS_PROVIDER_TYPE[] = "AWS_CREDENTIALS_PROVIDER_TYPE";
127
} // namespace
128
129
// bvar counters for the GET / PUT rate limiters. The S3ClientFactory constructor
// passes each (ns, exceed_req_num) pair to metric_func_factory() when it builds
// the corresponding S3RateLimiterHolder.
// NOTE(review): judging by the names, `*_ns` accumulates time spent throttled and
// `*_exceed_req_num` counts throttled requests — confirm in metric_func_factory.
bvar::Adder<int64_t> get_rate_limit_ns("get_rate_limit_ns");
130
bvar::Adder<int64_t> get_rate_limit_exceed_req_num("get_rate_limit_exceed_req_num");
131
bvar::Adder<int64_t> put_rate_limit_ns("put_rate_limit_ns");
132
bvar::Adder<int64_t> put_rate_limit_exceed_req_num("put_rate_limit_exceed_req_num");
133
134
0
// Returns the rate limiter holder for `type`.
//
// Only GET and PUT are accepted; any other value fails the CHECK below. The
// returned pointer is non-owning: the holder stays owned by `_rate_limiters`.
S3RateLimiterHolder* S3ClientFactory::rate_limiter(S3RateLimitType type) {
    CHECK(type == S3RateLimitType::GET || type == S3RateLimitType::PUT) << to_string(type);
    // The enum value doubles as the index into the holder array.
    const auto slot = static_cast<size_t>(type);
    auto& holder = _rate_limiters[slot];
    return holder.get();
}
138
139
0
// Reconfigures the process-wide rate limiter for `type`.
//
// Returns -1 without touching anything when `type` is UNKNOWN; otherwise forwards
// to the holder's reset() and returns its result.
int reset_s3_rate_limiter(S3RateLimitType type, size_t max_speed, size_t max_burst, size_t limit) {
    if (type != S3RateLimitType::UNKNOWN) {
        auto* limiter = S3ClientFactory::instance().rate_limiter(type);
        return limiter->reset(max_speed, max_burst, limit);
    }
    return -1;
}
145
146
1
// Initializes the AWS SDK and the factory's shared state:
//   * configures SDK logging from config::aws_log_level and routes it to DorisAWSLogger;
//   * calls Aws::InitAPI (paired with Aws::ShutdownAPI in the destructor);
//   * resolves the CA certificate path from config::ca_cert_file_paths (';' separated);
//   * builds the GET and PUT rate limiters, in that order, from the s3_get_* / s3_put_* configs;
//   * when built with USE_AZURE, installs the Azure SDK log level and listener.
S3ClientFactory::S3ClientFactory() {
    _aws_options = Aws::SDKOptions {};
    auto logLevel = static_cast<Aws::Utils::Logging::LogLevel>(config::aws_log_level);
    _aws_options.loggingOptions.logLevel = logLevel;
    _aws_options.loggingOptions.logger_create_fn = [logLevel] {
        return std::make_shared<DorisAWSLogger>(logLevel);
    };
    Aws::InitAPI(_aws_options);
    _ca_cert_file_path = get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
    _rate_limiters = {
            std::make_unique<S3RateLimiterHolder>(
                    config::s3_get_token_per_second, config::s3_get_bucket_tokens,
                    config::s3_get_token_limit,
                    metric_func_factory(get_rate_limit_ns, get_rate_limit_exceed_req_num)),
            std::make_unique<S3RateLimiterHolder>(
                    config::s3_put_token_per_second, config::s3_put_bucket_tokens,
                    config::s3_put_token_limit,
                    metric_func_factory(put_rate_limit_ns, put_rate_limit_exceed_req_num))};

#ifdef USE_AZURE
    auto azureLogLevel =
            static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
    Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
    // The listener is stored by the Azure SDK and outlives this constructor, so it
    // must not capture anything from this scope by reference; it needs no captures.
    Azure::Core::Diagnostics::Logger::SetListener(
            [](Azure::Core::Diagnostics::Logger::Level level, const std::string& message) {
                switch (level) {
                // Verbose and Informational are both logged at INFO.
                case Azure::Core::Diagnostics::Logger::Level::Verbose:
                case Azure::Core::Diagnostics::Logger::Level::Informational:
                    LOG(INFO) << message;
                    break;
                case Azure::Core::Diagnostics::Logger::Level::Warning:
                    LOG(WARNING) << message;
                    break;
                case Azure::Core::Diagnostics::Logger::Level::Error:
                    LOG(ERROR) << message;
                    break;
                default:
                    LOG(WARNING) << "Unknown level: " << static_cast<int>(level)
                                 << ", message: " << message;
                    break;
                }
            });
#endif
}
192
193
1
// Shuts down the AWS SDK with the same options that the constructor passed to
// Aws::InitAPI.
S3ClientFactory::~S3ClientFactory() {
194
1
    Aws::ShutdownAPI(_aws_options);
195
1
}
196
197
8
// Returns the process-wide factory. The instance is a function-local static, so it
// is constructed on first call (which is when the AWS SDK gets initialized).
S3ClientFactory& S3ClientFactory::instance() {
198
8
    static S3ClientFactory ret;
199
8
    return ret;
200
8
}
201
202
6
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::create(const S3ClientConf& s3_conf) {
203
6
    if (!is_s3_conf_valid(s3_conf).ok()) {
204
0
        return nullptr;
205
0
    }
206
207
6
    uint64_t hash = s3_conf.get_hash();
208
6
    {
209
6
        std::lock_guard l(_lock);
210
6
        auto it = _cache.find(hash);
211
6
        if (it != _cache.end()) {
212
3
            return it->second;
213
3
        }
214
6
    }
215
216
3
    auto obj_client = (s3_conf.provider == io::ObjStorageType::AZURE)
217
3
                              ? _create_azure_client(s3_conf)
218
3
                              : _create_s3_client(s3_conf);
219
220
3
    {
221
3
        uint64_t hash = s3_conf.get_hash();
222
3
        std::lock_guard l(_lock);
223
3
        _cache[hash] = obj_client;
224
3
    }
225
3
    return obj_client;
226
6
}
227
228
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::_create_azure_client(
229
0
        const S3ClientConf& s3_conf) {
230
0
#ifdef USE_AZURE
231
0
    auto cred =
232
0
            std::make_shared<Azure::Storage::StorageSharedKeyCredential>(s3_conf.ak, s3_conf.sk);
233
234
0
    const std::string container_name = s3_conf.bucket;
235
0
    std::string uri;
236
0
    if (config::force_azure_blob_global_endpoint) {
237
0
        uri = fmt::format("https://{}.blob.core.windows.net/{}", s3_conf.ak, container_name);
238
0
    } else {
239
0
        uri = fmt::format("{}/{}", s3_conf.endpoint, container_name);
240
0
        if (s3_conf.endpoint.find("://") == std::string::npos) {
241
0
            uri = "https://" + uri;
242
0
        }
243
0
    }
244
245
0
    Azure::Storage::Blobs::BlobClientOptions options;
246
0
    options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
247
0
    options.Retry.MaxRetries = config::max_s3_client_retry;
248
0
    options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
249
250
0
    std::string normalized_uri = normalize_http_uri(uri);
251
0
    VLOG_DEBUG << "uri:" << uri << ", normalized_uri:" << normalized_uri;
252
253
0
    auto containerClient = std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
254
0
            uri, cred, std::move(options));
255
0
    LOG_INFO("create one azure client with {}", s3_conf.to_string());
256
0
    return std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
257
#else
258
    LOG_FATAL("BE is not compiled with azure support, export BUILD_AZURE=ON before building");
259
    return nullptr;
260
#endif
261
0
}
262
263
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
264
4
S3ClientFactory::_get_aws_credentials_provider_v1(const S3ClientConf& s3_conf) {
265
4
    if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
266
1
        Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
267
1
        DCHECK(!aws_cred.IsExpiredOrEmpty());
268
1
        if (!s3_conf.token.empty()) {
269
0
            aws_cred.SetSessionToken(s3_conf.token);
270
0
        }
271
1
        return std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
272
1
    }
273
274
3
    if (s3_conf.cred_provider_type == CredProviderType::InstanceProfile) {
275
2
        if (s3_conf.role_arn.empty()) {
276
1
            return std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
277
1
        }
278
279
1
        Aws::Client::ClientConfiguration clientConfiguration =
280
1
                S3ClientFactory::getClientConfiguration();
281
282
1
        if (_ca_cert_file_path.empty()) {
283
0
            _ca_cert_file_path =
284
0
                    get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
285
0
        }
286
1
        if (!_ca_cert_file_path.empty()) {
287
1
            clientConfiguration.caFile = _ca_cert_file_path;
288
1
        }
289
290
1
        auto stsClient = std::make_shared<Aws::STS::STSClient>(
291
1
                std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>(),
292
1
                clientConfiguration);
293
294
1
        return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
295
1
                s3_conf.role_arn, Aws::String(), s3_conf.external_id,
296
1
                Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
297
2
    }
298
299
    // Support anonymous access for public datasets when no credentials are provided
300
1
    if (s3_conf.ak.empty() && s3_conf.sk.empty()) {
301
1
        return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
302
1
    }
303
304
0
    return std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
305
1
}
306
307
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> S3ClientFactory::_create_credentials_provider(
308
3
        CredProviderType type) {
309
3
    switch (type) {
310
0
    case CredProviderType::Env:
311
0
        return std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>();
312
0
    case CredProviderType::SystemProperties:
313
0
        return std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>();
314
0
    case CredProviderType::WebIdentity:
315
0
        return std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>();
316
0
    case CredProviderType::Container:
317
0
        return std::make_shared<Aws::Auth::TaskRoleCredentialsProvider>(
318
0
                Aws::Environment::GetEnv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI").c_str());
319
2
    case CredProviderType::InstanceProfile:
320
2
        return std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>();
321
0
    case CredProviderType::Anonymous:
322
0
        return std::make_shared<Aws::Auth::AnonymousAWSCredentialsProvider>();
323
1
    case CredProviderType::Default:
324
1
    default:
325
1
        return std::make_shared<CustomAwsCredentialsProviderChain>();
326
3
    }
327
3
}
328
329
std::shared_ptr<Aws::Auth::AWSCredentialsProvider>
330
6
S3ClientFactory::_get_aws_credentials_provider_v2(const S3ClientConf& s3_conf) {
331
6
    if (!s3_conf.ak.empty() && !s3_conf.sk.empty()) {
332
3
        Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
333
3
        DCHECK(!aws_cred.IsExpiredOrEmpty());
334
3
        if (!s3_conf.token.empty()) {
335
0
            aws_cred.SetSessionToken(s3_conf.token);
336
0
        }
337
3
        return std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(std::move(aws_cred));
338
3
    }
339
340
    // Handle role_arn for assume role scenario
341
3
    if (!s3_conf.role_arn.empty()) {
342
1
        Aws::Client::ClientConfiguration clientConfiguration =
343
1
                S3ClientFactory::getClientConfiguration();
344
345
1
        if (_ca_cert_file_path.empty()) {
346
0
            _ca_cert_file_path =
347
0
                    get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
348
0
        }
349
1
        if (!_ca_cert_file_path.empty()) {
350
1
            clientConfiguration.caFile = _ca_cert_file_path;
351
1
        }
352
353
1
        auto baseProvider = _create_credentials_provider(s3_conf.cred_provider_type);
354
1
        auto stsClient = std::make_shared<Aws::STS::STSClient>(baseProvider, clientConfiguration);
355
356
1
        return std::make_shared<Aws::Auth::STSAssumeRoleCredentialsProvider>(
357
1
                s3_conf.role_arn, Aws::String(), s3_conf.external_id,
358
1
                Aws::Auth::DEFAULT_CREDS_LOAD_FREQ_SECONDS, stsClient);
359
1
    }
360
361
    // Return provider based on cred_provider_type
362
2
    return _create_credentials_provider(s3_conf.cred_provider_type);
363
3
}
364
365
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> S3ClientFactory::get_aws_credentials_provider(
366
10
        const S3ClientConf& s3_conf) {
367
10
    if (config::aws_credentials_provider_version == "v2") {
368
6
        return _get_aws_credentials_provider_v2(s3_conf);
369
6
    }
370
4
    return _get_aws_credentials_provider_v1(s3_conf);
371
10
}
372
373
std::shared_ptr<io::ObjStorageClient> S3ClientFactory::_create_s3_client(
374
3
        const S3ClientConf& s3_conf) {
375
3
    TEST_SYNC_POINT_RETURN_WITH_VALUE(
376
2
            "s3_client_factory::create",
377
2
            std::make_shared<io::S3ObjStorageClient>(std::make_shared<Aws::S3::S3Client>()));
378
2
    Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration();
379
2
    if (s3_conf.need_override_endpoint) {
380
2
        aws_config.endpointOverride = s3_conf.endpoint;
381
2
    }
382
2
    aws_config.region = s3_conf.region;
383
384
2
    if (_ca_cert_file_path.empty()) {
385
0
        _ca_cert_file_path = get_valid_ca_cert_path(doris::split(config::ca_cert_file_paths, ";"));
386
0
    }
387
388
2
    if (!_ca_cert_file_path.empty()) {
389
2
        aws_config.caFile = _ca_cert_file_path;
390
2
    }
391
392
2
    if (s3_conf.max_connections > 0) {
393
0
        aws_config.maxConnections = s3_conf.max_connections;
394
2
    } else {
395
2
        aws_config.maxConnections = 102400;
396
2
    }
397
398
2
    aws_config.requestTimeoutMs = 30000;
399
2
    if (s3_conf.request_timeout_ms > 0) {
400
0
        aws_config.requestTimeoutMs = s3_conf.request_timeout_ms;
401
0
    }
402
403
2
    if (s3_conf.connect_timeout_ms > 0) {
404
0
        aws_config.connectTimeoutMs = s3_conf.connect_timeout_ms;
405
0
    }
406
407
2
    if (config::s3_client_http_scheme == "http") {
408
2
        aws_config.scheme = Aws::Http::Scheme::HTTP;
409
2
    }
410
411
2
    aws_config.retryStrategy = std::make_shared<S3CustomRetryStrategy>(
412
2
            config::max_s3_client_retry /*scaleFactor = 25*/);
413
414
2
    std::shared_ptr<Aws::S3::S3Client> new_client = std::make_shared<Aws::S3::S3Client>(
415
2
            get_aws_credentials_provider(s3_conf), std::move(aws_config),
416
2
            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
417
2
            s3_conf.use_virtual_addressing);
418
419
2
    auto obj_client = std::make_shared<io::S3ObjStorageClient>(std::move(new_client));
420
2
    LOG_INFO("create one s3 client with {}", s3_conf.to_string());
421
2
    return obj_client;
422
3
}
423
424
// Fills `s3_conf` from a property map and a parsed S3 URI.
//
// Property keys are looked up through a StringCaseMap copy of `prop`. Keys that
// are absent leave the corresponding field untouched. The bucket is always taken
// from `s3_uri` and the prefix is reset to "".
//
// Returns InvalidArgument when a numeric property cannot be parsed, when the URI
// has no bucket, or when the resulting client conf fails is_s3_conf_valid().
Status S3ClientFactory::convert_properties_to_s3_conf(
        const std::map<std::string, std::string>& prop, const S3URI& s3_uri, S3Conf* s3_conf) {
    StringCaseMap<std::string> properties(prop.begin(), prop.end());

    // Returns the value stored under `key`, or nullptr when the key is absent.
    auto lookup = [&properties](const char* key) -> const std::string* {
        auto it = properties.find(key);
        return it == properties.end() ? nullptr : &it->second;
    };

    auto& client_conf = s3_conf->client_conf;

    if (const auto* value = lookup(S3_AK); value != nullptr) {
        client_conf.ak = *value;
    }
    if (const auto* value = lookup(S3_SK); value != nullptr) {
        client_conf.sk = *value;
    }
    if (const auto* value = lookup(S3_TOKEN); value != nullptr) {
        client_conf.token = *value;
    }
    if (const auto* value = lookup(S3_ENDPOINT); value != nullptr) {
        client_conf.endpoint = *value;
    }
    if (const auto* value = lookup(S3_NEED_OVERRIDE_ENDPOINT); value != nullptr) {
        client_conf.need_override_endpoint = (*value == "true");
    }
    if (const auto* value = lookup(S3_REGION); value != nullptr) {
        client_conf.region = *value;
    }

    // Numeric properties: a present but unparsable value is an error.
    if (const auto* value = lookup(S3_MAX_CONN_SIZE);
        value != nullptr && !to_int(*value, client_conf.max_connections)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_MAX_CONN_SIZE, *value);
    }
    if (const auto* value = lookup(S3_REQUEST_TIMEOUT_MS);
        value != nullptr && !to_int(*value, client_conf.request_timeout_ms)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_REQUEST_TIMEOUT_MS, *value);
    }
    if (const auto* value = lookup(S3_CONN_TIMEOUT_MS);
        value != nullptr && !to_int(*value, client_conf.connect_timeout_ms)) {
        return Status::InvalidArgument("invalid {} value \"{}\"", S3_CONN_TIMEOUT_MS, *value);
    }

    // S3 Provider properties should be case insensitive.
    if (const auto* value = lookup(S3_PROVIDER);
        value != nullptr && 0 == strcasecmp(value->c_str(), AZURE_PROVIDER_STRING)) {
        client_conf.provider = io::ObjStorageType::AZURE;
    }

    if (s3_uri.get_bucket().empty()) {
        return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified",
                                       s3_uri.to_string());
    }
    s3_conf->bucket = s3_uri.get_bucket();
    // For azure's compatibility
    client_conf.bucket = s3_uri.get_bucket();
    s3_conf->prefix = "";

    // See https://sdk.amazonaws.com/cpp/api/LATEST/class_aws_1_1_s3_1_1_s3_client.html
    // Virtual addressing is the default; `use_path_style` = "true" turns it off.
    client_conf.use_virtual_addressing = true;
    if (const auto* value = lookup(USE_PATH_STYLE); value != nullptr) {
        client_conf.use_virtual_addressing = *value != "true";
    }

    // A role ARN implies the instance-profile provider unless an explicit
    // provider type further down overrides it.
    if (const auto* value = lookup(S3_ROLE_ARN); value != nullptr) {
        client_conf.cred_provider_type = CredProviderType::InstanceProfile;
        client_conf.role_arn = *value;
    }

    if (const auto* value = lookup(S3_EXTERNAL_ID); value != nullptr) {
        client_conf.external_id = *value;
    }

    if (const auto* value = lookup(S3_CREDENTIALS_PROVIDER_TYPE); value != nullptr) {
        client_conf.cred_provider_type = cred_provider_type_from_string(*value);
    }

    return is_s3_conf_valid(client_conf);
}
502
503
0
// Converts the thrift credentials-provider enum into the BE-side CredProviderType.
//
// Values without a dedicated mapping are logged at WARNING level and mapped to
// CredProviderType::Default, so an unexpected value (for example one sent by a
// newer FE) degrades gracefully instead of invoking undefined behavior.
static CredProviderType cred_provider_type_from_thrift(TCredProviderType::type cred_provider_type) {
    switch (cred_provider_type) {
    case TCredProviderType::DEFAULT:
        return CredProviderType::Default;
    case TCredProviderType::SIMPLE:
        return CredProviderType::Simple;
    case TCredProviderType::INSTANCE_PROFILE:
        return CredProviderType::InstanceProfile;
    default:
        // No __builtin_unreachable() here: this branch is reachable for unmapped
        // values, and the warning plus fallback below is the intended handling.
        LOG(WARNING) << "Invalid TCredProviderType value: " << cred_provider_type
                     << ", use default instead.";
        return CredProviderType::Default;
    }
}
518
519
0
// Builds an S3Conf from the cloud meta-service ObjectStoreInfoPB.
//
// Virtual addressing is enabled unless `info` explicitly requests path style. The
// credentials provider type is only overridden when `info` carries one. An
// unknown provider value is a fatal error.
S3Conf S3Conf::get_s3_conf(const cloud::ObjectStoreInfoPB& info) {
    S3Conf ret {
            .bucket = info.bucket(),
            .prefix = info.prefix(),
            .client_conf {
                    .endpoint = info.endpoint(),
                    .region = info.region(),
                    .ak = info.ak(),
                    .sk = info.sk(),
                    .token {},
                    .bucket = info.bucket(),
                    // Placeholder; replaced by the provider mapping below.
                    .provider = io::ObjStorageType::AWS,
                    .use_virtual_addressing =
                            info.has_use_path_style() ? !info.use_path_style() : true,

                    .role_arn = info.role_arn(),
                    .external_id = info.external_id(),
            },
            .sse_enabled = info.sse_enabled(),
    };

    if (info.has_cred_provider_type()) {
        ret.client_conf.cred_provider_type = cred_provider_type_from_pb(info.cred_provider_type());
    }

    io::ObjStorageType type = io::ObjStorageType::AWS;
    switch (info.provider()) {
    case cloud::ObjectStoreInfoPB_Provider_OSS:
        type = io::ObjStorageType::OSS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_S3:
        type = io::ObjStorageType::AWS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_COS:
        type = io::ObjStorageType::COS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_OBS:
        type = io::ObjStorageType::OBS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_BOS:
        type = io::ObjStorageType::BOS;
        break;
    case cloud::ObjectStoreInfoPB_Provider_GCP:
        type = io::ObjStorageType::GCP;
        break;
    case cloud::ObjectStoreInfoPB_Provider_AZURE:
        type = io::ObjStorageType::AZURE;
        break;
    case cloud::ObjectStoreInfoPB_Provider_TOS:
        type = io::ObjStorageType::TOS;
        break;
    default:
        // Log first, then mark unreachable: with the hint placed before the log,
        // the compiler may drop the log and the branch becomes undefined behavior.
        LOG_FATAL("unknown provider type {}, info {}", info.provider(), ret.to_string());
        __builtin_unreachable();
    }
    ret.client_conf.provider = type;
    return ret;
}
577
578
0
// Builds an S3Conf from the thrift TS3StorageParam.
//
// `root_path` becomes the prefix and virtual addressing is the negation of
// `use_path_style`. The credentials provider type is only overridden when the
// thrift field is set. A provider of UNKNOWN is logged and treated as AWS; any
// value outside the known set is a fatal error.
S3Conf S3Conf::get_s3_conf(const TS3StorageParam& param) {
    S3Conf ret {
            .bucket = param.bucket,
            .prefix = param.root_path,
            .client_conf = {
                    .endpoint = param.endpoint,
                    .region = param.region,
                    .ak = param.ak,
                    .sk = param.sk,
                    .token = param.token,
                    .bucket = param.bucket,
                    // Placeholder; replaced by the provider mapping below.
                    .provider = io::ObjStorageType::AWS,
                    .max_connections = param.max_conn,
                    .request_timeout_ms = param.request_timeout_ms,
                    .connect_timeout_ms = param.conn_timeout_ms,
                    // Path style (e.g. an IP-address endpoint such as minio) turns
                    // virtual addressing off.
                    .use_virtual_addressing = !param.use_path_style,
                    .role_arn = param.role_arn,
                    .external_id = param.external_id,
            }};

    if (param.__isset.cred_provider_type) {
        ret.client_conf.cred_provider_type =
                cred_provider_type_from_thrift(param.cred_provider_type);
    }

    // `ret.client_conf.provider` is still AWS while the messages below are
    // formatted; the resolved value is stored only after the switch.
    io::ObjStorageType resolved = io::ObjStorageType::AWS;
    switch (param.provider) {
    case TObjStorageType::UNKNOWN:
        LOG_INFO("Receive one legal storage resource, set provider type to aws, param detail {}",
                 ret.to_string());
        [[fallthrough]];
    case TObjStorageType::AWS:
        resolved = io::ObjStorageType::AWS;
        break;
    case TObjStorageType::AZURE:
        resolved = io::ObjStorageType::AZURE;
        break;
    case TObjStorageType::BOS:
        resolved = io::ObjStorageType::BOS;
        break;
    case TObjStorageType::COS:
        resolved = io::ObjStorageType::COS;
        break;
    case TObjStorageType::OBS:
        resolved = io::ObjStorageType::OBS;
        break;
    case TObjStorageType::OSS:
        resolved = io::ObjStorageType::OSS;
        break;
    case TObjStorageType::GCP:
        resolved = io::ObjStorageType::GCP;
        break;
    case TObjStorageType::TOS:
        resolved = io::ObjStorageType::TOS;
        break;
    default:
        LOG_FATAL("unknown provider type {}, info {}", param.provider, ret.to_string());
        __builtin_unreachable();
    }
    ret.client_conf.provider = resolved;
    return ret;
}
643
644
13
// Masks an access key for logging.
//
// A middle section of the key is kept readable and both ends are replaced with
// 'x'. The readable section is 6 characters for keys longer than 7, `len - 2`
// for keys of length 3..7, and empty for shorter keys. When the masked count is
// odd, the extra 'x' goes to the front. The result has the same length as `ak`.
std::string hide_access_key(const std::string& ak) {
    const size_t len = ak.length();

    size_t visible = 0;
    if (len > 7) {
        visible = 6;
    } else if (len > 2) {
        visible = len - 2;
    }

    const size_t masked = len - visible;
    const size_t head = (masked + 1) / 2; // leading 'x' run (gets the odd one)
    const size_t tail = masked - head;    // trailing 'x' run

    std::string out(head, 'x');
    out.append(ak, head, visible);
    out.append(tail, 'x');
    return out;
}
669
670
} // end namespace doris