Coverage Report

Created: 2026-06-26 03:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/s3_util.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <aws/core/Aws.h>
21
#include <aws/core/client/ClientConfiguration.h>
22
#include <aws/s3/S3Errors.h>
23
#include <bvar/bvar.h>
24
#include <fmt/format.h>
25
#include <gen_cpp/AgentService_types.h>
26
#include <gen_cpp/cloud.pb.h>
27
28
#include <functional>
29
#include <map>
30
#include <memory>
31
#include <mutex>
32
#include <string>
33
#include <unordered_map>
34
35
#include "common/status.h"
36
#include "core/string_ref.h"
37
#include "cpp/aws_common.h"
38
#include "cpp/token_bucket_rate_limiter.h"
39
#include "io/fs/obj_storage_client.h"
40
41
namespace Aws::S3 {
42
class S3Client;
43
} // namespace Aws::S3
44
45
namespace bvar {
46
template <typename T>
47
class Adder;
48
}
49
50
namespace doris {
51
52
namespace s3_bvar {
53
extern bvar::LatencyRecorder s3_get_latency;
54
extern bvar::LatencyRecorder s3_put_latency;
55
extern bvar::LatencyRecorder s3_delete_object_latency;
56
extern bvar::LatencyRecorder s3_delete_objects_latency;
57
extern bvar::LatencyRecorder s3_head_latency;
58
extern bvar::LatencyRecorder s3_multi_part_upload_latency;
59
extern bvar::LatencyRecorder s3_list_latency;
60
extern bvar::LatencyRecorder s3_list_object_versions_latency;
61
extern bvar::LatencyRecorder s3_get_bucket_version_latency;
62
extern bvar::LatencyRecorder s3_copy_object_latency;
63
}; // namespace s3_bvar
64
65
std::string hide_access_key(const std::string& ak);
66
int reset_s3_rate_limiter(S3RateLimitType type, size_t max_speed, size_t max_burst, size_t limit);
67
// Rebuild the S3 GET/PUT rate limiters if the related configs have changed.
68
// Safe to call periodically; it is a no-op when nothing changed.
69
void check_s3_rate_limiter_config_changed();
70
71
class S3URI;
72
struct S3ClientConf {
73
    std::string endpoint;
74
    std::string region;
75
    std::string ak;
76
    std::string sk;
77
    std::string token;
78
    // For azure we'd better support the bucket at the first time init azure blob container client
79
    std::string bucket;
80
    io::ObjStorageType provider = io::ObjStorageType::AWS;
81
    int max_connections = -1;
82
    int request_timeout_ms = -1;
83
    int connect_timeout_ms = -1;
84
    bool use_virtual_addressing = true;
85
    // For aws s3, no need to override endpoint
86
    bool need_override_endpoint = true;
87
88
    CredProviderType cred_provider_type = CredProviderType::Default;
89
    std::string role_arn;
90
    std::string external_id;
91
92
38.4k
    uint64_t get_hash() const {
93
38.4k
        uint64_t hash_code = 0;
94
        // Use crc32_hash(ak + sk) hash to prevent swapped AK/SK order from producing same result.
95
38.4k
        hash_code ^= crc32_hash(ak + sk);
96
38.4k
        hash_code ^= crc32_hash(token);
97
38.4k
        hash_code ^= crc32_hash(endpoint);
98
38.4k
        hash_code ^= crc32_hash(region);
99
38.4k
        hash_code ^= crc32_hash(bucket);
100
38.4k
        hash_code ^= max_connections;
101
38.4k
        hash_code ^= request_timeout_ms;
102
38.4k
        hash_code ^= connect_timeout_ms;
103
38.4k
        hash_code ^= use_virtual_addressing;
104
38.4k
        hash_code ^= static_cast<int>(provider);
105
106
38.4k
        hash_code ^= static_cast<int>(cred_provider_type);
107
38.4k
        hash_code ^= crc32_hash(role_arn);
108
38.4k
        hash_code ^= crc32_hash(external_id);
109
38.4k
        return hash_code;
110
38.4k
    }
111
112
119
    std::string to_string() const {
113
119
        return fmt::format(
114
119
                "(ak={}, token={}, endpoint={}, region={}, bucket={}, max_connections={}, "
115
119
                "request_timeout_ms={}, connect_timeout_ms={}, use_virtual_addressing={}, "
116
119
                "cred_provider_type={},role_arn={}, external_id={}",
117
119
                hide_access_key(ak), token, endpoint, region, bucket, max_connections,
118
119
                request_timeout_ms, connect_timeout_ms, use_virtual_addressing, cred_provider_type,
119
119
                role_arn, external_id);
120
119
    }
121
};
122
123
struct S3Conf {
124
    std::string bucket;
125
    std::string prefix;
126
    S3ClientConf client_conf;
127
128
    bool sse_enabled = false;
129
    static S3Conf get_s3_conf(const cloud::ObjectStoreInfoPB&);
130
    static S3Conf get_s3_conf(const TS3StorageParam&);
131
132
36
    std::string to_string() const {
133
36
        return fmt::format("(bucket={}, prefix={}, client_conf={}, sse_enabled={})", bucket, prefix,
134
36
                           client_conf.to_string(), sse_enabled);
135
36
    }
136
};
137
138
class S3ClientFactory {
139
public:
140
    ~S3ClientFactory();
141
142
    static S3ClientFactory& instance();
143
144
    std::shared_ptr<io::ObjStorageClient> create(const S3ClientConf& s3_conf);
145
146
    static Status convert_properties_to_s3_conf(const std::map<std::string, std::string>& prop,
147
                                                const S3URI& s3_uri, S3Conf* s3_conf);
148
149
84
    static Aws::Client::ClientConfiguration& getClientConfiguration() {
150
        // The default constructor of ClientConfiguration will do some http call
151
        // such as Aws::Internal::GetEC2MetadataClient and other init operation,
152
        // which is unnecessary.
153
        // So here we use a static instance, and deep copy every time
154
        // to avoid unnecessary operations.
155
84
        static Aws::Client::ClientConfiguration instance;
156
84
        instance.requestTimeoutMs = config::aws_client_request_timeout_ms;
157
84
        return instance;
158
84
    }
159
160
    S3RateLimiterHolder* rate_limiter(S3RateLimitType type);
161
162
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> get_aws_credentials_provider(
163
            const S3ClientConf& s3_conf);
164
165
#ifdef BE_TEST
166
    void set_client_creator_for_test(
167
            std::function<std::shared_ptr<io::ObjStorageClient>(const S3ClientConf&)> creator);
168
169
    void clear_client_creator_for_test();
170
#endif
171
172
private:
173
    std::shared_ptr<io::ObjStorageClient> _create_s3_client(const S3ClientConf& s3_conf);
174
    std::shared_ptr<io::ObjStorageClient> _create_azure_client(const S3ClientConf& s3_conf);
175
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> _get_aws_credentials_provider_v1(
176
            const S3ClientConf& s3_conf);
177
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> _get_aws_credentials_provider_v2(
178
            const S3ClientConf& s3_conf);
179
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> _create_credentials_provider(
180
            CredProviderType type);
181
182
    S3ClientFactory();
183
184
    Aws::SDKOptions _aws_options;
185
    std::mutex _lock;
186
    std::unordered_map<uint64_t, std::shared_ptr<io::ObjStorageClient>> _cache;
187
    std::string _ca_cert_file_path;
188
    std::array<std::unique_ptr<S3RateLimiterHolder>, 2> _rate_limiters;
189
#ifdef BE_TEST
190
    std::function<std::shared_ptr<io::ObjStorageClient>(const S3ClientConf&)> _test_client_creator;
191
#endif
192
};
193
194
} // end namespace doris