Coverage Report

Created: 2025-11-21 14:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/s3_util.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <aws/core/Aws.h>
21
#include <aws/core/client/ClientConfiguration.h>
22
#include <aws/s3/S3Errors.h>
23
#include <bvar/bvar.h>
24
#include <fmt/format.h>
25
#include <gen_cpp/AgentService_types.h>
26
#include <gen_cpp/cloud.pb.h>
27
28
#include <map>
29
#include <memory>
30
#include <mutex>
31
#include <string>
32
#include <unordered_map>
33
34
#include "common/status.h"
35
#include "cpp/aws_common.h"
36
#include "cpp/s3_rate_limiter.h"
37
#include "io/fs/obj_storage_client.h"
38
#include "vec/common/string_ref.h"
39
40
// Forward declaration of the AWS SDK S3 client class; no S3 client header is included
// by this file, so only the name is made visible here.
namespace Aws::S3 {
41
class S3Client;
42
} // namespace Aws::S3
43
44
// Forward declaration of the bvar::Adder class template.
// NOTE(review): <bvar/bvar.h> is already included above, so this declaration looks
// redundant -- confirm before removing.
namespace bvar {
45
template <typename T>
46
class Adder;
47
}
48
49
namespace doris {
50
51
// Latency recorders for object-storage requests, one per operation kind.
// They are only declared here (extern); the definitions live in another translation unit.
// NOTE(review): the closing brace is followed by a stray ';' (an empty declaration) --
// harmless, but it can trigger -Wextra-semi style warnings.
namespace s3_bvar {
52
extern bvar::LatencyRecorder s3_get_latency;
53
extern bvar::LatencyRecorder s3_put_latency;
54
extern bvar::LatencyRecorder s3_delete_object_latency;
55
extern bvar::LatencyRecorder s3_delete_objects_latency;
56
extern bvar::LatencyRecorder s3_head_latency;
57
extern bvar::LatencyRecorder s3_multi_part_upload_latency;
58
extern bvar::LatencyRecorder s3_list_latency;
59
extern bvar::LatencyRecorder s3_list_object_versions_latency;
60
extern bvar::LatencyRecorder s3_get_bucket_version_latency;
61
extern bvar::LatencyRecorder s3_copy_object_latency;
62
}; // namespace s3_bvar
63
64
// Returns a printable form of the access key `ak`; S3ClientConf::to_string() uses it
// instead of printing the key directly. The exact masking rule is in the implementation.
std::string hide_access_key(const std::string& ak);
65
// Reconfigures the rate limiter selected by `type` with the given speed, burst and limit.
// NOTE(review): the meaning of the int return value is not visible from this header --
// presumably 0 on success; confirm against the definition.
int reset_s3_rate_limiter(S3RateLimitType type, size_t max_speed, size_t max_burst, size_t limit);
66
67
// Forward declaration; S3URI is only used by reference in this header.
class S3URI;
68
struct S3ClientConf {
69
    std::string endpoint;
70
    std::string region;
71
    std::string ak;
72
    std::string sk;
73
    std::string token;
74
    // For azure we'd better support the bucket at the first time init azure blob container client
75
    std::string bucket;
76
    io::ObjStorageType provider = io::ObjStorageType::AWS;
77
    int max_connections = -1;
78
    int request_timeout_ms = -1;
79
    int connect_timeout_ms = -1;
80
    bool use_virtual_addressing = true;
81
    // For aws s3, no need to override endpoint
82
    bool need_override_endpoint = true;
83
84
    CredProviderType cred_provider_type = CredProviderType::Default;
85
    std::string role_arn;
86
    std::string external_id;
87
88
8
    uint64_t get_hash() const {
89
8
        uint64_t hash_code = 0;
90
8
        hash_code ^= crc32_hash(ak);
91
8
        hash_code ^= crc32_hash(sk);
92
8
        hash_code ^= crc32_hash(token);
93
8
        hash_code ^= crc32_hash(endpoint);
94
8
        hash_code ^= crc32_hash(region);
95
8
        hash_code ^= crc32_hash(bucket);
96
8
        hash_code ^= max_connections;
97
8
        hash_code ^= request_timeout_ms;
98
8
        hash_code ^= connect_timeout_ms;
99
8
        hash_code ^= use_virtual_addressing;
100
8
        hash_code ^= static_cast<int>(provider);
101
102
8
        hash_code ^= static_cast<int>(cred_provider_type);
103
8
        hash_code ^= crc32_hash(role_arn);
104
8
        hash_code ^= crc32_hash(external_id);
105
8
        return hash_code;
106
8
    }
107
108
3
    std::string to_string() const {
109
3
        return fmt::format(
110
3
                "(ak={}, token={}, endpoint={}, region={}, bucket={}, max_connections={}, "
111
3
                "request_timeout_ms={}, connect_timeout_ms={}, use_virtual_addressing={}, "
112
3
                "cred_provider_type={},role_arn={}, external_id={}",
113
3
                hide_access_key(ak), token, endpoint, region, bucket, max_connections,
114
3
                request_timeout_ms, connect_timeout_ms, use_virtual_addressing, cred_provider_type,
115
3
                role_arn, external_id);
116
3
    }
117
};
118
119
struct S3Conf {
120
    std::string bucket;
121
    std::string prefix;
122
    S3ClientConf client_conf;
123
124
    bool sse_enabled = false;
125
    static S3Conf get_s3_conf(const cloud::ObjectStoreInfoPB&);
126
    static S3Conf get_s3_conf(const TS3StorageParam&);
127
128
1
    std::string to_string() const {
129
1
        return fmt::format("(bucket={}, prefix={}, client_conf={}, sse_enabled={})", bucket, prefix,
130
1
                           client_conf.to_string(), sse_enabled);
131
1
    }
132
};
133
134
// Builds object-storage clients from an S3ClientConf.
// The constructor is private; callers reach the factory through instance().
class S3ClientFactory {
135
public:
136
    ~S3ClientFactory();
137
138
    // Returns a reference to the factory object (defined out of line).
    static S3ClientFactory& instance();
139
140
    // Returns a shared client for `s3_conf`.
    // NOTE(review): presumably looks up _cache by S3ClientConf::get_hash() under _lock
    // before creating a new client -- confirm in the implementation.
    std::shared_ptr<io::ObjStorageClient> create(const S3ClientConf& s3_conf);
141
142
    // Populates `*s3_conf` from a string property map and an S3URI and reports the outcome
    // as a Status. The recognised property keys are defined by the implementation.
    static Status convert_properties_to_s3_conf(const std::map<std::string, std::string>& prop,
143
                                                const S3URI& s3_uri, S3Conf* s3_conf);
144
145
4
    // Returns a reference to a function-local static ClientConfiguration, so the default
    // constructor runs once; callers are expected to copy it rather than construct their own.
    static Aws::Client::ClientConfiguration& getClientConfiguration() {
146
        // The default constructor of ClientConfiguration will do some http call
147
        // such as Aws::Internal::GetEC2MetadataClient and other init operation,
148
        // which is unnecessary.
149
        // So here we use a static instance, and deep copy every time
150
        // to avoid unnecessary operations.
151
4
        static Aws::Client::ClientConfiguration instance;
152
4
        return instance;
153
4
    }
154
155
    // Returns the rate limiter holder for `type`.
    // NOTE(review): the holders are stored as unique_ptr in _rate_limiters, so the returned
    // pointer is presumably non-owning -- confirm in the implementation.
    S3RateLimiterHolder* rate_limiter(S3RateLimitType type);
156
157
private:
158
    // Provider-specific client construction helpers.
    std::shared_ptr<io::ObjStorageClient> _create_s3_client(const S3ClientConf& s3_conf);
159
    std::shared_ptr<io::ObjStorageClient> _create_azure_client(const S3ClientConf& s3_conf);
160
    // Two variants of AWS credentials-provider construction plus the entry point that is
    // given the configuration; how a variant is chosen is decided in the implementation.
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> _get_aws_credentials_provider_v1(
161
            const S3ClientConf& s3_conf);
162
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> _get_aws_credentials_provider_v2(
163
            const S3ClientConf& s3_conf);
164
    std::shared_ptr<Aws::Auth::AWSCredentialsProvider> get_aws_credentials_provider(
165
            const S3ClientConf& s3_conf);
166
167
    // Private: instances are not constructible by outside code.
    S3ClientFactory();
168
169
    Aws::SDKOptions _aws_options;
170
    // NOTE(review): presumably guards _cache -- confirm in the implementation.
    std::mutex _lock;
171
    // Clients indexed by a 64-bit key (presumably S3ClientConf::get_hash()).
    std::unordered_map<uint64_t, std::shared_ptr<io::ObjStorageClient>> _cache;
172
    std::string _ca_cert_file_path;
173
    // NOTE(review): std::array is used here but <array> is not included directly by this
    // header; it currently relies on a transitive include.
    // Two slots, presumably one per S3RateLimitType value -- confirm.
    std::array<std::unique_ptr<S3RateLimiterHolder>, 2> _rate_limiters;
174
};
175
176
} // end namespace doris