Coverage Report

Created: 2026-03-12 17:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/io/fs/s3_file_system.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "io/fs/s3_file_system.h"
19
20
#include <fmt/format.h>
21
22
#include <cstddef>
23
24
#include "common/compiler_util.h" // IWYU pragma: keep
25
// IWYU pragma: no_include <bits/chrono.h>
26
#include <aws/core/utils/threading/Executor.h>
27
#include <aws/s3/S3Client.h>
28
29
#include <chrono> // IWYU pragma: keep
30
#include <filesystem>
31
#include <fstream> // IWYU pragma: keep
32
#include <future>
33
#include <memory>
34
35
#include "common/config.h"
36
#include "common/logging.h"
37
#include "common/status.h"
38
#include "io/fs/err_utils.h"
39
#include "io/fs/file_system.h"
40
#include "io/fs/file_writer.h"
41
#include "io/fs/local_file_system.h"
42
#include "io/fs/remote_file_system.h"
43
#include "io/fs/s3_common.h"
44
#include "io/fs/s3_file_reader.h"
45
#include "io/fs/s3_file_writer.h"
46
#include "io/fs/s3_obj_storage_client.h"
47
#include "runtime/exec_env.h"
48
#include "runtime/thread_context.h"
49
#include "util/s3_uri.h"
50
#include "util/s3_util.h"
51
52
namespace doris::io {
53
namespace {
54
constexpr std::string_view OSS_PRIVATE_ENDPOINT_SUFFIX = "-internal.aliyuncs.com";
55
constexpr int LEN_OF_OSS_PRIVATE_SUFFIX = 9; // length of "-internal"
56
57
#ifndef CHECK_S3_CLIENT
58
#define CHECK_S3_CLIENT(client)                                 \
59
4.62k
    if (!client) {                                              \
60
0
        return Status::InvalidArgument("init s3 client error"); \
61
0
    }
62
#endif
63
64
4.91k
Result<std::string> get_key(const Path& full_path) {
65
    // FIXME(plat1ko): Check bucket in full path and support relative path
66
4.91k
    S3URI uri(full_path.native());
67
4.91k
    RETURN_IF_ERROR_RESULT(uri.parse());
68
4.91k
    return uri.get_key();
69
4.91k
}
70
71
} // namespace
72
73
34.4k
ObjClientHolder::ObjClientHolder(S3ClientConf conf) : _conf(std::move(conf)) {}
74
75
34.4k
ObjClientHolder::~ObjClientHolder() = default;
76
77
34.3k
Status ObjClientHolder::init() {
78
34.3k
    _client = S3ClientFactory::instance().create(_conf);
79
34.3k
    if (!_client) {
80
7
        return Status::InvalidArgument("failed to init s3 client with conf {}", _conf.to_string());
81
7
    }
82
83
34.3k
    return Status::OK();
84
34.3k
}
85
86
0
Status ObjClientHolder::reset(const S3ClientConf& conf) {
87
0
    S3ClientConf reset_conf;
88
0
    {
89
0
        std::shared_lock lock(_mtx);
90
0
        if (conf.get_hash() == _conf.get_hash()) {
91
0
            return Status::OK(); // Same conf
92
0
        }
93
94
0
        reset_conf = _conf;
95
0
        reset_conf.ak = conf.ak;
96
0
        reset_conf.sk = conf.sk;
97
0
        reset_conf.token = conf.token;
98
0
        reset_conf.bucket = conf.bucket;
99
0
        reset_conf.connect_timeout_ms = conf.connect_timeout_ms;
100
0
        reset_conf.max_connections = conf.max_connections;
101
0
        reset_conf.request_timeout_ms = conf.request_timeout_ms;
102
0
        reset_conf.use_virtual_addressing = conf.use_virtual_addressing;
103
104
0
        reset_conf.role_arn = conf.role_arn;
105
0
        reset_conf.external_id = conf.external_id;
106
0
        reset_conf.cred_provider_type = conf.cred_provider_type;
107
        // Should check endpoint here?
108
0
    }
109
110
0
    auto client = S3ClientFactory::instance().create(reset_conf);
111
0
    if (!client) {
112
0
        return Status::InvalidArgument("failed to init s3 client with conf {}", conf.to_string());
113
0
    }
114
115
0
    LOG(WARNING) << "reset s3 client with new conf: " << conf.to_string();
116
117
0
    {
118
0
        std::lock_guard lock(_mtx);
119
0
        _client = std::move(client);
120
0
        _conf = std::move(reset_conf);
121
0
    }
122
123
0
    return Status::OK();
124
0
}
125
126
Result<int64_t> ObjClientHolder::object_file_size(const std::string& bucket,
127
5.66k
                                                  const std::string& key) const {
128
5.66k
    auto client = get();
129
5.66k
    if (!client) {
130
0
        return ResultError(Status::InvalidArgument("init s3 client error"));
131
0
    }
132
133
5.66k
    auto resp = client->head_object({
134
5.66k
            .bucket = bucket,
135
5.66k
            .key = key,
136
5.66k
    });
137
138
5.66k
    if (resp.resp.status.code != ErrorCode::OK) {
139
3
        return ResultError(std::move(Status(resp.resp.status.code, std::move(resp.resp.status.msg))
140
3
                                             .append(fmt::format("failed to head s3 file {}",
141
3
                                                                 full_s3_path(bucket, key)))));
142
3
    }
143
144
5.66k
    return resp.file_size;
145
5.66k
}
146
147
173
std::string ObjClientHolder::full_s3_path(std::string_view bucket, std::string_view key) const {
148
173
    return fmt::format("{}/{}/{}", _conf.endpoint, bucket, key);
149
173
}
150
151
170
std::string S3FileSystem::full_s3_path(std::string_view key) const {
152
170
    return _client->full_s3_path(_bucket, key);
153
170
}
154
155
1.73k
Result<std::shared_ptr<S3FileSystem>> S3FileSystem::create(S3Conf s3_conf, std::string id) {
156
1.73k
    std::shared_ptr<S3FileSystem> fs(new S3FileSystem(std::move(s3_conf), std::move(id)));
157
1.73k
    RETURN_IF_ERROR_RESULT(fs->init());
158
1.72k
    return fs;
159
1.73k
}
160
161
S3FileSystem::S3FileSystem(S3Conf s3_conf, std::string id)
162
1.73k
        : RemoteFileSystem(s3_conf.prefix, std::move(id), FileSystemType::S3),
163
1.73k
          _bucket(std::move(s3_conf.bucket)),
164
1.73k
          _prefix(std::move(s3_conf.prefix)),
165
1.73k
          _client(std::make_shared<ObjClientHolder>(std::move(s3_conf.client_conf))) {
166
    // FIXME(plat1ko): Normalize prefix
167
    // remove the first and last '/'
168
1.73k
    if (!_prefix.empty()) {
169
36
        size_t start = _prefix.find_first_not_of('/');
170
36
        if (start == std::string::npos) {
171
0
            _prefix = "";
172
36
        } else {
173
36
            size_t end = _prefix.find_last_not_of('/');
174
36
            if (start > 0 || end < _prefix.size() - 1) {
175
0
                _prefix = _prefix.substr(start, end - start + 1);
176
0
            }
177
36
        }
178
36
    }
179
1.73k
}
180
181
1.73k
Status S3FileSystem::init() {
182
1.73k
    return _client->init();
183
1.73k
}
184
185
1.71k
S3FileSystem::~S3FileSystem() = default;
186
187
Status S3FileSystem::create_file_impl(const Path& file, FileWriterPtr* writer,
188
3.90k
                                      const FileWriterOptions* opts) {
189
3.90k
    auto client = _client->get();
190
3.90k
    CHECK_S3_CLIENT(client);
191
3.90k
    auto key = DORIS_TRY(get_key(file));
192
3.90k
    *writer = std::make_unique<S3FileWriter>(_client, _bucket, std::move(key), opts);
193
3.90k
    return Status::OK();
194
3.90k
}
195
196
Status S3FileSystem::open_file_internal(const Path& file, FileReaderSPtr* reader,
197
69
                                        const FileReaderOptions& opts) {
198
69
    auto key = DORIS_TRY(get_key(file));
199
69
    *reader = DORIS_TRY(S3FileReader::create(_client, _bucket, key, opts.file_size, nullptr));
200
66
    return Status::OK();
201
69
}
202
203
0
Status S3FileSystem::create_directory_impl(const Path& dir, bool failed_if_exists) {
204
0
    return Status::OK();
205
0
}
206
207
0
Status S3FileSystem::delete_file_impl(const Path& file) {
208
0
    auto client = _client->get();
209
0
    CHECK_S3_CLIENT(client);
210
211
0
    auto key = DORIS_TRY(get_key(file));
212
213
0
    auto resp = client->delete_object({.bucket = _bucket, .key = key});
214
215
0
    if (resp.status.code == ErrorCode::OK || resp.status.code == ErrorCode::NOT_FOUND) {
216
0
        return Status::OK();
217
0
    }
218
0
    return std::move(Status(resp.status.code, std::move(resp.status.msg))
219
0
                             .append(fmt::format("failed to delete file {}", full_s3_path(key))));
220
0
}
221
222
0
Status S3FileSystem::delete_directory_impl(const Path& dir) {
223
0
    auto client = _client->get();
224
0
    CHECK_S3_CLIENT(client);
225
226
0
    auto prefix = DORIS_TRY(get_key(dir));
227
0
    if (!prefix.empty() && prefix.back() != '/') {
228
0
        prefix.push_back('/');
229
0
    }
230
231
0
    auto resp = client->delete_objects_recursively({
232
0
            .path = full_s3_path(prefix),
233
0
            .bucket = _bucket,
234
0
            .prefix = prefix,
235
0
    });
236
0
    return {resp.status.code, std::move(resp.status.msg)};
237
0
}
238
239
0
Status S3FileSystem::batch_delete_impl(const std::vector<Path>& remote_files) {
240
0
    auto client = _client->get();
241
0
    CHECK_S3_CLIENT(client);
242
243
    // `DeleteObjectsRequest` can only contain 1000 keys at most.
244
0
    constexpr size_t max_delete_batch = 1000;
245
0
    auto path_iter = remote_files.begin();
246
247
0
    do {
248
0
        std::vector<std::string> objects;
249
0
        auto path_begin = path_iter;
250
0
        for (; path_iter != remote_files.end() && (path_iter - path_begin < max_delete_batch);
251
0
             ++path_iter) {
252
0
            auto key = DORIS_TRY(get_key(*path_iter));
253
0
            objects.emplace_back(std::move(key));
254
0
        }
255
0
        if (objects.empty()) {
256
0
            return Status::OK();
257
0
        }
258
        // clang-format off
259
0
        if (auto resp = client->delete_objects( {.bucket = _bucket,}, std::move(objects)); resp.status.code != ErrorCode::OK) {
260
0
            return {resp.status.code, std::move(resp.status.msg)};
261
0
        }
262
        // clang-format on
263
0
    } while (path_iter != remote_files.end());
264
265
0
    return Status::OK();
266
0
}
267
268
4
Status S3FileSystem::exists_impl(const Path& path, bool* res) const {
269
4
    auto client = _client->get();
270
4
    CHECK_S3_CLIENT(client);
271
4
    auto key = DORIS_TRY(get_key(path));
272
273
4
    VLOG_DEBUG << "key:" << key << " path:" << path;
274
275
4
    auto resp = client->head_object({.bucket = _bucket, .key = key});
276
277
4
    if (resp.resp.status.code == ErrorCode::OK) {
278
0
        *res = true;
279
4
    } else if (resp.resp.status.code == ErrorCode::NOT_FOUND) {
280
4
        *res = false;
281
4
    } else {
282
0
        return std::move(
283
0
                Status(resp.resp.status.code, std::move(resp.resp.status.msg))
284
0
                        .append(fmt::format(" failed to check exists {}", full_s3_path(key))));
285
0
    }
286
4
    return Status::OK();
287
4
}
288
289
220
Status S3FileSystem::file_size_impl(const Path& file, int64_t* file_size) const {
290
220
    auto key = DORIS_TRY(get_key(file));
291
220
    *file_size = DORIS_TRY(_client->object_file_size(_bucket, key));
292
220
    return Status::OK();
293
220
}
294
295
Status S3FileSystem::list_impl(const Path& dir, bool only_file, std::vector<FileInfo>* files,
296
326
                               bool* exists) {
297
    // For object storage, this path is always not exist.
298
    // So we ignore this property and set exists to true.
299
326
    *exists = true;
300
326
    auto client = _client->get();
301
326
    CHECK_S3_CLIENT(client);
302
326
    auto prefix = DORIS_TRY(get_key(dir));
303
326
    if (!prefix.empty() && prefix.back() != '/') {
304
326
        prefix.push_back('/');
305
326
    }
306
307
    // clang-format off
308
326
    auto resp = client->list_objects( {.bucket = _bucket, .prefix = prefix,}, files);
309
    // clang-format on
310
326
    if (resp.status.code == ErrorCode::OK) {
311
326
        for (auto&& file : *files) {
312
248
            file.file_name.erase(0, prefix.size());
313
248
        }
314
326
    }
315
316
326
    return {resp.status.code, std::move(resp.status.msg)};
317
326
}
318
319
0
Status S3FileSystem::rename_impl(const Path& orig_name, const Path& new_name) {
320
0
    return Status::NotSupported("S3FileSystem::rename_impl");
321
0
}
322
323
170
Status S3FileSystem::upload_impl(const Path& local_file, const Path& remote_file) {
324
170
    auto client = _client->get();
325
170
    CHECK_S3_CLIENT(client);
326
327
170
    auto key = DORIS_TRY(get_key(remote_file));
328
170
    auto start = std::chrono::steady_clock::now();
329
170
    FileWriterPtr obj_writer;
330
170
    RETURN_IF_ERROR(create_file_impl(key, &obj_writer, nullptr));
331
170
    FileReaderSPtr local_reader;
332
170
    RETURN_IF_ERROR(io::global_local_filesystem()->open_file(local_file, &local_reader));
333
170
    size_t local_buffer_size = config::s3_file_system_local_upload_buffer_size;
334
170
    std::unique_ptr<char[]> write_buffer =
335
170
            std::make_unique_for_overwrite<char[]>(local_buffer_size);
336
170
    size_t cur_read = 0;
337
340
    while (cur_read < local_reader->size()) {
338
170
        size_t bytes_read = 0;
339
170
        RETURN_IF_ERROR(local_reader->read_at(
340
170
                cur_read, Slice {write_buffer.get(), local_buffer_size}, &bytes_read));
341
170
        RETURN_IF_ERROR(obj_writer->append({write_buffer.get(), bytes_read}));
342
170
        cur_read += bytes_read;
343
170
    }
344
170
    RETURN_IF_ERROR(obj_writer->close());
345
170
    auto duration = std::chrono::duration<float>(std::chrono::steady_clock::now() - start);
346
347
170
    auto size = local_reader->size();
348
170
    LOG(INFO) << "Upload " << local_file.native() << " to " << full_s3_path(key)
349
170
              << ", duration=" << duration.count() << ", bytes=" << size;
350
351
170
    return Status::OK();
352
170
}
353
354
Status S3FileSystem::batch_upload_impl(const std::vector<Path>& local_files,
355
0
                                       const std::vector<Path>& remote_files) {
356
0
    auto client = _client->get();
357
0
    CHECK_S3_CLIENT(client);
358
359
0
    if (local_files.size() != remote_files.size()) {
360
0
        return Status::InvalidArgument("local_files.size({}) != remote_files.size({})",
361
0
                                       local_files.size(), remote_files.size());
362
0
    }
363
364
0
    std::vector<FileWriterPtr> obj_writers(local_files.size());
365
366
0
    auto upload_task = [&, this](size_t idx) {
367
0
        SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->s3_file_buffer_tracker());
368
0
        const auto& local_file = local_files[idx];
369
0
        const auto& remote_file = remote_files[idx];
370
0
        auto& obj_writer = obj_writers[idx];
371
0
        auto key = DORIS_TRY(get_key(remote_file));
372
0
        LOG(INFO) << "Start to upload " << local_file.native() << " to " << full_s3_path(key);
373
0
        RETURN_IF_ERROR(create_file_impl(key, &obj_writer, nullptr));
374
0
        FileReaderSPtr local_reader;
375
0
        RETURN_IF_ERROR(io::global_local_filesystem()->open_file(local_file, &local_reader));
376
0
        size_t local_buffer_size = config::s3_file_system_local_upload_buffer_size;
377
0
        std::unique_ptr<char[]> write_buffer =
378
0
                std::make_unique_for_overwrite<char[]>(local_buffer_size);
379
0
        size_t cur_read = 0;
380
0
        while (cur_read < local_reader->size()) {
381
0
            size_t bytes_read = 0;
382
0
            RETURN_IF_ERROR(local_reader->read_at(
383
0
                    cur_read, Slice {write_buffer.get(), local_buffer_size}, &bytes_read));
384
0
            RETURN_IF_ERROR((*obj_writer).append({write_buffer.get(), bytes_read}));
385
0
            cur_read += bytes_read;
386
0
        }
387
0
        RETURN_IF_ERROR((*obj_writer).close());
388
0
        return Status::OK();
389
0
    };
390
391
0
    Status s = Status::OK();
392
0
    std::vector<std::future<Status>> futures;
393
0
    for (int i = 0; i < local_files.size(); ++i) {
394
0
        auto task = std::make_shared<std::packaged_task<Status(size_t idx)>>(upload_task);
395
0
        futures.emplace_back(task->get_future());
396
0
        auto st = ExecEnv::GetInstance()->s3_file_system_thread_pool()->submit_func(
397
0
                [t = std::move(task), idx = i]() mutable { (*t)(idx); });
398
        // We shouldn't return immediately since the previous submitted tasks might still be running in the thread pool
399
0
        if (!st.ok()) {
400
0
            s = st;
401
0
            break;
402
0
        }
403
0
    }
404
0
    for (auto&& f : futures) {
405
0
        auto cur_s = f.get();
406
0
        if (!cur_s.ok()) {
407
0
            s = std::move(cur_s);
408
0
        }
409
0
    }
410
0
    return s;
411
0
}
412
413
218
Status S3FileSystem::download_impl(const Path& remote_file, const Path& local_file) {
414
218
    auto client = _client->get();
415
218
    CHECK_S3_CLIENT(client);
416
218
    auto key = DORIS_TRY(get_key(remote_file));
417
218
    int64_t size;
418
218
    RETURN_IF_ERROR(file_size(remote_file, &size));
419
218
    std::unique_ptr<char[]> buf = std::make_unique_for_overwrite<char[]>(size);
420
218
    size_t bytes_read = 0;
421
    // clang-format off
422
218
    auto resp = client->get_object( {.bucket = _bucket, .key = key,},
423
218
            buf.get(), 0, size, &bytes_read);
424
    // clang-format on
425
218
    if (resp.status.code != ErrorCode::OK) {
426
0
        return {resp.status.code, std::move(resp.status.msg)};
427
0
    }
428
218
    Aws::OFStream local_file_s;
429
218
    local_file_s.open(local_file, std::ios::out | std::ios::binary);
430
218
    if (local_file_s.good()) {
431
218
        local_file_s << StringViewStream(buf.get(), size).rdbuf();
432
218
    } else {
433
0
        return localfs_error(errno, fmt::format("failed to write file {}", local_file.native()));
434
0
    }
435
436
218
    return Status::OK();
437
218
}
438
439
// oss has public endpoint and private endpoint, is_public_endpoint determines
440
// whether to return a public endpoint.
441
std::string S3FileSystem::generate_presigned_url(const Path& path, int64_t expiration_secs,
442
0
                                                 bool is_public_endpoint) const {
443
0
    std::string key = fmt::format("{}/{}", _prefix, path.native());
444
0
    std::shared_ptr<ObjStorageClient> client;
445
0
    if (is_public_endpoint &&
446
0
        _client->s3_client_conf().endpoint.ends_with(OSS_PRIVATE_ENDPOINT_SUFFIX)) {
447
0
        auto new_s3_conf = _client->s3_client_conf();
448
0
        new_s3_conf.endpoint.erase(
449
0
                _client->s3_client_conf().endpoint.size() - OSS_PRIVATE_ENDPOINT_SUFFIX.size(),
450
0
                LEN_OF_OSS_PRIVATE_SUFFIX);
451
0
        client = S3ClientFactory::instance().create(new_s3_conf);
452
0
    } else {
453
0
        client = _client->get();
454
0
    }
455
0
    return client->generate_presigned_url({.bucket = _bucket, .key = key}, expiration_secs,
456
0
                                          _client->s3_client_conf());
457
0
}
458
459
} // namespace doris::io