Coverage Report

Created: 2025-04-15 11:51

/root/doris/be/src/http/utils.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "http/utils.h"
19
20
#include <fcntl.h>
21
#include <stdint.h>
22
#include <sys/stat.h>
23
#include <unistd.h>
24
25
#include <ostream>
26
#include <string>
27
#include <unordered_map>
28
#include <vector>
29
30
#include "common/config.h"
31
#include "common/logging.h"
32
#include "common/status.h"
33
#include "common/utils.h"
34
#include "http/http_channel.h"
35
#include "http/http_client.h"
36
#include "http/http_common.h"
37
#include "http/http_headers.h"
38
#include "http/http_method.h"
39
#include "http/http_request.h"
40
#include "http/http_status.h"
41
#include "io/fs/file_system.h"
42
#include "io/fs/local_file_system.h"
43
#include "olap/wal/wal_manager.h"
44
#include "runtime/exec_env.h"
45
#include "util/md5.h"
46
#include "util/path_util.h"
47
#include "util/security.h"
48
#include "util/url_coding.h"
49
50
namespace doris {
51
52
// Timeout, in seconds, of the HEAD probe sent by is_support_batch_download();
// it is multiplied by 1000 before being passed to HttpClient::set_timeout_ms.
const uint32_t CHECK_SUPPORT_TIMEOUT = 3;
53
// Passed as the first argument of HttpClient::execute_with_retry by the batch
// download helpers in this file (presumably the maximum number of attempts --
// confirm against HttpClient).
const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3;
54
// Timeout, in seconds, of the listing request sent by list_remote_files_v2();
// it is multiplied by 1000 before being passed to HttpClient::set_timeout_ms.
const uint32_t LIST_REMOTE_FILE_TIMEOUT = 15;
55
56
1
std::string encode_basic_auth(const std::string& user, const std::string& passwd) {
57
1
    std::string auth = user + ":" + passwd;
58
1
    std::string encoded_auth;
59
1
    base64_encode(auth, &encoded_auth);
60
1
    static std::string s_prefix = "Basic ";
61
1
    return s_prefix + encoded_auth;
62
1
}
63
64
14
bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* passwd) {
65
14
    const char k_basic[] = "Basic ";
66
14
    const auto& auth = req.header(HttpHeaders::AUTHORIZATION);
67
14
    if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 0) {
68
3
        return false;
69
3
    }
70
11
    std::string encoded_str = auth.substr(sizeof(k_basic) - 1);
71
11
    std::string decoded_auth;
72
11
    if (!base64_decode(encoded_str, &decoded_auth)) {
73
2
        return false;
74
2
    }
75
9
    auto pos = decoded_auth.find(':');
76
9
    if (pos == std::string::npos) {
77
0
        return false;
78
0
    }
79
9
    user->assign(decoded_auth.c_str(), pos);
80
9
    passwd->assign(decoded_auth.c_str() + pos + 1);
81
82
9
    return true;
83
9
}
84
85
3
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
86
3
    const auto& token = req.header("token");
87
3
    const auto& auth_code = req.header(HTTP_AUTH_CODE);
88
3
    if (!token.empty()) {
89
0
        auth->token = token;
90
3
    } else if (!auth_code.empty()) {
91
0
        auth->auth_code = std::stoll(auth_code);
92
3
    } else {
93
3
        std::string full_user;
94
3
        if (!parse_basic_auth(req, &full_user, &auth->passwd)) {
95
1
            return false;
96
1
        }
97
2
        auto pos = full_user.find('@');
98
2
        if (pos != std::string::npos) {
99
0
            auth->user.assign(full_user.data(), pos);
100
0
            auth->cluster.assign(full_user.data() + pos + 1);
101
2
        } else {
102
2
            auth->user = full_user;
103
2
        }
104
2
    }
105
106
    // set user ip
107
2
    if (req.remote_host() != nullptr) {
108
0
        auth->user_ip.assign(req.remote_host());
109
2
    } else {
110
2
        auth->user_ip.assign("");
111
2
    }
112
113
2
    return true;
114
3
}
115
116
// Do a simple decision, only deal a few type
117
1
std::string get_content_type(const std::string& file_name) {
118
1
    std::string file_ext = path_util::file_extension(file_name);
119
1
    VLOG_TRACE << "file_name: " << file_name << "; file extension: [" << file_ext << "]";
120
1
    if (file_ext == std::string(".html") || file_ext == std::string(".htm")) {
121
0
        return "text/html; charset=utf-8";
122
1
    } else if (file_ext == std::string(".js")) {
123
0
        return "application/javascript; charset=utf-8";
124
1
    } else if (file_ext == std::string(".css")) {
125
0
        return "text/css; charset=utf-8";
126
1
    } else if (file_ext == std::string(".txt")) {
127
0
        return "text/plain; charset=utf-8";
128
1
    } else if (file_ext == std::string(".png")) {
129
0
        return "image/png";
130
1
    } else if (file_ext == std::string(".ico")) {
131
0
        return "image/x-icon";
132
1
    } else {
133
1
        return "text/plain; charset=utf-8";
134
1
    }
135
1
}
136
137
void do_file_response(const std::string& file_path, HttpRequest* req,
138
1
                      bufferevent_rate_limit_group* rate_limit_group, bool is_acquire_md5) {
139
1
    if (file_path.find("..") != std::string::npos) {
140
0
        LOG(WARNING) << "Not allowed to read relative path: " << file_path;
141
0
        HttpChannel::send_error(req, HttpStatus::FORBIDDEN);
142
0
        return;
143
0
    }
144
145
    // read file content and send response
146
1
    int fd = open(file_path.c_str(), O_RDONLY);
147
1
    if (fd < 0) {
148
0
        LOG(WARNING) << "Failed to open file: " << file_path;
149
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
150
0
        return;
151
0
    }
152
1
    struct stat st;
153
1
    auto res = fstat(fd, &st);
154
1
    if (res < 0) {
155
0
        close(fd);
156
0
        LOG(WARNING) << "Failed to open file: " << file_path;
157
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
158
0
        return;
159
0
    }
160
161
1
    int64_t file_size = st.st_size;
162
163
    // TODO(lingbin): process "IF_MODIFIED_SINCE" header
164
    // TODO(lingbin): process "RANGE" header
165
1
    const std::string& range_header = req->header(HttpHeaders::RANGE);
166
1
    if (!range_header.empty()) {
167
        // analyse range header
168
0
    }
169
170
1
    req->add_output_header(HttpHeaders::CONTENT_TYPE, get_content_type(file_path).c_str());
171
172
1
    if (is_acquire_md5) {
173
1
        Md5Digest md5;
174
175
1
        void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
176
1
        md5.update(buf, file_size);
177
1
        md5.digest();
178
1
        munmap(buf, file_size);
179
180
1
        req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str());
181
1
    }
182
183
1
    if (req->method() == HttpMethod::HEAD) {
184
1
        close(fd);
185
1
        req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(file_size).c_str());
186
1
        HttpChannel::send_reply(req);
187
1
        return;
188
1
    }
189
190
0
    HttpChannel::send_file(req, fd, 0, file_size, rate_limit_group);
191
0
}
192
193
1
void do_dir_response(const std::string& dir_path, HttpRequest* req, bool is_acquire_filesize) {
194
1
    bool exists = true;
195
1
    std::vector<io::FileInfo> files;
196
1
    Status st = io::global_local_filesystem()->list(dir_path, true, &files, &exists);
197
1
    if (!st.ok()) {
198
0
        LOG(WARNING) << "Failed to scan dir. " << st;
199
0
        HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR);
200
0
        return;
201
0
    }
202
203
1
    VLOG_DEBUG << "list dir: " << dir_path << ", file count: " << files.size();
204
205
1
    const std::string FILE_DELIMITER_IN_DIR_RESPONSE = "\n";
206
207
1
    std::stringstream result;
208
35
    for (auto& file : files) {
209
35
        result << file.file_name << FILE_DELIMITER_IN_DIR_RESPONSE;
210
35
        if (is_acquire_filesize) {
211
35
            result << file.file_size << FILE_DELIMITER_IN_DIR_RESPONSE;
212
35
        }
213
35
    }
214
215
1
    std::string result_str = result.str();
216
1
    HttpChannel::send_reply(req, result_str);
217
1
}
218
219
2
bool load_size_smaller_than_wal_limit(int64_t content_length) {
220
    // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload content length. If it is empty or equals to 0, it means this streamload
221
    // is a chunked streamload and we are not sure its size.
222
    // 2. if streamload content length is too large, like larger than 80% of the WAL constrain.
223
    //
224
    // This two cases, we are not certain that the Write-Ahead Logging (WAL) constraints allow for writing down
225
    // these blocks within the limited space. So we need to set group_commit = false to avoid dead lock.
226
2
    size_t max_available_size = ExecEnv::GetInstance()->wal_mgr()->get_max_available_size();
227
2
    return (content_length < 0.8 * max_available_size);
228
2
}
229
230
1
// Sends a HEAD request to the batch download API of `endpoint`
// (".../_batch_download?check=true") and returns the resulting Status. Each
// attempt uses a CHECK_SUPPORT_TIMEOUT second timeout and the request is driven
// through HttpClient::execute_with_retry with DOWNLOAD_FILE_MAX_RETRY.
Status is_support_batch_download(const std::string& endpoint) {
    std::string probe_url =
            fmt::format("http://{}/api/_tablet/_batch_download?check=true", endpoint);

    auto send_probe = [&probe_url](HttpClient* client) -> Status {
        RETURN_IF_ERROR(client->init(probe_url));
        client->set_timeout_ms(CHECK_SUPPORT_TIMEOUT * 1000);
        client->set_method(HttpMethod::HEAD);
        // The response body is not inspected; only the Status matters.
        std::string discarded_body;
        return client->execute(&discarded_body);
    };

    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, send_probe);
}
241
242
// Asks the batch download API at `address` for the files under `remote_dir`
// and appends one (file name, file size) pair per file to *file_info_list.
//
// The response is expected to be newline separated, alternating between a file
// name line and a file size line. Returns:
//  - the failing Status if the HTTP request does not succeed,
//  - InternalError if the number of non-blank lines is odd or a size line is
//    not accepted by std::stoull,
//  - OK otherwise.
//
// The request URL carries `token`, so it is passed through mask_token() before
// being written to any log.
Status list_remote_files_v2(const std::string& address, const std::string& token,
                            const std::string& remote_dir,
                            std::vector<std::pair<std::string, size_t>>* file_info_list) {
    std::string remote_url =
            fmt::format("http://{}/api/_tablet/_batch_download?token={}&dir={}&list=true", address,
                        token, remote_dir);

    std::string file_list_str;
    auto list_files_cb = [&](HttpClient* client) {
        // Start every attempt from an empty buffer so that a retry does not
        // append to the output of a previous attempt.
        file_list_str.clear();
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::GET);
        client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000);
        return client->execute(&file_list_str);
    };
    Status status = HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb);
    if (!status.ok()) {
        LOG(WARNING) << "failed to list remote files from " << mask_token(remote_url)
                     << ", status: " << status.to_string() << ", response: " << file_list_str;
        return status;
    }

    std::vector<std::string> file_list =
            strings::Split(file_list_str, "\n", strings::SkipWhitespace());
    if (file_list.size() % 2 != 0) {
        return Status::InternalError("batch download files: invalid file list, size is not even");
    }

    VLOG_DEBUG << "list remote files from " << mask_token(remote_url)
               << ", file count: " << file_list.size() / 2;

    file_info_list->reserve(file_info_list->size() + file_list.size() / 2);
    // Entries come in pairs: [i] is the file name, [i + 1] its size.
    for (size_t i = 0; i < file_list.size(); i += 2) {
        uint64_t file_size = 0;
        try {
            file_size = std::stoull(file_list[i + 1]);
        } catch (std::exception&) {
            return Status::InternalError("batch download files: invalid file size format: " +
                                         file_list[i + 1]);
        }
        file_info_list->emplace_back(std::move(file_list[i]), file_size);
    }

    return Status::OK();
}
285
286
// Downloads the files named in `file_info_list` from `remote_dir` on `address`
// into `local_dir` with a single POST to the batch download API.
//
// The request payload is the list of file names, one per line. The timeout is
// the total expected size divided by config::download_low_speed_limit_kbps and
// by 1024, but never less than config::download_low_speed_time. After the
// transfer every file is checked against its expected size and its permission
// is set to io::LocalFileSystem::PERMS_OWNER_RW. The whole sequence is driven
// through HttpClient::execute_with_retry with DOWNLOAD_FILE_MAX_RETRY.
//
// Returns IOError if a local file size cannot be read, InternalError on a size
// mismatch, or whatever error the HTTP client / filesystem calls report.
//
// The request URL carries `token`, so it is passed through mask_token() before
// being written to any log.
Status download_files_v2(const std::string& address, const std::string& token,
                         const std::string& remote_dir, const std::string& local_dir,
                         const std::vector<std::pair<std::string, size_t>>& file_info_list) {
    std::string remote_url = fmt::format("http://{}/api/_tablet/_batch_download?dir={}&token={}",
                                         address, remote_dir, token);

    size_t batch_file_size = 0;
    std::unordered_set<std::string> expected_files;
    std::stringstream ss;
    for (const auto& file_info : file_info_list) {
        ss << file_info.first << "\n";
        batch_file_size += file_info.second;
        expected_files.insert(file_info.first);
    }
    std::string payload = ss.str();

    // Guard the division: a speed limit configured as 0 must not crash the
    // process; in that case only the minimum timeout below applies.
    uint64_t estimate_timeout = 0;
    if (config::download_low_speed_limit_kbps > 0) {
        estimate_timeout = batch_file_size / config::download_low_speed_limit_kbps / 1024;
    }
    if (estimate_timeout < config::download_low_speed_time) {
        estimate_timeout = config::download_low_speed_time;
    }

    LOG(INFO) << "begin to download files from " << mask_token(remote_url) << " to " << local_dir
              << ", file count: " << file_info_list.size() << ", total size: " << batch_file_size
              << ", timeout: " << estimate_timeout;

    auto callback = [&](HttpClient* client) -> Status {
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::POST);
        client->set_payload(payload);
        client->set_timeout_ms(estimate_timeout * 1000);
        RETURN_IF_ERROR(client->download_multi_files(local_dir, expected_files));
        for (auto&& [file_name, file_size] : file_info_list) {
            std::string local_file_path = local_dir + "/" + file_name;

            std::error_code ec;
            // Check file length
            uint64_t local_file_size = std::filesystem::file_size(local_file_path, ec);
            if (ec) {
                LOG(WARNING) << "download file error: " << ec.message();
                return Status::IOError("can't retrive file_size of {}, due to {}", local_file_path,
                                       ec.message());
            }
            if (local_file_size != file_size) {
                LOG(WARNING) << "download file length error"
                             << ", remote_path=" << mask_token(remote_url)
                             << ", file_name=" << file_name << ", file_size=" << file_size
                             << ", local_file_size=" << local_file_size;
                return Status::InternalError("downloaded file size is not equal");
            }
            RETURN_IF_ERROR(io::global_local_filesystem()->permission(
                    local_file_path, io::LocalFileSystem::PERMS_OWNER_RW));
        }

        return Status::OK();
    };
    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, callback);
}
343
344
} // namespace doris