Coverage Report

Created: 2025-05-12 20:32

/root/doris/be/src/http/utils.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "http/utils.h"
19
20
#include <absl/strings/str_split.h>
21
#include <fcntl.h>
22
#include <stdint.h>
23
#include <sys/stat.h>
24
#include <unistd.h>
25
26
#include <ostream>
27
#include <string>
28
#include <unordered_map>
29
#include <vector>
30
31
#include "common/config.h"
32
#include "common/logging.h"
33
#include "common/status.h"
34
#include "common/utils.h"
35
#include "http/http_channel.h"
36
#include "http/http_client.h"
37
#include "http/http_common.h"
38
#include "http/http_headers.h"
39
#include "http/http_method.h"
40
#include "http/http_request.h"
41
#include "http/http_status.h"
42
#include "io/fs/file_system.h"
43
#include "io/fs/local_file_system.h"
44
#include "olap/wal/wal_manager.h"
45
#include "runtime/exec_env.h"
46
#include "util/md5.h"
47
#include "util/path_util.h"
48
#include "util/security.h"
49
#include "util/url_coding.h"
50
51
namespace doris {
52
53
// Timeout, in seconds, for the batch-download support probe (multiplied by 1000
// before being passed to HttpClient::set_timeout_ms).
constexpr uint32_t CHECK_SUPPORT_TIMEOUT {3};
54
// Passed as the first argument of HttpClient::execute_with_retry by every remote
// call in this file (presumably the maximum number of attempts).
constexpr uint32_t DOWNLOAD_FILE_MAX_RETRY {3};
55
// Timeout, in seconds, for listing the files of a remote directory (multiplied by
// 1000 before being passed to HttpClient::set_timeout_ms).
constexpr uint32_t LIST_REMOTE_FILE_TIMEOUT {15};
56
57
1
std::string encode_basic_auth(const std::string& user, const std::string& passwd) {
58
1
    std::string auth = user + ":" + passwd;
59
1
    std::string encoded_auth;
60
1
    base64_encode(auth, &encoded_auth);
61
1
    static std::string s_prefix = "Basic ";
62
1
    return s_prefix + encoded_auth;
63
1
}
64
65
39
bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* passwd) {
66
    // const auto& token = req.header(HttpHeaders::AUTH_TOKEN);
67
68
39
    const char k_basic[] = "Basic ";
69
39
    const auto& auth = req.header(HttpHeaders::AUTHORIZATION);
70
39
    if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 0) {
71
15
        return false;
72
15
    }
73
24
    std::string encoded_str = auth.substr(sizeof(k_basic) - 1);
74
24
    std::string decoded_auth;
75
24
    if (!base64_decode(encoded_str, &decoded_auth)) {
76
2
        return false;
77
2
    }
78
22
    auto pos = decoded_auth.find(':');
79
22
    if (pos == std::string::npos) {
80
0
        return false;
81
0
    }
82
22
    user->assign(decoded_auth.c_str(), pos);
83
22
    passwd->assign(decoded_auth.c_str() + pos + 1);
84
85
22
    return true;
86
22
}
87
88
28
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
89
    // deprecated, removed in 3.1, use AUTH_TOKEN
90
28
    const auto& token = req.header("token");
91
    // deprecated, removed in 3.1, use AUTH_TOKEN
92
28
    const auto& auth_code = req.header(HTTP_AUTH_CODE);
93
28
    const auto& auth_token = req.header(HttpHeaders::AUTH_TOKEN);
94
95
28
    std::tuple<std::string, std::string, std::string> tmp;
96
28
    auto& [user, pass, cluster] = tmp;
97
28
    bool valid_basic_auth = parse_basic_auth(req, &user, &pass);
98
28
    if (valid_basic_auth) { // always set the basic auth, the user may be useful
99
15
        auto pos = user.find('@');
100
15
        if (pos != std::string::npos) {
101
0
            cluster.assign(user.c_str() + pos + 1);
102
0
            user.assign(user.c_str(), pos); // user is updated
103
0
        }
104
15
        auth->user = user;
105
15
        auth->passwd = pass;
106
15
        auth->cluster = cluster;
107
15
    }
108
109
28
    if (!token.empty()) {
110
0
        auth->token = token; // deprecated
111
28
    } else if (!auth_token.empty()) {
112
2
        auth->token = auth_token;
113
26
    } else if (!auth_code.empty()) {
114
0
        auth->auth_code = std::stoll(auth_code); // deprecated
115
26
    } else if (!valid_basic_auth) {
116
11
        return false;
117
11
    }
118
119
    // set user ip
120
17
    auth->user_ip.assign(req.remote_host() != nullptr ? req.remote_host() : "");
121
122
17
    return true;
123
28
}
124
125
// Do a simple decision, only deal a few type
126
4
std::string get_content_type(const std::string& file_name) {
127
4
    std::string file_ext = path_util::file_extension(file_name);
128
4
    VLOG_TRACE << "file_name: " << file_name << "; file extension: [" << file_ext << "]";
129
4
    if (file_ext == std::string(".html") || file_ext == std::string(".htm")) {
130
0
        return "text/html; charset=utf-8";
131
4
    } else if (file_ext == std::string(".js")) {
132
0
        return "application/javascript; charset=utf-8";
133
4
    } else if (file_ext == std::string(".css")) {
134
0
        return "text/css; charset=utf-8";
135
4
    } else if (file_ext == std::string(".txt")) {
136
0
        return "text/plain; charset=utf-8";
137
4
    } else if (file_ext == std::string(".png")) {
138
0
        return "image/png";
139
4
    } else if (file_ext == std::string(".ico")) {
140
0
        return "image/x-icon";
141
4
    } else {
142
4
        return "text/plain; charset=utf-8";
143
4
    }
144
4
}
145
146
void do_file_response(const std::string& file_path, HttpRequest* req,
147
4
                      bufferevent_rate_limit_group* rate_limit_group, bool is_acquire_md5) {
148
4
    if (file_path.find("..") != std::string::npos) {
149
0
        LOG(WARNING) << "Not allowed to read relative path: " << file_path;
150
0
        HttpChannel::send_error(req, HttpStatus::FORBIDDEN);
151
0
        return;
152
0
    }
153
154
    // read file content and send response
155
4
    int fd = open(file_path.c_str(), O_RDONLY);
156
4
    if (fd < 0) {
157
0
        LOG(WARNING) << "Failed to open file: " << file_path;
158
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
159
0
        return;
160
0
    }
161
4
    struct stat st;
162
4
    auto res = fstat(fd, &st);
163
4
    if (res < 0) {
164
0
        close(fd);
165
0
        LOG(WARNING) << "Failed to open file: " << file_path;
166
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
167
0
        return;
168
0
    }
169
170
4
    int64_t file_size = st.st_size;
171
172
    // TODO(lingbin): process "IF_MODIFIED_SINCE" header
173
    // TODO(lingbin): process "RANGE" header
174
4
    const std::string& range_header = req->header(HttpHeaders::RANGE);
175
4
    if (!range_header.empty()) {
176
        // analyse range header
177
0
    }
178
179
4
    req->add_output_header(HttpHeaders::CONTENT_TYPE, get_content_type(file_path).c_str());
180
181
4
    if (is_acquire_md5) {
182
1
        Md5Digest md5;
183
184
1
        void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
185
1
        md5.update(buf, file_size);
186
1
        md5.digest();
187
1
        munmap(buf, file_size);
188
189
1
        req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str());
190
1
    }
191
192
4
    if (req->method() == HttpMethod::HEAD) {
193
3
        close(fd);
194
3
        req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(file_size).c_str());
195
3
        HttpChannel::send_reply(req);
196
3
        return;
197
3
    }
198
199
1
    HttpChannel::send_file(req, fd, 0, file_size, rate_limit_group);
200
1
}
201
202
2
void do_dir_response(const std::string& dir_path, HttpRequest* req, bool is_acquire_filesize) {
203
2
    bool exists = true;
204
2
    std::vector<io::FileInfo> files;
205
2
    Status st = io::global_local_filesystem()->list(dir_path, true, &files, &exists);
206
2
    if (!st.ok()) {
207
0
        LOG(WARNING) << "Failed to scan dir. " << st;
208
0
        HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR);
209
0
        return;
210
0
    }
211
212
2
    VLOG_DEBUG << "list dir: " << dir_path << ", file count: " << files.size();
213
214
2
    const std::string FILE_DELIMITER_IN_DIR_RESPONSE = "\n";
215
216
2
    std::stringstream result;
217
37
    for (auto& file : files) {
218
37
        result << file.file_name << FILE_DELIMITER_IN_DIR_RESPONSE;
219
37
        if (is_acquire_filesize) {
220
35
            result << file.file_size << FILE_DELIMITER_IN_DIR_RESPONSE;
221
35
        }
222
37
    }
223
224
2
    std::string result_str = result.str();
225
2
    HttpChannel::send_reply(req, result_str);
226
2
}
227
228
2
bool load_size_smaller_than_wal_limit(int64_t content_length) {
229
    // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload content length. If it is empty or equals to 0, it means this streamload
230
    // is a chunked streamload and we are not sure its size.
231
    // 2. if streamload content length is too large, like larger than 80% of the WAL constrain.
232
    //
233
    // This two cases, we are not certain that the Write-Ahead Logging (WAL) constraints allow for writing down
234
    // these blocks within the limited space. So we need to set group_commit = false to avoid dead lock.
235
2
    size_t max_available_size = ExecEnv::GetInstance()->wal_mgr()->get_max_available_size();
236
2
    return (content_length < 0.8 * max_available_size);
237
2
}
238
239
1
// Probes `endpoint` with an HTTP HEAD request to
// /api/_tablet/_batch_download?check=true and returns the resulting Status.
// Each attempt uses a timeout of CHECK_SUPPORT_TIMEOUT seconds and is driven by
// HttpClient::execute_with_retry.
Status is_support_batch_download(const std::string& endpoint) {
    const std::string probe_url =
            fmt::format("http://{}/api/_tablet/_batch_download?check=true", endpoint);

    auto probe = [&probe_url](HttpClient* http) -> Status {
        RETURN_IF_ERROR(http->init(probe_url));
        http->set_timeout_ms(CHECK_SUPPORT_TIMEOUT * 1000);
        http->set_method(HttpMethod::HEAD);
        std::string ignored_body; // only the status of the request matters
        return http->execute(&ignored_body);
    };

    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, probe);
}
250
251
// Lists the files of `remote_dir` on the backend at `address` through its
// _batch_download API and appends one (file name, file size) pair per file to
// `file_info_list`.
//
// The response is expected to be newline-separated text that alternates between a
// file name and that file's size. Returns the HTTP error if the request fails
// after retrying, or InternalError if the response does not consist of name/size
// pairs or a size is not a valid unsigned integer.
Status list_remote_files_v2(const std::string& address, const std::string& token,
                            const std::string& remote_dir,
                            std::vector<std::pair<std::string, size_t>>* file_info_list) {
    // The URL embeds the access token, so it must only be logged via mask_token().
    const std::string remote_url =
            fmt::format("http://{}/api/_tablet/_batch_download?token={}&dir={}&list=true", address,
                        token, remote_dir);

    std::string file_list_str;
    auto list_files_cb = [&](HttpClient* client) {
        file_list_str.clear(); // drop any partial response from a previous attempt
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::GET);
        client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000);
        return client->execute(&file_list_str);
    };
    Status status = HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb);
    if (!status.ok()) {
        LOG(WARNING) << "failed to list remote files from " << mask_token(remote_url)
                     << ", status: " << status.to_string() << ", response: " << file_list_str;
        return status;
    }

    std::vector<std::string> file_list =
            absl::StrSplit(file_list_str, "\n", absl::SkipWhitespace());
    if (file_list.size() % 2 != 0) {
        return Status::InternalError("batch download files: invalid file list, size is not even");
    }

    VLOG_DEBUG << "list remote files from " << mask_token(remote_url)
               << ", file count: " << file_list.size() / 2;

    file_info_list->reserve(file_info_list->size() + file_list.size() / 2);
    // Entries come in pairs: [i] is the file name, [i + 1] its size.
    for (size_t i = 0; i < file_list.size(); i += 2) {
        uint64_t file_size = 0;
        try {
            file_size = std::stoull(file_list[i + 1]);
        } catch (const std::exception&) {
            return Status::InternalError("batch download files: invalid file size format: " +
                                         file_list[i + 1]);
        }
        file_info_list->emplace_back(std::move(file_list[i]), file_size);
    }

    return Status::OK();
}
295
296
// Downloads the files named in `file_info_list` from `remote_dir` on the backend
// at `address` into `local_dir` with one _batch_download POST request.
//
// The request payload is the newline-separated list of file names. After each
// download attempt every file is checked against its expected size and its
// permissions are set to owner read/write. The whole sequence (request and
// verification) is retried through HttpClient::execute_with_retry.
//
// The request timeout is estimated from the total size and
// config::download_low_speed_limit_kbps, and is never lower than
// config::download_low_speed_time.
Status download_files_v2(const std::string& address, const std::string& token,
                         const std::string& remote_dir, const std::string& local_dir,
                         const std::vector<std::pair<std::string, size_t>>& file_info_list) {
    // The URL embeds the access token, so it must only be logged via mask_token().
    const std::string remote_url =
            fmt::format("http://{}/api/_tablet/_batch_download?dir={}&token={}", address,
                        remote_dir, token);

    size_t batch_file_size = 0;
    std::unordered_set<std::string> expected_files;
    std::stringstream ss;
    for (const auto& file_info : file_info_list) {
        ss << file_info.first << "\n";
        batch_file_size += file_info.second;
        expected_files.insert(file_info.first);
    }
    const std::string payload = ss.str();

    // Guard the division: the speed limit is a config value and a zero would
    // otherwise trigger an integer division by zero.
    uint64_t estimate_timeout = 0;
    if (config::download_low_speed_limit_kbps > 0) {
        estimate_timeout = batch_file_size / config::download_low_speed_limit_kbps / 1024;
    }
    if (estimate_timeout < config::download_low_speed_time) {
        estimate_timeout = config::download_low_speed_time;
    }

    LOG(INFO) << "begin to download files from " << mask_token(remote_url) << " to " << local_dir
              << ", file count: " << file_info_list.size() << ", total size: " << batch_file_size
              << ", timeout: " << estimate_timeout;

    auto callback = [&](HttpClient* client) -> Status {
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::POST);
        client->set_payload(payload);
        client->set_timeout_ms(estimate_timeout * 1000);
        RETURN_IF_ERROR(client->download_multi_files(local_dir, expected_files));

        // Verify every file that was requested before reporting success.
        for (const auto& [file_name, file_size] : file_info_list) {
            const std::string local_file_path = local_dir + "/" + file_name;

            std::error_code ec;
            // Check file length
            uint64_t local_file_size = std::filesystem::file_size(local_file_path, ec);
            if (ec) {
                LOG(WARNING) << "download file error: " << ec.message();
                return Status::IOError("can't retrive file_size of {}, due to {}", local_file_path,
                                       ec.message());
            }
            if (local_file_size != file_size) {
                LOG(WARNING) << "download file length error"
                             << ", remote_path=" << mask_token(remote_url)
                             << ", file_name=" << file_name << ", file_size=" << file_size
                             << ", local_file_size=" << local_file_size;
                return Status::InternalError("downloaded file size is not equal");
            }
            RETURN_IF_ERROR(io::global_local_filesystem()->permission(
                    local_file_path, io::LocalFileSystem::PERMS_OWNER_RW));
        }

        return Status::OK();
    };
    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, callback);
}
353
354
} // namespace doris