Coverage Report

Created: 2025-03-13 18:54

/root/doris/be/src/http/utils.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "http/utils.h"
19
20
#include <fcntl.h>
21
#include <stdint.h>
22
#include <sys/stat.h>
23
#include <unistd.h>
24
25
#include <ostream>
26
#include <string>
27
#include <unordered_map>
28
#include <vector>
29
30
#include "common/config.h"
31
#include "common/logging.h"
32
#include "common/status.h"
33
#include "common/utils.h"
34
#include "http/http_channel.h"
35
#include "http/http_client.h"
36
#include "http/http_common.h"
37
#include "http/http_headers.h"
38
#include "http/http_method.h"
39
#include "http/http_request.h"
40
#include "http/http_status.h"
41
#include "io/fs/file_system.h"
42
#include "io/fs/local_file_system.h"
43
#include "olap/wal/wal_manager.h"
44
#include "runtime/exec_env.h"
45
#include "util/md5.h"
46
#include "util/path_util.h"
47
#include "util/security.h"
48
#include "util/url_coding.h"
49
50
namespace doris {
51
52
// Timeout, in seconds, of the HEAD probe issued by is_support_batch_download()
// (multiplied by 1000 before being handed to HttpClient::set_timeout_ms).
const uint32_t CHECK_SUPPORT_TIMEOUT = 3;
53
// Passed as the first argument of HttpClient::execute_with_retry by the
// batch-download probe, list and download helpers in this file.
const uint32_t DOWNLOAD_FILE_MAX_RETRY = 3;
54
// Timeout, in seconds, of the remote file listing request issued by
// list_remote_files_v2() (multiplied by 1000 for HttpClient::set_timeout_ms).
const uint32_t LIST_REMOTE_FILE_TIMEOUT = 15;
55
56
1
std::string encode_basic_auth(const std::string& user, const std::string& passwd) {
57
1
    std::string auth = user + ":" + passwd;
58
1
    std::string encoded_auth;
59
1
    base64_encode(auth, &encoded_auth);
60
1
    static std::string s_prefix = "Basic ";
61
1
    return s_prefix + encoded_auth;
62
1
}
63
64
39
bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* passwd) {
65
    // const auto& token = req.header(HttpHeaders::AUTH_TOKEN);
66
67
39
    const char k_basic[] = "Basic ";
68
39
    const auto& auth = req.header(HttpHeaders::AUTHORIZATION);
69
39
    if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 0) {
70
15
        return false;
71
15
    }
72
24
    std::string encoded_str = auth.substr(sizeof(k_basic) - 1);
73
24
    std::string decoded_auth;
74
24
    if (!base64_decode(encoded_str, &decoded_auth)) {
75
2
        return false;
76
2
    }
77
22
    auto pos = decoded_auth.find(':');
78
22
    if (pos == std::string::npos) {
79
0
        return false;
80
0
    }
81
22
    user->assign(decoded_auth.c_str(), pos);
82
22
    passwd->assign(decoded_auth.c_str() + pos + 1);
83
84
22
    return true;
85
22
}
86
87
28
bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
88
    // deprecated, removed in 3.1, use AUTH_TOKEN
89
28
    const auto& token = req.header("token");
90
    // deprecated, removed in 3.1, use AUTH_TOKEN
91
28
    const auto& auth_code = req.header(HTTP_AUTH_CODE);
92
28
    const auto& auth_token = req.header(HttpHeaders::AUTH_TOKEN);
93
94
28
    std::tuple<std::string, std::string, std::string> tmp;
95
28
    auto& [user, pass, cluster] = tmp;
96
28
    bool valid_basic_auth = parse_basic_auth(req, &user, &pass);
97
28
    if (valid_basic_auth) { // always set the basic auth, the user may be useful
98
15
        auto pos = user.find('@');
99
15
        if (pos != std::string::npos) {
100
0
            cluster.assign(user.c_str() + pos + 1);
101
0
            user.assign(user.c_str(), pos); // user is updated
102
0
        }
103
15
        auth->user = user;
104
15
        auth->passwd = pass;
105
15
        auth->cluster = cluster;
106
15
    }
107
108
28
    if (!token.empty()) {
109
0
        auth->token = token; // deprecated
110
28
    } else if (!auth_token.empty()) {
111
2
        auth->token = auth_token;
112
26
    } else if (!auth_code.empty()) {
113
0
        auth->auth_code = std::stoll(auth_code); // deprecated
114
26
    } else if (!valid_basic_auth) {
115
11
        return false;
116
11
    }
117
118
    // set user ip
119
17
    auth->user_ip.assign(req.remote_host() != nullptr ? req.remote_host() : "");
120
121
17
    return true;
122
28
}
123
124
// Do a simple decision, only deal a few type
125
1
std::string get_content_type(const std::string& file_name) {
126
1
    std::string file_ext = path_util::file_extension(file_name);
127
1
    VLOG_TRACE << "file_name: " << file_name << "; file extension: [" << file_ext << "]";
128
1
    if (file_ext == std::string(".html") || file_ext == std::string(".htm")) {
129
0
        return "text/html; charset=utf-8";
130
1
    } else if (file_ext == std::string(".js")) {
131
0
        return "application/javascript; charset=utf-8";
132
1
    } else if (file_ext == std::string(".css")) {
133
0
        return "text/css; charset=utf-8";
134
1
    } else if (file_ext == std::string(".txt")) {
135
0
        return "text/plain; charset=utf-8";
136
1
    } else if (file_ext == std::string(".png")) {
137
0
        return "image/png";
138
1
    } else if (file_ext == std::string(".ico")) {
139
0
        return "image/x-icon";
140
1
    } else {
141
1
        return "text/plain; charset=utf-8";
142
1
    }
143
1
}
144
145
void do_file_response(const std::string& file_path, HttpRequest* req,
146
1
                      bufferevent_rate_limit_group* rate_limit_group, bool is_acquire_md5) {
147
1
    if (file_path.find("..") != std::string::npos) {
148
0
        LOG(WARNING) << "Not allowed to read relative path: " << file_path;
149
0
        HttpChannel::send_error(req, HttpStatus::FORBIDDEN);
150
0
        return;
151
0
    }
152
153
    // read file content and send response
154
1
    int fd = open(file_path.c_str(), O_RDONLY);
155
1
    if (fd < 0) {
156
0
        LOG(WARNING) << "Failed to open file: " << file_path;
157
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
158
0
        return;
159
0
    }
160
1
    struct stat st;
161
1
    auto res = fstat(fd, &st);
162
1
    if (res < 0) {
163
0
        close(fd);
164
0
        LOG(WARNING) << "Failed to open file: " << file_path;
165
0
        HttpChannel::send_error(req, HttpStatus::NOT_FOUND);
166
0
        return;
167
0
    }
168
169
1
    int64_t file_size = st.st_size;
170
171
    // TODO(lingbin): process "IF_MODIFIED_SINCE" header
172
    // TODO(lingbin): process "RANGE" header
173
1
    const std::string& range_header = req->header(HttpHeaders::RANGE);
174
1
    if (!range_header.empty()) {
175
        // analyse range header
176
0
    }
177
178
1
    req->add_output_header(HttpHeaders::CONTENT_TYPE, get_content_type(file_path).c_str());
179
180
1
    if (is_acquire_md5) {
181
1
        Md5Digest md5;
182
183
1
        void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
184
1
        md5.update(buf, file_size);
185
1
        md5.digest();
186
1
        munmap(buf, file_size);
187
188
1
        req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str());
189
1
    }
190
191
1
    if (req->method() == HttpMethod::HEAD) {
192
1
        close(fd);
193
1
        req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(file_size).c_str());
194
1
        HttpChannel::send_reply(req);
195
1
        return;
196
1
    }
197
198
0
    HttpChannel::send_file(req, fd, 0, file_size, rate_limit_group);
199
0
}
200
201
1
void do_dir_response(const std::string& dir_path, HttpRequest* req, bool is_acquire_filesize) {
202
1
    bool exists = true;
203
1
    std::vector<io::FileInfo> files;
204
1
    Status st = io::global_local_filesystem()->list(dir_path, true, &files, &exists);
205
1
    if (!st.ok()) {
206
0
        LOG(WARNING) << "Failed to scan dir. " << st;
207
0
        HttpChannel::send_error(req, HttpStatus::INTERNAL_SERVER_ERROR);
208
0
        return;
209
0
    }
210
211
1
    VLOG_DEBUG << "list dir: " << dir_path << ", file count: " << files.size();
212
213
1
    const std::string FILE_DELIMITER_IN_DIR_RESPONSE = "\n";
214
215
1
    std::stringstream result;
216
35
    for (auto& file : files) {
217
35
        result << file.file_name << FILE_DELIMITER_IN_DIR_RESPONSE;
218
35
        if (is_acquire_filesize) {
219
35
            result << file.file_size << FILE_DELIMITER_IN_DIR_RESPONSE;
220
35
        }
221
35
    }
222
223
1
    std::string result_str = result.str();
224
1
    HttpChannel::send_reply(req, result_str);
225
1
}
226
227
2
bool load_size_smaller_than_wal_limit(int64_t content_length) {
228
    // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload content length. If it is empty or equals to 0, it means this streamload
229
    // is a chunked streamload and we are not sure its size.
230
    // 2. if streamload content length is too large, like larger than 80% of the WAL constrain.
231
    //
232
    // This two cases, we are not certain that the Write-Ahead Logging (WAL) constraints allow for writing down
233
    // these blocks within the limited space. So we need to set group_commit = false to avoid dead lock.
234
2
    size_t max_available_size = ExecEnv::GetInstance()->wal_mgr()->get_max_available_size();
235
2
    return (content_length < 0.8 * max_available_size);
236
2
}
237
238
1
// Probes `endpoint` for the batch download API by sending a HEAD request to
// /api/_tablet/_batch_download?check=true. The request uses a timeout of
// CHECK_SUPPORT_TIMEOUT seconds and goes through HttpClient::execute_with_retry;
// the resulting Status is returned as is.
Status is_support_batch_download(const std::string& endpoint) {
    std::string probe_url =
            fmt::format("http://{}/api/_tablet/_batch_download?check=true", endpoint);

    auto send_probe = [&probe_url](HttpClient* client) -> Status {
        RETURN_IF_ERROR(client->init(probe_url));
        client->set_timeout_ms(CHECK_SUPPORT_TIMEOUT * 1000);
        client->set_method(HttpMethod::HEAD);
        // Only the status matters; the response body is discarded.
        std::string discarded_body;
        return client->execute(&discarded_body);
    };

    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, send_probe);
}
249
250
// Lists the files under `remote_dir` on `address` through the batch download
// API and appends one (file name, file size) pair per file to `file_info_list`.
//
// The response is expected to be newline-delimited with alternating
// "name" / "size" lines. Returns the HTTP error if the request fails, or
// InternalError if the number of lines is odd or a size cannot be parsed.
Status list_remote_files_v2(const std::string& address, const std::string& token,
                            const std::string& remote_dir,
                            std::vector<std::pair<std::string, size_t>>* file_info_list) {
    std::string remote_url =
            fmt::format("http://{}/api/_tablet/_batch_download?token={}&dir={}&list=true", address,
                        token, remote_dir);

    std::string file_list_str;
    auto list_files_cb = [&](HttpClient* client) {
        // Drop any partial body left over from a previous attempt.
        file_list_str.clear();
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::GET);
        client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000);
        return client->execute(&file_list_str);
    };
    Status status = HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb);
    if (!status.ok()) {
        // The URL embeds the auth token, so it is masked before being logged.
        LOG(WARNING) << "failed to list remote files from " << mask_token(remote_url)
                     << ", status: " << status.to_string() << ", response: " << file_list_str;
        return status;
    }

    std::vector<std::string> file_list =
            strings::Split(file_list_str, "\n", strings::SkipWhitespace());
    if (file_list.size() % 2 != 0) {
        return Status::InternalError("batch download files: invalid file list, size is not even");
    }

    VLOG_DEBUG << "list remote files from " << mask_token(remote_url)
               << ", file count: " << file_list.size() / 2;

    file_info_list->reserve(file_info_list->size() + file_list.size() / 2);
    // Entries come in pairs: file_list[i] is the name, file_list[i + 1] its size.
    for (size_t i = 0; i < file_list.size(); i += 2) {
        uint64_t file_size = 0;
        try {
            file_size = std::stoull(file_list[i + 1]);
        } catch (const std::exception&) {
            return Status::InternalError("batch download files: invalid file size format: " +
                                         file_list[i + 1]);
        }
        file_info_list->emplace_back(std::move(file_list[i]), file_size);
    }

    return Status::OK();
}
293
294
// Downloads the files named in `file_info_list` from `remote_dir` on `address`
// into `local_dir` with a single batch download request.
//
// The request timeout is estimated from the total size and
// config::download_low_speed_limit_kbps, but is never below
// config::download_low_speed_time. After the transfer, every file's local size
// is compared with the expected size and its permission is set to owner
// read/write. The whole sequence goes through HttpClient::execute_with_retry.
Status download_files_v2(const std::string& address, const std::string& token,
                         const std::string& remote_dir, const std::string& local_dir,
                         const std::vector<std::pair<std::string, size_t>>& file_info_list) {
    std::string remote_url = fmt::format("http://{}/api/_tablet/_batch_download?dir={}&token={}",
                                         address, remote_dir, token);

    size_t batch_file_size = 0;
    std::unordered_set<std::string> expected_files;
    std::stringstream ss;
    // The request payload is the newline-delimited list of wanted file names.
    for (const auto& file_info : file_info_list) {
        ss << file_info.first << "\n";
        batch_file_size += file_info.second;
        expected_files.insert(file_info.first);
    }
    std::string payload = ss.str();

    // Guard the division: a non-positive speed limit falls back to the minimum
    // timeout below instead of dividing by zero.
    uint64_t estimate_timeout = 0;
    if (config::download_low_speed_limit_kbps > 0) {
        estimate_timeout = batch_file_size / config::download_low_speed_limit_kbps / 1024;
    }
    if (estimate_timeout < config::download_low_speed_time) {
        estimate_timeout = config::download_low_speed_time;
    }

    // The URL embeds the auth token, so it is masked before being logged.
    LOG(INFO) << "begin to download files from " << mask_token(remote_url) << " to " << local_dir
              << ", file count: " << file_info_list.size() << ", total size: " << batch_file_size
              << ", timeout: " << estimate_timeout;

    auto callback = [&](HttpClient* client) -> Status {
        RETURN_IF_ERROR(client->init(remote_url, false));
        client->set_method(HttpMethod::POST);
        client->set_payload(payload);
        client->set_timeout_ms(estimate_timeout * 1000);
        RETURN_IF_ERROR(client->download_multi_files(local_dir, expected_files));
        for (auto&& [file_name, file_size] : file_info_list) {
            std::string local_file_path = local_dir + "/" + file_name;

            std::error_code ec;
            // Check file length
            uint64_t local_file_size = std::filesystem::file_size(local_file_path, ec);
            if (ec) {
                LOG(WARNING) << "download file error: " << ec.message();
                return Status::IOError("can't retrive file_size of {}, due to {}", local_file_path,
                                       ec.message());
            }
            if (local_file_size != file_size) {
                LOG(WARNING) << "download file length error"
                             << ", remote_path=" << mask_token(remote_url)
                             << ", file_name=" << file_name << ", file_size=" << file_size
                             << ", local_file_size=" << local_file_size;
                return Status::InternalError("downloaded file size is not equal");
            }
            RETURN_IF_ERROR(io::global_local_filesystem()->permission(
                    local_file_path, io::LocalFileSystem::PERMS_OWNER_RW));
        }

        return Status::OK();
    };
    return HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, callback);
}
351
352
} // namespace doris