Coverage Report

Created: 2026-03-16 14:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/service/http/action/batch_download_action.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "service/http/action/batch_download_action.h"
19
20
#include <absl/strings/str_split.h>
21
22
#include <memory>
23
#include <string>
24
#include <utility>
25
#include <vector>
26
27
#include "common/config.h"
28
#include "common/logging.h"
29
#include "common/status.h"
30
#include "io/fs/local_file_system.h"
31
#include "runtime/exec_env.h"
32
#include "service/http/http_channel.h"
33
#include "service/http/http_method.h"
34
#include "service/http/http_request.h"
35
#include "service/http/utils.h"
36
#include "util/security.h"
37
38
namespace doris {
39
// File-local names of the request parameters this action reads through HttpRequest::param().
namespace {
40
const std::string CHECK_PARAMETER = "check"; // "true" => handle() replies "OK" and stops
41
const std::string LIST_PARAMETER = "list"; // "true" => _handle() lists the directory instead of sending files
42
const std::string DIR_PARAMETER = "dir"; // directory the request targets; mandatory in handle()
43
const std::string TOKEN_PARAMETER = "token"; // value compared against _exec_env->token() in _check_token()
44
} // namespace
45
46
// Constructs the action.
//
// exec_env is forwarded to the HttpHandlerWithAuth base, rate_limit_group is moved into
// _rate_limit_group, and every entry of allow_dirs is canonicalized through the global local
// filesystem and appended to _allow_paths.
//
// NOTE(review): an entry that fails to canonicalize is dropped without any log or error, so a
// misconfigured directory silently disappears from the allow-list.
BatchDownloadAction::BatchDownloadAction(
47
        ExecEnv* exec_env, std::shared_ptr<bufferevent_rate_limit_group> rate_limit_group,
48
        const std::vector<std::string>& allow_dirs)
49
0
        : HttpHandlerWithAuth(exec_env), _rate_limit_group(std::move(rate_limit_group)) {
50
0
    for (const auto& dir : allow_dirs) {
51
0
        std::string p; // receives the canonical form of dir
52
0
        Status st = io::global_local_filesystem()->canonicalize(dir, &p);
53
0
        if (!st.ok()) {
54
0
            continue; // skip this entry; the failure status is discarded
55
0
        }
56
0
        _allow_paths.emplace_back(std::move(p));
57
0
    }
58
0
}
59
60
0
// Entry point for one batch download request.
//
// The checks below run in order; each failure sends a reply and returns immediately:
//   1. "check" == "true"             -> reply "OK" (API support probe).
//   2. "dir" parameter empty         -> HttpStatus::BAD_REQUEST.
//   3. "dir" contains ".."           -> HttpStatus::FORBIDDEN.
//   4. _check_token() fails (only evaluated when config::enable_token_check is set)
//                                    -> UNAUTHORIZED for NOT_AUTHORIZED, otherwise
//                                       INTERNAL_SERVER_ERROR.
//   5. _check_path_is_allowed() fails -> NOT_FOUND for NOT_FOUND / IO_ERROR, UNAUTHORIZED for
//                                       NOT_AUTHORIZED, otherwise INTERNAL_SERVER_ERROR.
//   6. is_directory() fails          -> INTERNAL_SERVER_ERROR; path is not a directory
//                                       -> BAD_REQUEST.
// When every check passes the request is handed to _handle().
void BatchDownloadAction::handle(HttpRequest* req) {
61
0
    if (VLOG_CRITICAL_IS_ON) {
62
0
        VLOG_CRITICAL << "accept one batch download request " << req->debug_string();
63
0
    }
64
65
0
    if (req->param(CHECK_PARAMETER) == "true") {
66
        // API support probe: answered with "OK" before any token or path validation runs.
67
0
        HttpChannel::send_reply(req, "OK");
68
0
        return;
69
0
    }
70
71
    // Read the mandatory 'dir' parameter that names the directory being requested.
72
0
    const std::string& dir_path = req->param(DIR_PARAMETER);
73
0
    if (dir_path.empty()) {
74
0
        std::string error_msg =
75
0
                std::string("parameter " + DIR_PARAMETER + " not specified in url.");
76
0
        LOG(WARNING) << "handle batch download request: " << error_msg
77
0
                     << ", url: " << mask_token(req->uri());
78
0
        HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg);
79
0
        return;
80
0
    }
81
82
0
    if (dir_path.find("..") != std::string::npos) { // plain substring match, so "a..b" is rejected too
83
0
        std::string error_msg = "Not allowed to read relative path: " + dir_path;
84
0
        LOG(WARNING) << "handle batch download request: " << error_msg
85
0
                     << ", url: " << mask_token(req->uri());
86
0
        HttpChannel::send_reply(req, HttpStatus::FORBIDDEN, error_msg);
87
0
        return;
88
0
    }
89
90
0
    Status status;
91
0
    if (config::enable_token_check) { // token validation is skipped entirely when this flag is off
92
0
        status = _check_token(req);
93
0
        if (!status.ok()) {
94
0
            std::string error_msg = status.to_string();
95
0
            if (status.is<ErrorCode::NOT_AUTHORIZED>()) {
96
0
                HttpChannel::send_reply(req, HttpStatus::UNAUTHORIZED, error_msg);
97
0
                return;
98
0
            } else {
99
0
                HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, error_msg);
100
0
                return;
101
0
            }
102
0
        }
103
0
    }
104
105
0
    // NOTE(review): _check_path_is_allowed() validates the canonicalized form of dir_path, but the
    // is_directory() and _handle() calls below keep using the raw dir_path. Also, a rejection here
    // is not logged by this function, unlike the other failure branches.
    status = _check_path_is_allowed(dir_path);
106
0
    if (!status.ok()) {
107
0
        std::string error_msg = status.to_string();
108
0
        if (status.is<ErrorCode::NOT_FOUND>() || status.is<ErrorCode::IO_ERROR>()) {
109
0
            HttpChannel::send_reply(req, HttpStatus::NOT_FOUND, error_msg);
110
0
            return;
111
0
        } else if (status.is<ErrorCode::NOT_AUTHORIZED>()) {
112
0
            HttpChannel::send_reply(req, HttpStatus::UNAUTHORIZED, error_msg);
113
0
            return;
114
0
        } else {
115
0
            HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, error_msg);
116
0
            return;
117
0
        }
118
0
    }
119
120
0
    bool is_dir = false;
121
0
    status = io::global_local_filesystem()->is_directory(dir_path, &is_dir);
122
0
    if (!status.ok()) {
123
0
        LOG(WARNING) << "handle batch download request: " << status.to_string()
124
0
                     << ", url: " << mask_token(req->uri());
125
0
        HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, status.to_string());
126
0
        return;
127
0
    }
128
129
0
    if (!is_dir) { // only directories can be listed or batch-downloaded
130
0
        std::string error_msg = fmt::format("The requested path is not a directory: {}", dir_path);
131
0
        LOG(WARNING) << "handle batch download request: " << error_msg
132
0
                     << ", url: " << mask_token(req->uri());
133
0
        HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg);
134
0
        return;
135
0
    }
136
137
0
    _handle(req, dir_path); // all checks passed: list the directory or send the requested files
138
139
0
    VLOG_CRITICAL << "deal with batch download request finished! ";
140
0
}
141
142
0
// Dispatches an already-validated request for dir_path.
//
// When the "list" parameter equals "true" the directory is answered through do_dir_response();
// any other value (including an absent parameter) goes to _handle_batch_download().
void BatchDownloadAction::_handle(HttpRequest* req, const std::string& dir_path) {
143
0
    bool is_list_request = req->param(LIST_PARAMETER) == "true";
144
0
    if (is_list_request) {
145
        // Reply with the list of files in the specified directory.
146
0
        bool is_acquire_filesize = true; // presumably asks do_dir_response to include file sizes - TODO confirm
147
0
        do_dir_response(dir_path, req, is_acquire_filesize);
148
0
    } else {
149
0
        _handle_batch_download(req, dir_path);
150
0
    }
151
0
}
152
153
0
// Sends the files named in the request body from dir_path.
//
// The body is split on "\n" with absl::SkipWhitespace, which drops empty and whitespace-only
// pieces. The request is rejected when:
//   - no file name remains          -> HttpStatus::BAD_REQUEST
//   - more than 64 names are given  -> HttpStatus::BAD_REQUEST
//   - any name contains '/'         -> HttpStatus::FORBIDDEN
// Otherwise the names are moved into HttpChannel::send_files() together with dir_path.
void BatchDownloadAction::_handle_batch_download(HttpRequest* req, const std::string& dir_path) {
154
0
    std::vector<std::string> files =
155
0
            absl::StrSplit(req->get_request_body(), "\n", absl::SkipWhitespace());
156
0
    if (files.empty()) {
157
0
        std::string error_msg = "No file specified in request body.";
158
0
        LOG(WARNING) << "handle batch download request: " << error_msg
159
0
                     << ", url: " << mask_token(req->uri());
160
0
        HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg);
161
0
        return;
162
0
    }
163
164
0
    // NOTE(review): exactly 64 files pass this check, while the message below says
    // "less than 64"; the wording and the condition disagree by one.
    if (files.size() > 64) {
165
0
        std::string error_msg =
166
0
                "The number of files to download in a batch should be less than 64.";
167
0
        LOG(WARNING) << "handle batch download request: " << error_msg
168
0
                     << ", url: " << mask_token(req->uri());
169
0
        HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, error_msg);
170
0
        return;
171
0
    }
172
173
0
    // Entries must be bare names: anything containing '/' is refused. The whole request fails on
    // the first offending entry.
    // NOTE(review): an entry that is exactly ".." contains no '/' and is not rejected here.
    for (const auto& file : files) {
174
0
        if (file.find('/') != std::string::npos) {
175
0
            std::string error_msg =
176
0
                    fmt::format("Not allowed to read relative path: {}, dir: {}", file, dir_path);
177
0
            LOG(WARNING) << "handle batch download request: " << error_msg
178
0
                         << ", url: " << mask_token(req->uri());
179
0
            HttpChannel::send_reply(req, HttpStatus::FORBIDDEN, error_msg);
180
0
            return;
181
0
        }
182
0
    }
183
184
0
    HttpChannel::send_files(req, dir_path, std::move(files));
185
0
}
186
187
0
// Validates the "token" request parameter against _exec_env->token().
//
// Returns Status::NotAuthorized when the parameter is empty or differs from the local token, and
// Status::OK() when the two strings are equal. Tokens and the request URI are passed through
// mask_token() before being logged or embedded in the returned status.
Status BatchDownloadAction::_check_token(HttpRequest* req) {
188
0
    const std::string& token_str = req->param(TOKEN_PARAMETER);
189
0
    if (token_str.empty()) {
190
0
        LOG(WARNING) << "token is not specified in request. url: " << mask_token(req->uri());
191
0
        return Status::NotAuthorized("token is not specified.");
192
0
    }
193
194
0
    const std::string& local_token = _exec_env->token();
195
0
    if (token_str != local_token) { // ordinary std::string inequality comparison
196
0
        LOG(WARNING) << "invalid download token: " << mask_token(token_str)
197
0
                     << ", local token: " << mask_token(local_token)
198
0
                     << ", url: " << mask_token(req->uri());
199
0
        return Status::NotAuthorized("invalid token {}", mask_token(token_str));
200
0
    }
201
202
0
    return Status::OK();
203
0
}
204
205
0
// Checks whether file_path lies under one of the directories stored in _allow_paths.
//
// file_path is canonicalized first; a canonicalization error is returned to the caller unchanged
// (RETURN_IF_ERROR). Returns Status::OK() as soon as io::LocalFileSystem::contain_path() accepts
// the canonical path for some allowed entry, and Status::NotAuthorized (carrying the canonical
// path) when no entry matches, which includes the case of an empty _allow_paths.
Status BatchDownloadAction::_check_path_is_allowed(const std::string& file_path) {
206
0
    std::string canonical_file_path;
207
0
    RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(file_path, &canonical_file_path));
208
0
    for (auto& allow_path : _allow_paths) { // _allow_paths holds canonical paths built in the constructor
209
0
        if (io::LocalFileSystem::contain_path(allow_path, canonical_file_path)) {
210
0
            return Status::OK();
211
0
        }
212
0
    }
213
214
0
    return Status::NotAuthorized("file path is not allowed: {}", canonical_file_path);
215
0
}
216
217
} // end namespace doris