be/src/runtime/small_file_mgr.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "runtime/small_file_mgr.h" |
19 | | |
20 | | // IWYU pragma: no_include <bthread/errno.h> |
21 | | #include <absl/strings/str_split.h> |
22 | | #include <errno.h> // IWYU pragma: keep |
23 | | #include <gen_cpp/HeartbeatService_types.h> |
24 | | #include <gen_cpp/Types_types.h> |
25 | | #include <glog/logging.h> |
26 | | #include <stdint.h> |
27 | | #include <stdio.h> |
28 | | #include <unistd.h> |
29 | | |
#include <charconv>
#include <cstring>
#include <memory>
#include <sstream>
#include <system_error>
#include <utility>
#include <vector>
35 | | |
36 | | #include "common/metrics/doris_metrics.h" |
37 | | #include "common/metrics/metrics.h" |
38 | | #include "common/status.h" |
39 | | #include "io/fs/file_system.h" |
40 | | #include "io/fs/local_file_system.h" |
41 | | #include "runtime/exec_env.h" |
42 | | #include "service/http/http_client.h" |
43 | | #include "util/md5.h" |
44 | | #include "util/string_util.h" |
45 | | |
46 | | namespace doris { |
47 | | |
48 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(small_file_cache_count, MetricUnit::NOUNIT); |
49 | | |
50 | | SmallFileMgr::SmallFileMgr(ExecEnv* env, const std::string& local_path) |
51 | 0 | : _exec_env(env), _local_path(local_path) { |
52 | 0 | REGISTER_HOOK_METRIC(small_file_cache_count, [this]() { |
53 | | // std::lock_guard<std::mutex> l(_lock); |
54 | 0 | return _file_cache.size(); |
55 | 0 | }); |
56 | 0 | } |
57 | | |
58 | 0 | SmallFileMgr::~SmallFileMgr() { |
59 | 0 | DEREGISTER_HOOK_METRIC(small_file_cache_count); |
60 | 0 | } |
61 | | |
62 | 0 | Status SmallFileMgr::init() { |
63 | 0 | RETURN_IF_ERROR(_load_local_files()); |
64 | 0 | return Status::OK(); |
65 | 0 | } |
66 | | |
67 | 0 | Status SmallFileMgr::_load_local_files() { |
68 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(_local_path)); |
69 | | |
70 | 0 | auto scan_cb = [this](const io::FileInfo& file) { |
71 | 0 | if (!file.is_file) { |
72 | 0 | return true; |
73 | 0 | } |
74 | 0 | auto st = _load_single_file(_local_path, file.file_name); |
75 | 0 | if (!st.ok()) { |
76 | 0 | LOG(WARNING) << "load small file failed: " << st; |
77 | 0 | } |
78 | 0 | return true; |
79 | 0 | }; |
80 | |
|
81 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->iterate_directory(_local_path, scan_cb)); |
82 | 0 | return Status::OK(); |
83 | 0 | } |
84 | | |
85 | 0 | Status SmallFileMgr::_load_single_file(const std::string& path, const std::string& file_name) { |
86 | | // file name format should be like: |
87 | | // file_id.md5 |
88 | 0 | std::vector<std::string> parts = absl::StrSplit(file_name, "."); |
89 | 0 | if (parts.size() != 2) { |
90 | 0 | return Status::InternalError("Not a valid file name: {}", file_name); |
91 | 0 | } |
92 | 0 | int64_t file_id = std::stol(parts[0]); |
93 | 0 | std::string md5 = parts[1]; |
94 | |
|
95 | 0 | if (_file_cache.find(file_id) != _file_cache.end()) { |
96 | 0 | return Status::InternalError("File with same id is already been loaded: {}", file_id); |
97 | 0 | } |
98 | | |
99 | 0 | std::string file_md5; |
100 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->md5sum(path + "/" + file_name, &file_md5)); |
101 | 0 | if (file_md5 != md5) { |
102 | 0 | return Status::InternalError("Invalid md5 of file: {}", file_name); |
103 | 0 | } |
104 | | |
105 | 0 | CacheEntry entry; |
106 | 0 | entry.path = path + "/" + file_name; |
107 | 0 | entry.md5 = file_md5; |
108 | |
|
109 | 0 | _file_cache.emplace(file_id, entry); |
110 | 0 | return Status::OK(); |
111 | 0 | } |
112 | | |
113 | 0 | Status SmallFileMgr::get_file(int64_t file_id, const std::string& md5, std::string* file_path) { |
114 | 0 | std::unique_lock<std::mutex> l(_lock); |
115 | | // find in cache |
116 | 0 | auto it = _file_cache.find(file_id); |
117 | 0 | if (it != _file_cache.end()) { |
118 | | // find the cached file, check it |
119 | 0 | CacheEntry& entry = it->second; |
120 | 0 | Status st = _check_file(entry, md5); |
121 | 0 | if (!st.ok()) { |
122 | | // check file failed, we should remove this cache and download it from FE again |
123 | 0 | if (remove(entry.path.c_str()) != 0) { |
124 | 0 | return Status::InternalError("failed to remove file: {}, err: {}", file_id, |
125 | 0 | std::strerror(errno)); |
126 | 0 | } |
127 | 0 | _file_cache.erase(it); |
128 | 0 | } else { |
129 | | // check ok, return the path |
130 | 0 | *file_path = entry.path; |
131 | 0 | return Status::OK(); |
132 | 0 | } |
133 | 0 | } |
134 | | |
135 | | // file not found in cache. download it from FE |
136 | 0 | RETURN_IF_ERROR(_download_file(file_id, md5, file_path)); |
137 | | |
138 | 0 | return Status::OK(); |
139 | 0 | } |
140 | | |
141 | 0 | Status SmallFileMgr::_check_file(const CacheEntry& entry, const std::string& md5) { |
142 | 0 | bool exists; |
143 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(entry.path, &exists)); |
144 | 0 | if (!exists) { |
145 | 0 | return Status::InternalError("file not exist: {}", entry.path); |
146 | 0 | } |
147 | 0 | if (!iequal(md5, entry.md5)) { |
148 | 0 | return Status::InternalError("invalid MD5 of file: {}", entry.path); |
149 | 0 | } |
150 | 0 | return Status::OK(); |
151 | 0 | } |
152 | | |
153 | | Status SmallFileMgr::_download_file(int64_t file_id, const std::string& md5, |
154 | 0 | std::string* file_path) { |
155 | 0 | std::stringstream ss; |
156 | 0 | ss << _local_path << "/" << file_id << ".tmp"; |
157 | 0 | std::string tmp_file = ss.str(); |
158 | 0 | bool should_delete = true; |
159 | 0 | auto fp_closer = [&tmp_file, &should_delete](FILE* fp) { |
160 | 0 | fclose(fp); |
161 | 0 | if (should_delete) remove(tmp_file.c_str()); |
162 | 0 | }; |
163 | |
|
164 | 0 | std::unique_ptr<FILE, decltype(fp_closer)> fp(fopen(tmp_file.c_str(), "w"), fp_closer); |
165 | 0 | if (fp == nullptr) { |
166 | 0 | LOG(WARNING) << "fail to open file, file=" << tmp_file; |
167 | 0 | return Status::InternalError("fail to open file"); |
168 | 0 | } |
169 | | |
170 | 0 | ClusterInfo* cluster_info = _exec_env->cluster_info(); |
171 | | // Small file download is the only BE→FE path that uses HTTP (not Thrift/RPC). |
172 | | // master_fe_http_port is set to https_port when enable_https=true (see HeartbeatMgr). |
173 | | // The ~1ms fallback overhead is acceptable; small file downloads are infrequent. |
174 | 0 | const std::string host_port = cluster_info->master_fe_addr.hostname + ":" + |
175 | 0 | std::to_string(cluster_info->master_fe_http_port); |
176 | 0 | const std::string query = "/api/get_small_file?file_id=" + std::to_string(file_id) + |
177 | 0 | "&token=" + cluster_info->token; |
178 | |
|
179 | 0 | Status status; |
180 | 0 | Md5Digest digest; |
181 | 0 | auto download_cb = [&status, &tmp_file, &fp, &digest](const void* data, size_t length) { |
182 | 0 | digest.update(data, length); |
183 | 0 | auto res = fwrite(data, length, 1, fp.get()); |
184 | 0 | if (res != 1) { |
185 | 0 | LOG(WARNING) << "fail to write data to file, file=" << tmp_file |
186 | 0 | << ", error=" << ferror(fp.get()); |
187 | 0 | status = Status::InternalError("fail to write data when download"); |
188 | 0 | return false; |
189 | 0 | } |
190 | 0 | return true; |
191 | 0 | }; |
192 | |
|
193 | 0 | std::string url = "http://" + host_port + query; |
194 | 0 | LOG(INFO) << "download file from: " << url; |
195 | 0 | HttpClient client; |
196 | 0 | RETURN_IF_ERROR(client.init(url)); |
197 | 0 | Status execute_status = client.execute(download_cb); |
198 | |
|
199 | 0 | if (!execute_status.ok()) { |
200 | 0 | rewind(fp.get()); |
201 | 0 | if (ftruncate(fileno(fp.get()), 0) != 0) { |
202 | 0 | LOG(WARNING) << "fail to truncate temp file for https retry, errno=" << errno; |
203 | 0 | } |
204 | 0 | status = Status::OK(); |
205 | 0 | digest = Md5Digest(); |
206 | |
|
207 | 0 | url = "https://" + host_port + query; |
208 | 0 | LOG(INFO) << "HTTP failed, retrying with HTTPS: " << url; |
209 | 0 | HttpClient https_client; |
210 | 0 | RETURN_IF_ERROR(https_client.init(url)); |
211 | | // Skip TLS cert verification: internal cluster traffic only; file integrity |
212 | | // is guaranteed independently by MD5 checksum verification below. |
213 | 0 | https_client.use_untrusted_ssl(); |
214 | 0 | execute_status = https_client.execute(download_cb); |
215 | 0 | } |
216 | | |
217 | 0 | RETURN_IF_ERROR(execute_status); |
218 | 0 | RETURN_IF_ERROR(status); |
219 | 0 | digest.digest(); |
220 | |
|
221 | 0 | if (!iequal(digest.hex(), md5)) { |
222 | 0 | LOG(WARNING) << "file's checksum is not equal, download: " << digest.hex() |
223 | 0 | << ", expected: " << md5 << ", file: " << file_id; |
224 | 0 | return Status::InternalError("download with invalid md5"); |
225 | 0 | } |
226 | | |
227 | | // close this file |
228 | 0 | should_delete = false; |
229 | 0 | fp.reset(); |
230 | | |
231 | | // rename temporary file to library file |
232 | 0 | std::stringstream real_ss; |
233 | 0 | real_ss << _local_path << "/" << file_id << "." << md5; |
234 | 0 | std::string real_file_path = real_ss.str(); |
235 | 0 | auto ret = rename(tmp_file.c_str(), real_file_path.c_str()); |
236 | 0 | if (ret != 0) { |
237 | 0 | char buf[64]; |
238 | 0 | LOG(WARNING) << "fail to rename file from=" << tmp_file << ", to=" << real_file_path |
239 | 0 | << ", errno=" << errno << ", errmsg=" << strerror_r(errno, buf, 64); |
240 | 0 | remove(tmp_file.c_str()); |
241 | 0 | remove(real_file_path.c_str()); |
242 | 0 | return Status::InternalError("fail to rename file"); |
243 | 0 | } |
244 | | |
245 | | // add to file cache |
246 | 0 | CacheEntry entry; |
247 | 0 | entry.path = real_file_path; |
248 | 0 | entry.md5 = md5; |
249 | 0 | _file_cache.emplace(file_id, entry); |
250 | |
|
251 | 0 | *file_path = real_file_path; |
252 | |
|
253 | | LOG(INFO) << "finished to download file: " << file_path; |
254 | 0 | return Status::OK(); |
255 | 0 | } |
256 | | |
257 | | } // end namespace doris |