be/src/io/fs/local_file_writer.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "io/fs/local_file_writer.h" |
19 | | |
20 | | // IWYU pragma: no_include <bthread/errno.h> |
21 | | #include <errno.h> // IWYU pragma: keep |
22 | | #include <fcntl.h> |
23 | | #include <glog/logging.h> |
24 | | #include <sys/uio.h> |
25 | | #include <unistd.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cstring> |
29 | | #include <ostream> |
30 | | #include <utility> |
31 | | |
32 | | // IWYU pragma: no_include <opentelemetry/common/threadlocal.h> |
33 | | #include "common/cast_set.h" |
34 | | #include "common/compiler_util.h" // IWYU pragma: keep |
35 | | #include "common/macros.h" |
36 | | #include "common/metrics/doris_metrics.h" |
37 | | #include "common/status.h" |
38 | | #include "cpp/sync_point.h" |
39 | | #include "io/fs/err_utils.h" |
40 | | #include "io/fs/file_writer.h" |
41 | | #include "io/fs/local_file_system.h" |
42 | | #include "io/fs/path.h" |
43 | | #include "storage/data_dir.h" |
44 | | #include "util/debug_points.h" |
45 | | |
46 | | namespace doris::io { |
47 | | #include "common/compile_check_begin.h" |
48 | | namespace { |
49 | | |
50 | 7.43k | Status sync_dir(const io::Path& dirname) { |
51 | 7.43k | TEST_SYNC_POINT_RETURN_WITH_VALUE("sync_dir", Status::IOError("")); |
52 | 7.43k | int fd; |
53 | 7.43k | RETRY_ON_EINTR(fd, ::open(dirname.c_str(), O_DIRECTORY | O_RDONLY)); |
54 | 7.43k | if (-1 == fd) { |
55 | 0 | return localfs_error(errno, fmt::format("failed to open {}", dirname.native())); |
56 | 0 | } |
57 | 7.43k | Defer defer {[fd] { ::close(fd); }}; |
58 | | #ifdef __APPLE__ |
59 | | if (fcntl(fd, F_FULLFSYNC) < 0) { |
60 | | return localfs_error(errno, fmt::format("failed to sync {}", dirname.native())); |
61 | | } |
62 | | #else |
63 | 7.43k | if (0 != ::fdatasync(fd)) { |
64 | 0 | return localfs_error(errno, fmt::format("failed to sync {}", dirname.native())); |
65 | 0 | } |
66 | 7.43k | #endif |
67 | 7.43k | return Status::OK(); |
68 | 7.43k | } |
69 | | |
70 | | } // namespace |
71 | | |
72 | | LocalFileWriter::LocalFileWriter(Path path, int fd, bool sync_data) |
73 | 12.8k | : _path(std::move(path)), _fd(fd), _sync_data(sync_data) { |
74 | 12.8k | DorisMetrics::instance()->local_file_open_writing->increment(1); |
75 | 12.8k | DorisMetrics::instance()->local_file_writer_total->increment(1); |
76 | 12.8k | } |
77 | | |
78 | 193k | size_t LocalFileWriter::bytes_appended() const { |
79 | 193k | return _bytes_appended; |
80 | 193k | } |
81 | | |
82 | 12.8k | LocalFileWriter::~LocalFileWriter() { |
83 | 12.8k | if (_state == State::OPENED) { |
84 | 45 | _abort(); |
85 | 45 | } |
86 | 12.8k | DorisMetrics::instance()->local_file_open_writing->increment(-1); |
87 | 12.8k | DorisMetrics::instance()->file_created_total->increment(1); |
88 | 12.8k | DorisMetrics::instance()->local_bytes_written_total->increment(_bytes_appended); |
89 | 12.8k | } |
90 | | |
91 | 18.3k | Status LocalFileWriter::close(bool non_block) { |
92 | 18.3k | if (_state == State::CLOSED) { |
93 | 0 | return Status::InternalError("LocalFileWriter already closed, file path {}", |
94 | 0 | _path.native()); |
95 | 0 | } |
96 | 18.3k | if (_state == State::ASYNC_CLOSING) { |
97 | 5.62k | if (non_block) { |
98 | 0 | return Status::InternalError("Don't submit async close multi times"); |
99 | 0 | } |
100 | | // Actucally the first time call to close(true) would return the value of _finalize, if it returned one |
101 | | // error status then the code would never call the second close(true) |
102 | 5.62k | _state = State::CLOSED; |
103 | 5.62k | return Status::OK(); |
104 | 5.62k | } |
105 | 12.7k | if (non_block) { |
106 | 5.63k | _state = State::ASYNC_CLOSING; |
107 | 7.13k | } else { |
108 | 7.13k | _state = State::CLOSED; |
109 | 7.13k | } |
110 | 12.7k | return _close(_sync_data); |
111 | 18.3k | } |
112 | | |
113 | 45 | void LocalFileWriter::_abort() { |
114 | 45 | auto st = _close(false); |
115 | 45 | if (!st.ok()) [[unlikely]] { |
116 | 0 | LOG(WARNING) << "close file failed when abort file writer: " << st; |
117 | 0 | } |
118 | 45 | st = io::global_local_filesystem()->delete_file(_path); |
119 | 45 | if (!st.ok()) [[unlikely]] { |
120 | 0 | LOG(WARNING) << "delete file failed when abort file writer: " << st; |
121 | 0 | } |
122 | 45 | } |
123 | | |
124 | 68.5k | Status LocalFileWriter::appendv(const Slice* data, size_t data_cnt) { |
125 | 68.5k | TEST_SYNC_POINT_RETURN_WITH_VALUE("LocalFileWriter::appendv", |
126 | 68.5k | Status::IOError("inject io error")); |
127 | 68.5k | if (_state != State::OPENED) [[unlikely]] { |
128 | 1 | return Status::InternalError("append to closed file: {}", _path.native()); |
129 | 1 | } |
130 | 68.5k | _dirty = true; |
131 | | |
132 | | // Convert the results into the iovec vector to request |
133 | | // and calculate the total bytes requested. |
134 | 68.5k | size_t bytes_req = 0; |
135 | 68.5k | std::vector<iovec> iov(data_cnt); |
136 | 258k | for (size_t i = 0; i < data_cnt; i++) { |
137 | 190k | const Slice& result = data[i]; |
138 | 190k | bytes_req += result.size; |
139 | 190k | iov[i] = {result.data, result.size}; |
140 | 190k | } |
141 | | |
142 | 68.5k | size_t completed_iov = 0; |
143 | 68.5k | size_t n_left = bytes_req; |
144 | 68.5k | while (n_left > 0) { |
145 | | // Never request more than IOV_MAX in one request. |
146 | 68.5k | size_t iov_count = std::min(data_cnt - completed_iov, static_cast<size_t>(IOV_MAX)); |
147 | 68.5k | ssize_t res; |
148 | 68.5k | RETRY_ON_EINTR(res, SYNC_POINT_HOOK_RETURN_VALUE(::writev(_fd, iov.data() + completed_iov, |
149 | 68.5k | cast_set<int32_t>(iov_count)), |
150 | 68.5k | "LocalFileWriter::writev", _fd)); |
151 | 68.5k | DBUG_EXECUTE_IF("LocalFileWriter::appendv.io_error", { |
152 | 68.5k | auto sub_path = dp->param<std::string>("sub_path", ""); |
153 | 68.5k | if ((sub_path.empty() && _path.filename().compare(kTestFilePath)) || |
154 | 68.5k | (!sub_path.empty() && _path.native().find(sub_path) != std::string::npos)) { |
155 | 68.5k | res = -1; |
156 | 68.5k | errno = EIO; |
157 | 68.5k | LOG(WARNING) << Status::IOError("debug write io error: {}", _path.native()); |
158 | 68.5k | } |
159 | 68.5k | }); |
160 | 68.5k | if (UNLIKELY(res < 0)) { |
161 | 1 | return localfs_error(errno, fmt::format("failed to write {}", _path.native())); |
162 | 1 | } |
163 | | |
164 | 68.5k | if (LIKELY(res == n_left)) { |
165 | | // All requested bytes were read. This is almost always the case. |
166 | 68.5k | n_left = 0; |
167 | 68.5k | break; |
168 | 68.5k | } |
169 | | // Adjust iovec vector based on bytes read for the next request. |
170 | 3 | ssize_t bytes_rem = res; |
171 | 5 | for (size_t i = completed_iov; i < data_cnt; i++) { |
172 | 5 | if (bytes_rem >= iov[i].iov_len) { |
173 | | // The full length of this iovec was written. |
174 | 2 | completed_iov++; |
175 | 2 | bytes_rem -= iov[i].iov_len; |
176 | 3 | } else { |
177 | | // Partially wrote this result. |
178 | | // Adjust the iov_len and iov_base to write only the missing data. |
179 | 3 | iov[i].iov_base = static_cast<uint8_t*>(iov[i].iov_base) + bytes_rem; |
180 | 3 | iov[i].iov_len -= bytes_rem; |
181 | 3 | break; // Don't need to adjust remaining iovec's. |
182 | 3 | } |
183 | 5 | } |
184 | 3 | n_left -= res; |
185 | 3 | } |
186 | 68.5k | DCHECK_EQ(0, n_left); |
187 | 68.5k | _bytes_appended += bytes_req; |
188 | 68.5k | return Status::OK(); |
189 | 68.5k | } |
190 | | |
191 | | // TODO(ByteYue): Refactor this function as FileWriter::flush() |
192 | 0 | Status LocalFileWriter::_finalize() { |
193 | 0 | TEST_SYNC_POINT_RETURN_WITH_VALUE("LocalFileWriter::finalize", |
194 | 0 | Status::IOError("inject io error")); |
195 | 0 | if (_state == State::OPENED) [[unlikely]] { |
196 | 0 | return Status::InternalError("finalize closed file: {}", _path.native()); |
197 | 0 | } |
198 | | |
199 | 0 | if (_dirty) { |
200 | 0 | #if defined(__linux__) |
201 | 0 | int flags = SYNC_FILE_RANGE_WRITE; |
202 | 0 | if (sync_file_range(_fd, 0, 0, flags) < 0) { |
203 | 0 | return localfs_error(errno, fmt::format("failed to finalize {}", _path.native())); |
204 | 0 | } |
205 | 0 | #endif |
206 | 0 | } |
207 | 0 | return Status::OK(); |
208 | 0 | } |
209 | | |
210 | 12.8k | Status LocalFileWriter::_close(bool sync) { |
211 | 20.2k | auto fd_reclaim_func = [&](Status st) { |
212 | 20.2k | if (_fd > 0 && 0 != ::close(_fd)) { |
213 | 0 | return localfs_error(errno, fmt::format("failed to {}, along with failed to close {}", |
214 | 0 | st, _path.native())); |
215 | 0 | } |
216 | 20.2k | _fd = -1; |
217 | 20.2k | return st; |
218 | 20.2k | }; |
219 | 12.8k | if (sync && config::sync_file_on_close) { |
220 | 7.43k | if (_dirty) { |
221 | | #ifdef __APPLE__ |
222 | | if (fcntl(_fd, F_FULLFSYNC) < 0) [[unlikely]] { |
223 | | return fd_reclaim_func( |
224 | | localfs_error(errno, fmt::format("failed to sync {}", _path.native()))); |
225 | | } |
226 | | #else |
227 | 6.94k | if (0 != ::fdatasync(_fd)) [[unlikely]] { |
228 | 0 | return fd_reclaim_func( |
229 | 0 | localfs_error(errno, fmt::format("failed to sync {}", _path.native()))); |
230 | 0 | } |
231 | 6.94k | #endif |
232 | 6.94k | _dirty = false; |
233 | 6.94k | } |
234 | 7.43k | RETURN_IF_ERROR(fd_reclaim_func(sync_dir(_path.parent_path()))); |
235 | 7.43k | } |
236 | | |
237 | 12.8k | DBUG_EXECUTE_IF("LocalFileWriter.close.failed", { |
238 | | // spare '.testfile' to make bad disk checker happy |
239 | 12.8k | if (_path.filename().compare(kTestFilePath)) { |
240 | 12.8k | return fd_reclaim_func( |
241 | 12.8k | Status::IOError("cannot close {}: {}", _path.native(), std::strerror(errno))); |
242 | 12.8k | } |
243 | 12.8k | }); |
244 | | |
245 | 12.8k | TEST_SYNC_POINT_RETURN_WITH_VALUE("LocalFileWriter::close", Status::IOError("inject io error")); |
246 | 12.8k | return fd_reclaim_func(Status::OK()); |
247 | 12.8k | } |
248 | | #include "common/compile_check_end.h" |
249 | | |
250 | | } // namespace doris::io |