/root/doris/be/src/olap/utils.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/utils.h" |
19 | | |
20 | | // IWYU pragma: no_include <bthread/errno.h> |
21 | | #include <errno.h> // IWYU pragma: keep |
22 | | #include <stdarg.h> |
23 | | #include <time.h> |
24 | | #include <unistd.h> |
25 | | #include <zconf.h> |
26 | | #include <zlib.h> |
27 | | |
28 | | #include <cmath> |
29 | | #include <cstring> |
30 | | #include <memory> |
31 | | #include <regex> |
32 | | #include <set> |
33 | | #include <sstream> |
34 | | #include <string> |
35 | | #include <vector> |
36 | | |
37 | | #include "common/logging.h" |
38 | | #include "common/status.h" |
39 | | #include "io/fs/file_reader.h" |
40 | | #include "io/fs/file_writer.h" |
41 | | #include "io/fs/local_file_system.h" |
42 | | #include "olap/olap_common.h" |
43 | | #include "util/sse_util.hpp" |
44 | | #include "util/string_parser.hpp" |
45 | | #include "vec/runtime/ipv4_value.h" |
46 | | #include "vec/runtime/ipv6_value.h" |
47 | | |
48 | | namespace doris { |
49 | | #include "common/compile_check_begin.h" |
50 | | using namespace ErrorCode; |
51 | | |
52 | 3.16k | uint32_t olap_adler32_init() { |
53 | 3.16k | return (uint32_t)adler32(0, Z_NULL, 0); |
54 | 3.16k | } |
55 | | |
56 | 3.16k | uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len) { |
57 | 3.16k | return (uint32_t)adler32(adler, reinterpret_cast<const Bytef*>(buf), (uint32_t)len); |
58 | 3.16k | } |
59 | | |
60 | 2 | Status gen_timestamp_string(std::string* out_string) { |
61 | 2 | time_t now = time(nullptr); |
62 | 2 | tm local_tm; |
63 | | |
64 | 2 | if (localtime_r(&now, &local_tm) == nullptr) { |
65 | 0 | return Status::Error<OS_ERROR>("fail to localtime_r time. time={}", now); |
66 | 0 | } |
67 | 2 | char time_suffix[16] = {0}; // Example: 20150706111404's length is 15 |
68 | 2 | if (strftime(time_suffix, sizeof(time_suffix), "%Y%m%d%H%M%S", &local_tm) == 0) { |
69 | 0 | return Status::Error<OS_ERROR>("fail to strftime time. time={}", now); |
70 | 0 | } |
71 | | |
72 | 2 | *out_string = time_suffix; |
73 | 2 | return Status::OK(); |
74 | 2 | } |
75 | | |
76 | 573 | Status read_write_test_file(const std::string& test_file_path) { |
77 | 573 | if (access(test_file_path.c_str(), F_OK) == 0) { |
78 | 0 | if (remove(test_file_path.c_str()) != 0) { |
79 | 0 | char errmsg[64]; |
80 | 0 | return Status::IOError("fail to access test file. path={}, errno={}, err={}", |
81 | 0 | test_file_path, errno, strerror_r(errno, errmsg, 64)); |
82 | 0 | } |
83 | 573 | } else { |
84 | 573 | if (errno != ENOENT) { |
85 | 0 | char errmsg[64]; |
86 | 0 | return Status::IOError("fail to access test file. path={}, errno={}, err={}", |
87 | 0 | test_file_path, errno, strerror_r(errno, errmsg, 64)); |
88 | 0 | } |
89 | 573 | } |
90 | | |
91 | 573 | const size_t TEST_FILE_BUF_SIZE = 4096; |
92 | 573 | const size_t DIRECT_IO_ALIGNMENT = 512; |
93 | 573 | char* write_test_buff = nullptr; |
94 | 573 | char* read_test_buff = nullptr; |
95 | 573 | if (posix_memalign((void**)&write_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) { |
96 | 0 | return Status::Error<MEM_ALLOC_FAILED>("fail to allocate write buffer memory. size={}", |
97 | 0 | TEST_FILE_BUF_SIZE); |
98 | 0 | } |
99 | 573 | std::unique_ptr<char, decltype(&std::free)> write_buff(write_test_buff, &std::free); |
100 | 573 | if (posix_memalign((void**)&read_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) { |
101 | 0 | return Status::Error<MEM_ALLOC_FAILED>("fail to allocate read buffer memory. size={}", |
102 | 0 | TEST_FILE_BUF_SIZE); |
103 | 0 | } |
104 | 573 | std::unique_ptr<char, decltype(&std::free)> read_buff(read_test_buff, &std::free); |
105 | | // generate random numbers |
106 | 573 | uint32_t rand_seed = static_cast<uint32_t>(time(nullptr)); |
107 | 2.34M | for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) { |
108 | 2.34M | int32_t tmp_value = rand_r(&rand_seed); |
109 | 2.34M | write_test_buff[i] = static_cast<char>(tmp_value); |
110 | 2.34M | } |
111 | | |
112 | | // write file |
113 | 573 | io::FileWriterPtr file_writer; |
114 | 573 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(test_file_path, &file_writer)); |
115 | 573 | RETURN_IF_ERROR(file_writer->append({write_buff.get(), TEST_FILE_BUF_SIZE})); |
116 | 573 | RETURN_IF_ERROR(file_writer->close()); |
117 | | // read file |
118 | 573 | io::FileReaderSPtr file_reader; |
119 | 573 | RETURN_IF_ERROR(io::global_local_filesystem()->open_file(test_file_path, &file_reader)); |
120 | 573 | size_t bytes_read = 0; |
121 | 573 | RETURN_IF_ERROR(file_reader->read_at(0, {read_buff.get(), TEST_FILE_BUF_SIZE}, &bytes_read)); |
122 | 573 | if (memcmp(write_buff.get(), read_buff.get(), TEST_FILE_BUF_SIZE) != 0) { |
123 | 0 | return Status::IOError("the test file write_buf and read_buf not equal, file_name={}.", |
124 | 0 | test_file_path); |
125 | 0 | } |
126 | | // delete file |
127 | 573 | return io::global_local_filesystem()->delete_file(test_file_path); |
128 | 573 | } |
129 | | |
130 | 22 | Status check_datapath_rw(const std::string& path) { |
131 | 22 | bool exists = true; |
132 | 22 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); |
133 | 22 | if (!exists) { |
134 | 0 | return Status::IOError("path does not exist: {}", path); |
135 | 0 | } |
136 | 22 | std::string file_path = path + "/.read_write_test_file"; |
137 | 22 | return read_write_test_file(file_path); |
138 | 22 | } |
139 | | |
140 | | __thread char Errno::_buf[BUF_SIZE]; ///< buffer instance |
141 | | |
142 | 0 | const char* Errno::str() { |
143 | 0 | return str(no()); |
144 | 0 | } |
145 | | |
146 | 0 | const char* Errno::str(int no) { |
147 | 0 | if (0 != strerror_r(no, _buf, BUF_SIZE)) { |
148 | 0 | LOG(WARNING) << "fail to get errno string. [no='" << no << "', errno='" << errno << "']"; |
149 | 0 | snprintf(_buf, BUF_SIZE, "unknown errno"); |
150 | 0 | } |
151 | |
|
152 | 0 | return _buf; |
153 | 0 | } |
154 | | |
155 | 0 | int Errno::no() { |
156 | 0 | return errno; |
157 | 0 | } |
158 | | |
159 | | template <> |
160 | 161 | bool valid_signed_number<int128_t>(const std::string& value_str) { |
161 | 161 | char* endptr = nullptr; |
162 | 161 | const char* value_string = value_str.c_str(); |
163 | 161 | int64_t value = strtol(value_string, &endptr, 10); |
164 | 161 | if (*endptr != 0) { |
165 | 0 | return false; |
166 | 161 | } else if (value > LONG_MIN && value < LONG_MAX) { |
167 | 158 | return true; |
168 | 158 | } else { |
169 | 3 | bool sign = false; |
170 | 3 | if (*value_string == '-' || *value_string == '+') { |
171 | 1 | if (*(value_string++) == '-') { |
172 | 1 | sign = true; |
173 | 1 | } |
174 | 1 | } |
175 | | |
176 | 3 | uint128_t current = 0; |
177 | 3 | uint128_t max_int128 = std::numeric_limits<int128_t>::max(); |
178 | 81 | while (*value_string != 0) { |
179 | 78 | if (current > max_int128 / 10) { |
180 | 0 | return false; |
181 | 0 | } |
182 | | |
183 | 78 | current = current * 10 + (*(value_string++) - '0'); |
184 | 78 | } |
185 | | |
186 | 3 | if ((!sign && current > max_int128) || (sign && current > max_int128 + 1)) { |
187 | 2 | return false; |
188 | 2 | } |
189 | | |
190 | 1 | return true; |
191 | 3 | } |
192 | 161 | } |
193 | | |
194 | 34 | bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac) { |
195 | 34 | const char* decimal_pattern = "-?(\\d+)(.\\d+)?"; |
196 | 34 | std::regex e(decimal_pattern); |
197 | 34 | std::smatch what; |
198 | 34 | if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) { |
199 | 1 | LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]"; |
200 | 1 | return false; |
201 | 1 | } |
202 | | |
203 | 33 | size_t number_length = value_str.size(); |
204 | 33 | bool is_negative = value_str[0] == '-'; |
205 | 33 | if (is_negative) { |
206 | 10 | --number_length; |
207 | 10 | } |
208 | | |
209 | 33 | size_t integer_len = 0; |
210 | 33 | size_t fractional_len = 0; |
211 | 33 | size_t point_pos = value_str.find('.'); |
212 | 33 | if (point_pos == std::string::npos) { |
213 | 2 | integer_len = number_length; |
214 | 2 | fractional_len = 0; |
215 | 31 | } else { |
216 | 31 | integer_len = point_pos - (is_negative ? 1 : 0); |
217 | 31 | fractional_len = number_length - point_pos - 1; |
218 | 31 | } |
219 | | |
220 | | /// For value likes "0.xxxxxx", the integer_len should actually be 0. |
221 | 33 | if (integer_len == 1 && precision - frac == 0) { |
222 | 8 | if (what[1].str() == "0") { |
223 | 8 | integer_len = 0; |
224 | 8 | } |
225 | 8 | } |
226 | | |
227 | 33 | return (integer_len <= (precision - frac) && fractional_len <= frac); |
228 | 34 | } |
229 | | |
230 | 116 | bool valid_datetime(const std::string& value_str, const uint32_t scale) { |
231 | 116 | const char* datetime_pattern = |
232 | 116 | "((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*" |
233 | 116 | "(((?:\\d){2}):((?:\\d){2}):((?:\\d){2})([.]*((?:\\d){0,6})))?"; |
234 | 116 | std::regex e(datetime_pattern); |
235 | 116 | std::smatch what; |
236 | | |
237 | 116 | if (std::regex_match(value_str, what, e)) { |
238 | 111 | if (what[0].str().size() != value_str.size()) { |
239 | 0 | LOG(WARNING) << "datetime str does not fully match. [value_str=" << value_str |
240 | 0 | << " match=" << what[0].str() << "]"; |
241 | 0 | return false; |
242 | 0 | } |
243 | | |
244 | 111 | int64_t month = strtol(what[2].str().c_str(), nullptr, 10); |
245 | 111 | if (month < 1 || month > 12) { |
246 | 2 | LOG(WARNING) << "invalid month. [month=" << month << "]"; |
247 | 2 | return false; |
248 | 2 | } |
249 | | |
250 | 109 | int64_t day = strtol(what[3].str().c_str(), nullptr, 10); |
251 | 111 | if (day < 1 || day > 31) { |
252 | 2 | LOG(WARNING) << "invalid day. [day=" << day << "]"; |
253 | 2 | return false; |
254 | 2 | } |
255 | | |
256 | 107 | if (what[4].length()) { |
257 | 65 | int64_t hour = strtol(what[5].str().c_str(), nullptr, 10); |
258 | 65 | if (hour < 0 || hour > 23) { |
259 | 1 | LOG(WARNING) << "invalid hour. [hour=" << hour << "]"; |
260 | 1 | return false; |
261 | 1 | } |
262 | | |
263 | 64 | int64_t minute = strtol(what[6].str().c_str(), nullptr, 10); |
264 | 66 | if (minute < 0 || minute > 59) { |
265 | 1 | LOG(WARNING) << "invalid minute. [minute=" << minute << "]"; |
266 | 1 | return false; |
267 | 1 | } |
268 | | |
269 | 63 | int64_t second = strtol(what[7].str().c_str(), nullptr, 10); |
270 | 63 | if (second < 0 || second > 59) { |
271 | 1 | LOG(WARNING) << "invalid second. [second=" << second << "]"; |
272 | 1 | return false; |
273 | 1 | } |
274 | 62 | if (what[8].length()) { |
275 | 14 | if (what[9].str().size() > 6) { |
276 | 0 | LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() << "]"; |
277 | 0 | return false; |
278 | 0 | } |
279 | 14 | auto s9 = what[9].str(); |
280 | 14 | s9.resize(6, '0'); |
281 | 14 | if (const long ms = strtol(s9.c_str(), nullptr, 10); |
282 | 14 | ms % static_cast<long>(std::pow(10, 6 - scale)) != 0) { |
283 | 0 | LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() |
284 | 0 | << ", scale = " << scale << "]"; |
285 | 0 | return false; |
286 | 0 | } |
287 | 14 | } |
288 | 62 | } |
289 | | |
290 | 104 | return true; |
291 | 107 | } else { |
292 | 5 | LOG(WARNING) << "datetime string does not match"; |
293 | 5 | return false; |
294 | 5 | } |
295 | 116 | } |
296 | | |
297 | 5 | bool valid_bool(const std::string& value_str) { |
298 | 5 | if (value_str == "0" || value_str == "1") { |
299 | 5 | return true; |
300 | 5 | } |
301 | 0 | StringParser::ParseResult result; |
302 | 0 | StringParser::string_to_bool(value_str.c_str(), value_str.length(), &result); |
303 | 0 | return result == StringParser::PARSE_SUCCESS; |
304 | 5 | } |
305 | | |
306 | 4 | bool valid_ipv4(const std::string& value_str) { |
307 | 4 | return IPv4Value::is_valid_string(value_str.c_str(), value_str.size()); |
308 | 4 | } |
309 | | |
310 | 4 | bool valid_ipv6(const std::string& value_str) { |
311 | 4 | return IPv6Value::is_valid_string(value_str.c_str(), value_str.size()); |
312 | 4 | } |
313 | | #include "common/compile_check_end.h" |
314 | | } // namespace doris |