/root/doris/be/src/olap/utils.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/utils.h" |
19 | | |
20 | | // IWYU pragma: no_include <bthread/errno.h> |
21 | | #include <errno.h> // IWYU pragma: keep |
22 | | #include <stdarg.h> |
23 | | #include <time.h> |
24 | | #include <unistd.h> |
25 | | #include <zconf.h> |
26 | | #include <zlib.h> |
27 | | |
28 | | #include <cmath> |
29 | | #include <cstring> |
30 | | #include <memory> |
31 | | #include <regex> |
32 | | #include <set> |
33 | | #include <sstream> |
34 | | #include <string> |
35 | | #include <vector> |
36 | | |
37 | | #include "common/logging.h" |
38 | | #include "common/status.h" |
39 | | #include "io/fs/file_reader.h" |
40 | | #include "io/fs/file_writer.h" |
41 | | #include "io/fs/local_file_system.h" |
42 | | #include "olap/olap_common.h" |
43 | | #include "util/sse_util.hpp" |
44 | | #include "util/string_parser.hpp" |
45 | | #include "vec/runtime/ipv4_value.h" |
46 | | #include "vec/runtime/ipv6_value.h" |
47 | | |
48 | | namespace doris { |
49 | | using namespace ErrorCode; |
50 | | |
51 | 33 | uint32_t olap_adler32_init() { |
52 | 33 | return adler32(0L, Z_NULL, 0); |
53 | 33 | } |
54 | | |
55 | 33 | uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len) { |
56 | 33 | return adler32(adler, reinterpret_cast<const Bytef*>(buf), len); |
57 | 33 | } |
58 | | |
59 | 0 | Status gen_timestamp_string(std::string* out_string) { |
60 | 0 | time_t now = time(nullptr); |
61 | 0 | tm local_tm; |
62 | |
|
63 | 0 | if (localtime_r(&now, &local_tm) == nullptr) { |
64 | 0 | return Status::Error<OS_ERROR>("fail to localtime_r time. time={}", now); |
65 | 0 | } |
66 | 0 | char time_suffix[16] = {0}; // Example: 20150706111404's length is 15 |
67 | 0 | if (strftime(time_suffix, sizeof(time_suffix), "%Y%m%d%H%M%S", &local_tm) == 0) { |
68 | 0 | return Status::Error<OS_ERROR>("fail to strftime time. time={}", now); |
69 | 0 | } |
70 | | |
71 | 0 | *out_string = time_suffix; |
72 | 0 | return Status::OK(); |
73 | 0 | } |
74 | | |
75 | 0 | Status read_write_test_file(const std::string& test_file_path) { |
76 | 0 | if (access(test_file_path.c_str(), F_OK) == 0) { |
77 | 0 | if (remove(test_file_path.c_str()) != 0) { |
78 | 0 | char errmsg[64]; |
79 | 0 | return Status::IOError("fail to access test file. path={}, errno={}, err={}", |
80 | 0 | test_file_path, errno, strerror_r(errno, errmsg, 64)); |
81 | 0 | } |
82 | 0 | } else { |
83 | 0 | if (errno != ENOENT) { |
84 | 0 | char errmsg[64]; |
85 | 0 | return Status::IOError("fail to access test file. path={}, errno={}, err={}", |
86 | 0 | test_file_path, errno, strerror_r(errno, errmsg, 64)); |
87 | 0 | } |
88 | 0 | } |
89 | | |
90 | 0 | const size_t TEST_FILE_BUF_SIZE = 4096; |
91 | 0 | const size_t DIRECT_IO_ALIGNMENT = 512; |
92 | 0 | char* write_test_buff = nullptr; |
93 | 0 | char* read_test_buff = nullptr; |
94 | 0 | if (posix_memalign((void**)&write_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) { |
95 | 0 | return Status::Error<MEM_ALLOC_FAILED>("fail to allocate write buffer memory. size={}", |
96 | 0 | TEST_FILE_BUF_SIZE); |
97 | 0 | } |
98 | 0 | std::unique_ptr<char, decltype(&std::free)> write_buff(write_test_buff, &std::free); |
99 | 0 | if (posix_memalign((void**)&read_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) { |
100 | 0 | return Status::Error<MEM_ALLOC_FAILED>("fail to allocate read buffer memory. size={}", |
101 | 0 | TEST_FILE_BUF_SIZE); |
102 | 0 | } |
103 | 0 | std::unique_ptr<char, decltype(&std::free)> read_buff(read_test_buff, &std::free); |
104 | | // generate random numbers |
105 | 0 | uint32_t rand_seed = static_cast<uint32_t>(time(nullptr)); |
106 | 0 | for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) { |
107 | 0 | int32_t tmp_value = rand_r(&rand_seed); |
108 | 0 | write_test_buff[i] = static_cast<char>(tmp_value); |
109 | 0 | } |
110 | | |
111 | | // write file |
112 | 0 | io::FileWriterPtr file_writer; |
113 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(test_file_path, &file_writer)); |
114 | 0 | RETURN_IF_ERROR(file_writer->append({write_buff.get(), TEST_FILE_BUF_SIZE})); |
115 | 0 | RETURN_IF_ERROR(file_writer->close()); |
116 | | // read file |
117 | 0 | io::FileReaderSPtr file_reader; |
118 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->open_file(test_file_path, &file_reader)); |
119 | 0 | size_t bytes_read = 0; |
120 | 0 | RETURN_IF_ERROR(file_reader->read_at(0, {read_buff.get(), TEST_FILE_BUF_SIZE}, &bytes_read)); |
121 | 0 | if (memcmp(write_buff.get(), read_buff.get(), TEST_FILE_BUF_SIZE) != 0) { |
122 | 0 | return Status::IOError("the test file write_buf and read_buf not equal, file_name={}.", |
123 | 0 | test_file_path); |
124 | 0 | } |
125 | | // delete file |
126 | 0 | return io::global_local_filesystem()->delete_file(test_file_path); |
127 | 0 | } |
128 | | |
129 | 0 | Status check_datapath_rw(const std::string& path) { |
130 | 0 | bool exists = true; |
131 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); |
132 | 0 | if (!exists) { |
133 | 0 | return Status::IOError("path does not exist: {}", path); |
134 | 0 | } |
135 | 0 | std::string file_path = path + "/.read_write_test_file"; |
136 | 0 | return read_write_test_file(file_path); |
137 | 0 | } |
138 | | |
139 | | __thread char Errno::_buf[BUF_SIZE]; ///< buffer instance |
140 | | |
141 | 0 | const char* Errno::str() { |
142 | 0 | return str(no()); |
143 | 0 | } |
144 | | |
145 | 0 | const char* Errno::str(int no) { |
146 | 0 | if (0 != strerror_r(no, _buf, BUF_SIZE)) { |
147 | 0 | LOG(WARNING) << "fail to get errno string. [no='" << no << "', errno='" << errno << "']"; |
148 | 0 | snprintf(_buf, BUF_SIZE, "unknown errno"); |
149 | 0 | } |
150 | |
|
151 | 0 | return _buf; |
152 | 0 | } |
153 | | |
154 | 0 | int Errno::no() { |
155 | 0 | return errno; |
156 | 0 | } |
157 | | |
158 | | template <> |
159 | 4 | bool valid_signed_number<int128_t>(const std::string& value_str) { |
160 | 4 | char* endptr = nullptr; |
161 | 4 | const char* value_string = value_str.c_str(); |
162 | 4 | int64_t value = strtol(value_string, &endptr, 10); |
163 | 4 | if (*endptr != 0) { |
164 | 0 | return false; |
165 | 4 | } else if (value > LONG_MIN && value < LONG_MAX) { |
166 | 2 | return true; |
167 | 2 | } else { |
168 | 2 | bool sign = false; |
169 | 2 | if (*value_string == '-' || *value_string == '+') { |
170 | 1 | if (*(value_string++) == '-') { |
171 | 1 | sign = true; |
172 | 1 | } |
173 | 1 | } |
174 | | |
175 | 2 | uint128_t current = 0; |
176 | 2 | uint128_t max_int128 = std::numeric_limits<int128_t>::max(); |
177 | 80 | while (*value_string != 0) { |
178 | 78 | if (current > max_int128 / 10) { |
179 | 0 | return false; |
180 | 0 | } |
181 | | |
182 | 78 | current = current * 10 + (*(value_string++) - '0'); |
183 | 78 | } |
184 | | |
185 | 2 | if ((!sign && current > max_int128) || (sign && current > max_int128 + 1)) { |
186 | 2 | return false; |
187 | 2 | } |
188 | | |
189 | 0 | return true; |
190 | 2 | } |
191 | 4 | } |
192 | | |
193 | 7 | bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac) { |
194 | 7 | const char* decimal_pattern = "-?(\\d+)(.\\d+)?"; |
195 | 7 | std::regex e(decimal_pattern); |
196 | 7 | std::smatch what; |
197 | 7 | if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) { |
198 | 1 | LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]"; |
199 | 1 | return false; |
200 | 1 | } |
201 | | |
202 | 6 | size_t number_length = value_str.size(); |
203 | 6 | bool is_negative = value_str[0] == '-'; |
204 | 6 | if (is_negative) { |
205 | 2 | --number_length; |
206 | 2 | } |
207 | | |
208 | 6 | size_t integer_len = 0; |
209 | 6 | size_t fractional_len = 0; |
210 | 6 | size_t point_pos = value_str.find('.'); |
211 | 6 | if (point_pos == std::string::npos) { |
212 | 2 | integer_len = number_length; |
213 | 2 | fractional_len = 0; |
214 | 4 | } else { |
215 | 4 | integer_len = point_pos - (is_negative ? 1 : 0); |
216 | 4 | fractional_len = number_length - point_pos - 1; |
217 | 4 | } |
218 | | |
219 | | /// For value likes "0.xxxxxx", the integer_len should actually be 0. |
220 | 6 | if (integer_len == 1 && precision - frac == 0) { |
221 | 0 | if (what[1].str() == "0") { |
222 | 0 | integer_len = 0; |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | 6 | return (integer_len <= (precision - frac) && fractional_len <= frac); |
227 | 7 | } |
228 | | |
229 | 11 | bool valid_datetime(const std::string& value_str, const uint32_t scale) { |
230 | 11 | const char* datetime_pattern = |
231 | 11 | "((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*" |
232 | 11 | "(((?:\\d){2}):((?:\\d){2}):((?:\\d){2})([.]*((?:\\d){0,6})))?"; |
233 | 11 | std::regex e(datetime_pattern); |
234 | 11 | std::smatch what; |
235 | | |
236 | 11 | if (std::regex_match(value_str, what, e)) { |
237 | 9 | if (what[0].str().size() != value_str.size()) { |
238 | 0 | LOG(WARNING) << "datetime str does not fully match. [value_str=" << value_str |
239 | 0 | << " match=" << what[0].str() << "]"; |
240 | 0 | return false; |
241 | 0 | } |
242 | | |
243 | 9 | int month = strtol(what[2].str().c_str(), nullptr, 10); |
244 | 9 | if (month < 1 || month > 12) { |
245 | 2 | LOG(WARNING) << "invalid month. [month=" << month << "]"; |
246 | 2 | return false; |
247 | 2 | } |
248 | | |
249 | 7 | int day = strtol(what[3].str().c_str(), nullptr, 10); |
250 | 7 | if (day < 1 || day > 31) { |
251 | 2 | LOG(WARNING) << "invalid day. [day=" << day << "]"; |
252 | 2 | return false; |
253 | 2 | } |
254 | | |
255 | 5 | if (what[4].length()) { |
256 | 4 | int hour = strtol(what[5].str().c_str(), nullptr, 10); |
257 | 4 | if (hour < 0 || hour > 23) { |
258 | 1 | LOG(WARNING) << "invalid hour. [hour=" << hour << "]"; |
259 | 1 | return false; |
260 | 1 | } |
261 | | |
262 | 3 | int minute = strtol(what[6].str().c_str(), nullptr, 10); |
263 | 3 | if (minute < 0 || minute > 59) { |
264 | 1 | LOG(WARNING) << "invalid minute. [minute=" << minute << "]"; |
265 | 1 | return false; |
266 | 1 | } |
267 | | |
268 | 2 | int second = strtol(what[7].str().c_str(), nullptr, 10); |
269 | 2 | if (second < 0 || second > 59) { |
270 | 1 | LOG(WARNING) << "invalid second. [second=" << second << "]"; |
271 | 1 | return false; |
272 | 1 | } |
273 | 1 | if (what[8].length()) { |
274 | 0 | if (what[9].str().size() > 6) { |
275 | 0 | LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() << "]"; |
276 | 0 | return false; |
277 | 0 | } |
278 | 0 | auto s9 = what[9].str(); |
279 | 0 | s9.resize(6, '0'); |
280 | 0 | if (const long ms = strtol(s9.c_str(), nullptr, 10); |
281 | 0 | ms % static_cast<long>(std::pow(10, 6 - scale)) != 0) { |
282 | 0 | LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() |
283 | 0 | << ", scale = " << scale << "]"; |
284 | 0 | return false; |
285 | 0 | } |
286 | 0 | } |
287 | 1 | } |
288 | | |
289 | 2 | return true; |
290 | 5 | } else { |
291 | 2 | LOG(WARNING) << "datetime string does not match"; |
292 | 2 | return false; |
293 | 2 | } |
294 | 11 | } |
295 | | |
296 | 0 | bool valid_bool(const std::string& value_str) { |
297 | 0 | if (value_str == "0" || value_str == "1") { |
298 | 0 | return true; |
299 | 0 | } |
300 | 0 | StringParser::ParseResult result; |
301 | 0 | StringParser::string_to_bool(value_str.c_str(), value_str.length(), &result); |
302 | 0 | return result == StringParser::PARSE_SUCCESS; |
303 | 0 | } |
304 | | |
305 | 0 | bool valid_ipv4(const std::string& value_str) { |
306 | 0 | return IPv4Value::is_valid_string(value_str.c_str(), value_str.size()); |
307 | 0 | } |
308 | | |
309 | 0 | bool valid_ipv6(const std::string& value_str) { |
310 | 0 | return IPv6Value::is_valid_string(value_str.c_str(), value_str.size()); |
311 | 0 | } |
312 | | |
313 | | } // namespace doris |