/root/doris/be/src/olap/utils.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | // IWYU pragma: no_include <bthread/errno.h> |
21 | | #include <errno.h> // IWYU pragma: keep |
22 | | #include <limits.h> |
23 | | #include <stdint.h> |
24 | | #include <sys/time.h> |
25 | | |
26 | | #include <cstdio> |
27 | | #include <cstdlib> |
28 | | #include <iterator> |
29 | | #include <limits> |
30 | | #include <string> |
31 | | #include <vector> |
32 | | |
33 | | #include "common/status.h" |
34 | | #include "olap/olap_common.h" |
35 | | |
36 | | namespace doris { |
37 | | void write_log_info(char* buf, size_t buf_len, const char* fmt, ...); |
// NOTE(review): presumably the names of reserved/internal columns -- confirm against callers.
// Being `static` at namespace scope in a header, every including translation unit gets its own copy.
38 | | static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__"; |
39 | | static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__"; |
40 | | static const std::string VERSION_COL = "__DORIS_VERSION_COL__"; |
41 | | |
42 | | // Lookup table of the powers of ten 10^0 .. 10^9, used to speed up computation |
43 | | const static int32_t g_power_table[] = {1, 10, 100, 1000, 10000, |
44 | | 100000, 1000000, 10000000, 100000000, 1000000000}; |
45 | | |
// Timing utility: measures how long a piece of code takes to run, for performance tuning.
// The watch starts when it is constructed and can be restarted with reset().
class OlapStopWatch {
public:
    // Returns the number of microseconds elapsed since construction or the last reset().
    //
    // gettimeofday() reports wall-clock time, which can be stepped backwards. The
    // difference is therefore computed with signed integers and clamped to 0, instead
    // of converting a possibly negative floating-point value to an unsigned integer.
    uint64_t get_elapse_time_us() const {
        struct timeval now;
        gettimeofday(&now, nullptr);

        const int64_t delta_sec =
                static_cast<int64_t>(now.tv_sec) - static_cast<int64_t>(_begin_time.tv_sec);
        const int64_t delta_usec =
                static_cast<int64_t>(now.tv_usec) - static_cast<int64_t>(_begin_time.tv_usec);
        const int64_t elapsed_us = delta_sec * 1000000 + delta_usec;

        return elapsed_us > 0 ? static_cast<uint64_t>(elapsed_us) : 0;
    }

    // Same measurement as get_elapse_time_us(), expressed in seconds.
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }

    // Restarts the watch from the current time.
    void reset() { gettimeofday(&_begin_time, nullptr); }

    OlapStopWatch() { reset(); }

private:
    struct timeval _begin_time; // start timestamp
};
65 | | |
66 | | // @brief 切分字符串 |
67 | | // @param base 原串 |
68 | | // @param separator 分隔符 |
69 | | // @param result 切分结果 |
70 | | template <typename T> |
71 | 0 | Status split_string(const std::string& base, const T separator, std::vector<std::string>* result) { |
72 | 0 | if (!result) { |
73 | 0 | return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input"); |
74 | 0 | } |
75 | | |
76 | | // 处理base为空的情况 |
77 | | // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况 |
78 | 0 | if (base.size() == 0) { |
79 | 0 | result->push_back(""); |
80 | 0 | return Status::OK(); |
81 | 0 | } |
82 | | |
83 | 0 | size_t offset = 0; |
84 | 0 | while (offset < base.length()) { |
85 | 0 | size_t next = base.find(separator, offset); |
86 | 0 | if (next == std::string::npos) { |
87 | 0 | result->push_back(base.substr(offset)); |
88 | 0 | break; |
89 | 0 | } else { |
90 | 0 | result->push_back(base.substr(offset, next - offset)); |
91 | 0 | offset = next + 1; |
92 | 0 | } |
93 | 0 | } |
94 | |
|
95 | 0 | return Status::OK(); |
96 | 0 | } |
97 | | |
98 | | uint32_t olap_adler32_init(); |
99 | | uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len); |
100 | | |
101 | | // Get the current system time and convert it to a string |
102 | | Status gen_timestamp_string(std::string* out_string); |
103 | | |
// Iterator offset, used for binary search.
using iterator_offset_t = size_t;

// Iterator-like wrapper around a plain offset: dereferencing it yields the offset
// itself. The member typedefs (value_type, difference_type, iterator_category, ...)
// are inherited from std::iterator_traits<iterator_offset_t*>.
class BinarySearchIterator : public std::iterator_traits<iterator_offset_t*> {
public:
    BinarySearchIterator() : _offset(0u) {}
    explicit BinarySearchIterator(iterator_offset_t offset) : _offset(offset) {}

    // Returns the current offset.
    iterator_offset_t operator*() const { return _offset; }

    BinarySearchIterator& operator++() {
        ++_offset;
        return *this;
    }

    BinarySearchIterator& operator--() {
        --_offset;
        return *this;
    }

    BinarySearchIterator& operator-=(size_t step) {
        _offset = _offset - step;
        return *this;
    }

    BinarySearchIterator& operator+=(size_t step) {
        _offset = _offset + step;
        return *this;
    }

    // const-qualified so that const iterators (and temporaries bound to const
    // references) can be compared as well.
    bool operator!=(const BinarySearchIterator& iterator) const {
        return this->_offset != iterator._offset;
    }

private:
    iterator_offset_t _offset;
};
141 | | |
142 | | int operator-(const BinarySearchIterator& left, const BinarySearchIterator& right); |
143 | | |
144 | | // CRC32C computation that does not use SSE4 instructions |
145 | | unsigned int crc32c_lut(char const* b, unsigned int off, unsigned int len, unsigned int crc); |
146 | | |
147 | | Status check_datapath_rw(const std::string& path); |
148 | | |
149 | | Status read_write_test_file(const std::string& test_file_path); |
150 | | |
// Converts one list into another: appends every element of v2, converted with
// static_cast<T1>, to *v1. Elements already in *v1 are kept. Does nothing when
// v1 is null.
template <typename T1, typename T2>
void static_cast_assign_vector(std::vector<T1>* v1, const std::vector<T2>& v2) {
    if (nullptr == v1) {
        return;
    }

    // Fix the element count up front: if v1 points at v2 itself, v2 grows while
    // being copied and a loop bounded by v2.size() would never terminate.
    const size_t count = v2.size();
    for (size_t i = 0; i < count; ++i) {
        // The cast produces a temporary before push_back can reallocate, so the
        // source element is read safely even in the aliasing case.
        v1->push_back(static_cast<T1>(v2[i]));
    }
}
161 | | |
162 | | // Helper for printing errno |
163 | | class Errno { |
164 | | public: |
165 | | // Returns the error message corresponding to errno; thread-safe (presumably backed by the per-thread _buf declared below -- confirm in the definition) |
166 | | static const char* str(); |
167 | | static const char* str(int no); |
168 | | static int no(); |
169 | | |
170 | | private: |
171 | | static const int BUF_SIZE = 256; |
172 | | static __thread char _buf[BUF_SIZE]; |
173 | | }; |
174 | | |
// True when `str` (a std::string-like object) ends with the C string `suffix`.
// The length check comes first so that a suffix longer than `str` can never match;
// without it, `size() - strlen(suffix)` wraps around and equals npos when the suffix
// is exactly one character longer than the string. Both arguments are evaluated more
// than once, so they must be free of side effects.
#define ENDSWITH(str, suffix)              \
    ((str).size() >= strlen(suffix) &&     \
     (str).compare((str).size() - strlen(suffix), strlen(suffix), (suffix)) == 0)
176 | | |
// Checks whether value_str is a base-10 integer that fits into the signed type T
// (int8_t, int16_t, int32_t or int64_t).
//
// Returns false when nothing can be parsed, when anything follows the number
// (including an embedded NUL byte), or when the value is out of range for T.
// Leading whitespace and an optional sign are accepted, as by strtoll().
template <typename T>
bool valid_signed_number(const std::string& value_str) {
    const char* const begin = value_str.c_str();
    char* endptr = nullptr;
    errno = 0;
    // strtoll is at least 64 bits wide on every platform, unlike strtol.
    const long long value = strtoll(begin, &endptr, 10);

    // errno is set when the value does not fit into long long (ERANGE) or when the
    // conversion could not be performed at all.
    if (errno != 0) {
        return false;
    }

    // Parsing must have consumed at least one character and must stop exactly at
    // the end of the string; comparing against size() also rejects embedded NULs.
    if (endptr == begin || endptr != begin + value_str.size()) {
        return false;
    }

    return value >= std::numeric_limits<T>::min() && value <= std::numeric_limits<T>::max();
}
195 | | |
196 | | template <> |
197 | | bool valid_signed_number<int128_t>(const std::string& value_str); |
198 | | |
// Checks whether value_str is a base-10 integer that fits into the unsigned type T
// (uint8_t, uint16_t, uint32_t or uint64_t).
//
// Returns false for negative input, when nothing can be parsed, when anything
// follows the number (including an embedded NUL byte), or when the value is out of
// range for T. Leading whitespace and an optional '+' are accepted, as by strtoull().
template <typename T>
bool valid_unsigned_number(const std::string& value_str) {
    // strtoull() accepts a minus sign (also after leading whitespace) and returns the
    // negated value, so " -1" would otherwise pass as a huge positive number. A '-'
    // can never be part of a valid unsigned literal, wherever it appears.
    if (value_str.find('-') != std::string::npos) {
        return false;
    }

    const char* const begin = value_str.c_str();
    char* endptr = nullptr;
    errno = 0;
    // strtoull is at least 64 bits wide on every platform, unlike strtoul.
    const unsigned long long value = strtoull(begin, &endptr, 10);

    // errno is set when the value does not fit into unsigned long long (ERANGE) or
    // when the conversion could not be performed at all.
    if (errno != 0) {
        return false;
    }

    // Parsing must have consumed at least one character and must stop exactly at
    // the end of the string; comparing against size() also rejects embedded NULs.
    if (endptr == begin || endptr != begin + value_str.size()) {
        return false;
    }

    return value <= std::numeric_limits<T>::max();
}
221 | | |
222 | | bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac); |
223 | | |
224 | | // Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss' |
225 | | // TODO: support 'yyyy-MM-dd HH:mm:ss.SSS' |
226 | | bool valid_datetime(const std::string& value_str, const uint32_t scale); |
227 | | |
228 | | bool valid_bool(const std::string& value_str); |
229 | | |
230 | | bool valid_ipv4(const std::string& value_str); |
231 | | |
232 | | bool valid_ipv6(const std::string& value_str); |
233 | | |
234 | 80 | constexpr bool is_string_type(const FieldType& field_type) { |
235 | 80 | return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR || |
236 | 80 | field_type == FieldType::OLAP_FIELD_TYPE_CHAR || |
237 | 80 | field_type == FieldType::OLAP_FIELD_TYPE_STRING; |
238 | 80 | } |
239 | | |
240 | 40 | constexpr bool is_numeric_type(const FieldType& field_type) { |
241 | 40 | return field_type == FieldType::OLAP_FIELD_TYPE_INT || |
242 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT || |
243 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_BIGINT || |
244 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT || |
245 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT || |
246 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT || |
247 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_TINYINT || |
248 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE || |
249 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_FLOAT || |
250 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DATE || |
251 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 || |
252 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DATETIME || |
253 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 || |
254 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT || |
255 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL || |
256 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 || |
257 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 || |
258 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I || |
259 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 || |
260 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_BOOL || |
261 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_IPV4 || |
262 | 40 | field_type == FieldType::OLAP_FIELD_TYPE_IPV6; |
263 | 40 | } |
264 | | |
265 | | // Util used to get the string name of a thrift enum item: looks `index` up in _<enum_type>_VALUES_TO_NAMES and assigns the matching name to `out`, or "NULL" when the value is not in the map |
266 | | #define EnumToString(enum_type, index, out) \ |
267 | 2.19k | do { \ |
268 | 2.19k | auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \ |
269 | 2.19k | if (it == _##enum_type##_VALUES_TO_NAMES.end()) { \ |
270 | 0 | out = "NULL"; \ |
271 | 2.19k | } else { \ |
272 | 2.19k | out = it->second; \ |
273 | 2.19k | } \ |
274 | 2.19k | } while (0) |
275 | | |
276 | | struct RowLocation { |
277 | 2.36M | RowLocation() : segment_id(0), row_id(0) {} |
278 | 6.66M | RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {} |
279 | | RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid) |
280 | 8.10M | : rowset_id(rsid), segment_id(sid), row_id(rid) {} |
281 | | RowsetId rowset_id; |
282 | | uint32_t segment_id; |
283 | | uint32_t row_id; |
284 | | |
285 | 0 | bool operator==(const RowLocation& rhs) const { |
286 | 0 | return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id; |
287 | 0 | } |
288 | | |
289 | 0 | bool operator<(const RowLocation& rhs) const { |
290 | 0 | if (rowset_id != rhs.rowset_id) { |
291 | 0 | return rowset_id < rhs.rowset_id; |
292 | 0 | } else if (segment_id != rhs.segment_id) { |
293 | 0 | return segment_id < rhs.segment_id; |
294 | 0 | } else { |
295 | 0 | return row_id < rhs.row_id; |
296 | 0 | } |
297 | 0 | } |
298 | | }; |
299 | | using RowLocationSet = std::set<RowLocation>; |
300 | | using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>; |
301 | | |
302 | | struct GlobalRowLoacation { |
303 | | GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid) |
304 | 0 | : tablet_id(tid), row_location(rsid, sid, rid) {} |
305 | | int64_t tablet_id; |
306 | | RowLocation row_location; |
307 | | |
308 | 0 | bool operator==(const GlobalRowLoacation& rhs) const { |
309 | 0 | return tablet_id == rhs.tablet_id && row_location == rhs.row_location; |
310 | 0 | } |
311 | | |
312 | 0 | bool operator<(const GlobalRowLoacation& rhs) const { |
313 | 0 | if (tablet_id != rhs.tablet_id) { |
314 | 0 | return tablet_id < rhs.tablet_id; |
315 | 0 | } else { |
316 | 0 | return row_location < rhs.row_location; |
317 | 0 | } |
318 | 0 | } |
319 | | }; |
320 | | |
321 | | } // namespace doris |