Coverage Report

Created: 2024-11-21 14:46

/root/doris/be/src/olap/utils.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <limits.h>
23
#include <stdint.h>
24
#include <sys/time.h>
25
26
#include <cstdio>
27
#include <cstdlib>
28
#include <iterator>
29
#include <limits>
30
#include <string>
31
#include <vector>
32
33
#include "common/status.h"
34
#include "olap/olap_common.h"
35
36
namespace doris {
37
void write_log_info(char* buf, size_t buf_len, const char* fmt, ...);
38
static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__";
39
static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
40
static const std::string VERSION_COL = "__DORIS_VERSION_COL__";
41
42
// Lookup table of powers of ten (10^0 .. 10^9), used to speed up computation
43
const static int32_t g_power_table[] = {1,      10,      100,      1000,      10000,
44
                                        100000, 1000000, 10000000, 100000000, 1000000000};
45
46
// Stopwatch utility: measures how long a piece of code takes to run; intended for
// performance tuning. All readings are taken with gettimeofday().
class OlapStopWatch {
public:
    // Starts timing at construction.
    OlapStopWatch() { reset(); }

    // Restarts the stopwatch from the current time.
    void reset() { gettimeofday(&_begin_time, nullptr); }

    // Returns the number of microseconds elapsed since construction or the last reset().
    uint64_t get_elapse_time_us() const {
        struct timeval current;
        gettimeofday(&current, nullptr);
        // Whole seconds are scaled to microseconds in floating point, then the
        // microsecond remainder (which may be negative) is added.
        const double seconds_as_us = (current.tv_sec - _begin_time.tv_sec) * 1e6;
        const auto usec_delta = current.tv_usec - _begin_time.tv_usec;
        return static_cast<uint64_t>(seconds_as_us + usec_delta);
    }

    // Returns the elapsed time in seconds.
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }

private:
    struct timeval _begin_time; // timestamp at which timing started
};
65
66
// @brief 切分字符串
67
// @param base 原串
68
// @param separator 分隔符
69
// @param result 切分结果
70
template <typename T>
71
0
Status split_string(const std::string& base, const T separator, std::vector<std::string>* result) {
72
0
    if (!result) {
73
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
74
0
    }
75
76
    // 处理base为空的情况
77
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
78
0
    if (base.size() == 0) {
79
0
        result->push_back("");
80
0
        return Status::OK();
81
0
    }
82
83
0
    size_t offset = 0;
84
0
    while (offset < base.length()) {
85
0
        size_t next = base.find(separator, offset);
86
0
        if (next == std::string::npos) {
87
0
            result->push_back(base.substr(offset));
88
0
            break;
89
0
        } else {
90
0
            result->push_back(base.substr(offset, next - offset));
91
0
            offset = next + 1;
92
0
        }
93
0
    }
94
95
0
    return Status::OK();
96
0
}
97
98
uint32_t olap_adler32_init();
99
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
100
101
// Get the current system time and convert it to a string
102
Status gen_timestamp_string(std::string* out_string);
103
104
// Offset type carried by BinarySearchIterator; used for binary search.
using iterator_offset_t = size_t;

// Iterator-like wrapper around a plain offset. Dereferencing yields the offset itself,
// and the increment/decrement/advance operators move that offset. The nested iterator
// typedefs are inherited from std::iterator_traits<iterator_offset_t*>.
class BinarySearchIterator : public std::iterator_traits<iterator_offset_t*> {
public:
    // Starts at offset 0.
    BinarySearchIterator() : _offset(0u) {}
    explicit BinarySearchIterator(iterator_offset_t offset) : _offset(offset) {}

    // Returns the current offset by value.
    iterator_offset_t operator*() const { return _offset; }

    BinarySearchIterator& operator++() {
        ++_offset;
        return *this;
    }

    BinarySearchIterator& operator--() {
        --_offset;
        return *this;
    }

    // Moves the offset back by `step`; unsigned arithmetic, so it wraps below zero.
    BinarySearchIterator& operator-=(size_t step) {
        _offset = _offset - step;
        return *this;
    }

    // Moves the offset forward by `step`.
    BinarySearchIterator& operator+=(size_t step) {
        _offset = _offset + step;
        return *this;
    }

    // Const-qualified so that const iterators (and temporaries bound to const
    // references) can be compared; the comparison never modifies either operand.
    bool operator!=(const BinarySearchIterator& iterator) const {
        return this->_offset != iterator._offset;
    }

private:
    iterator_offset_t _offset;
};
141
142
int operator-(const BinarySearchIterator& left, const BinarySearchIterator& right);
143
144
// CRC32C computation that does not use SSE4 instructions
145
unsigned int crc32c_lut(char const* b, unsigned int off, unsigned int len, unsigned int crc);
146
147
Status check_datapath_rw(const std::string& path);
148
149
Status read_write_test_file(const std::string& test_file_path);
150
151
// Converts one list into another: appends every element of v2 to *v1, converting each
// with static_cast<T1>. Elements already in *v1 are kept. Does nothing when v1 is nullptr.
template <typename T1, typename T2>
void static_cast_assign_vector(std::vector<T1>* v1, const std::vector<T2>& v2) {
    if (v1 == nullptr) {
        return;
    }
    // Index-based on purpose: the original note says template expansion in GCC 3.4
    // failed to compile when iterators were used here.
    size_t idx = 0;
    while (idx < v2.size()) {
        v1->push_back(static_cast<T1>(v2[idx]));
        ++idx;
    }
}
161
162
// Prints Errno: static helpers for reporting errno values.
163
class Errno {
164
public:
165
    // Returns the error message corresponding to Errno; thread-safe (per the original note).
166
    static const char* str();
167
    // NOTE(review): presumably the message for the given errno value `no` -- confirm in the definition.
    static const char* str(int no);
168
    // NOTE(review): presumably the current errno value -- confirm in the definition.
    static int no();
169
170
private:
171
    // Number of chars in _buf.
    static const int BUF_SIZE = 256;
172
    // Buffer declared with __thread, i.e. one instance per thread.
    static __thread char _buf[BUF_SIZE];
173
};
174
175
// True when `str` (a std::string) ends with the C string `suffix`.
// The length check comes first: without it, a suffix exactly one character longer than
// `str` makes size() - strlen(suffix) wrap around to npos, which then compares equal to
// the npos returned by a failed search and wrongly reports a match.
// Both arguments are evaluated more than once, so they must be free of side effects.
#define ENDSWITH(str, suffix)          \
    ((str).size() >= strlen(suffix) && \
     (str).compare((str).size() - strlen(suffix), std::string::npos, (suffix)) == 0)
176
177
// Checks whether value_str is a base-10 integer that fits in the signed type T
// (int8_t, int16_t, int32_t or int64_t) without overflow.
//
// Leading whitespace and an optional sign are accepted because strtoll accepts them.
// An empty string, a string with no digits, or any trailing character is rejected.
template <typename T>
bool valid_signed_number(const std::string& value_str) {
    const char* const begin = value_str.c_str();
    char* endptr = nullptr;
    errno = 0; // strtoll reports range errors only through errno
    // strtoll rather than strtol: `long` is not guaranteed to be 64 bits wide.
    const long long value = strtoll(begin, &endptr, 10);

    // errno != 0      -> the conversion failed or the value is out of range;
    // endptr == begin -> no digits were consumed (pointer comparison, not a string compare);
    // *endptr != '\0' -> unparsed characters follow the number.
    if (errno != 0 || endptr == begin || *endptr != '\0') {
        return false;
    }

    return value >= std::numeric_limits<T>::min() && value <= std::numeric_limits<T>::max();
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
179
11
bool valid_signed_number(const std::string& value_str) {
180
11
    char* endptr = nullptr;
181
11
    errno = 0;
182
11
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
183
184
11
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
185
11
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
186
0
        return false;
187
0
    }
188
189
11
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
190
2
        return false;
191
2
    }
192
193
9
    return true;
194
11
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
179
13
bool valid_signed_number(const std::string& value_str) {
180
13
    char* endptr = nullptr;
181
13
    errno = 0;
182
13
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
183
184
13
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
185
13
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
186
0
        return false;
187
0
    }
188
189
13
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
190
2
        return false;
191
2
    }
192
193
11
    return true;
194
13
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
179
54
bool valid_signed_number(const std::string& value_str) {
180
54
    char* endptr = nullptr;
181
54
    errno = 0;
182
54
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
183
184
54
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
185
54
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
186
0
        return false;
187
0
    }
188
189
54
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
190
2
        return false;
191
2
    }
192
193
52
    return true;
194
54
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
179
11
bool valid_signed_number(const std::string& value_str) {
180
11
    char* endptr = nullptr;
181
11
    errno = 0;
182
11
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
183
184
11
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
185
11
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
186
2
        return false;
187
2
    }
188
189
9
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
190
0
        return false;
191
0
    }
192
193
9
    return true;
194
9
}
195
196
template <>
197
bool valid_signed_number<int128_t>(const std::string& value_str);
198
199
// Checks whether value_str is a base-10 integer that fits in the unsigned type T
// (uint8_t, uint16_t, uint32_t or uint64_t) without overflow.
//
// Negative input is always rejected. Leading whitespace and a '+' sign are accepted
// because strtoull accepts them. An empty string, a string with no digits, or any
// trailing character is rejected.
template <typename T>
bool valid_unsigned_number(const std::string& value_str) {
    // strtoull skips leading whitespace and then accepts a '-' sign, returning the negated
    // value without an error. The sign therefore has to be looked for after that
    // whitespace; checking only index 0 lets input such as " -1" through.
    const size_t first = value_str.find_first_not_of(" \t\n\v\f\r");
    if (first != std::string::npos && value_str[first] == '-') {
        return false;
    }

    const char* const begin = value_str.c_str();
    char* endptr = nullptr;
    errno = 0; // strtoull reports range errors only through errno
    // strtoull rather than strtoul: `unsigned long` is not guaranteed to be 64 bits wide.
    const unsigned long long value = strtoull(begin, &endptr, 10);

    // errno != 0      -> the conversion failed or the value is out of range;
    // endptr == begin -> no digits were consumed (pointer comparison, not a string compare);
    // *endptr != '\0' -> unparsed characters follow the number.
    if (errno != 0 || endptr == begin || *endptr != '\0') {
        return false;
    }

    // Only the upper bound can be violated: the minimum of an unsigned type is 0.
    return value <= std::numeric_limits<T>::max();
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
221
222
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
223
224
// Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss'
225
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
226
bool valid_datetime(const std::string& value_str, const uint32_t scale);
227
228
bool valid_bool(const std::string& value_str);
229
230
bool valid_ipv4(const std::string& value_str);
231
232
bool valid_ipv6(const std::string& value_str);
233
234
80
constexpr bool is_string_type(const FieldType& field_type) {
235
80
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
236
80
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
237
80
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
238
80
}
239
240
40
constexpr bool is_numeric_type(const FieldType& field_type) {
241
40
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
242
40
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
243
40
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
244
40
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
245
40
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
246
40
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
247
40
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
248
40
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
249
40
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
250
40
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
251
40
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
252
40
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
253
40
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
254
40
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
255
40
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
256
40
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
257
40
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
258
40
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
259
40
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
260
40
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
261
40
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
262
40
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
263
40
}
264
265
// Util used to get the string name of a thrift enum item.
// Looks `index` up in the map named `_<enum_type>_VALUES_TO_NAMES` (the identifier is
// built by token pasting) and assigns the mapped value to `out`; when `index` has no
// entry, `out` is assigned the literal "NULL" instead.
// The do { ... } while (0) wrapper makes the expansion a single statement.
266
#define EnumToString(enum_type, index, out)                   \
267
2.19k
    do {                                                      \
268
2.19k
        auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \
269
2.19k
        if (it == _##enum_type##_VALUES_TO_NAMES.end()) {     \
270
0
            out = "NULL";                                     \
271
2.19k
        } else {                                              \
272
2.19k
            out = it->second;                                 \
273
2.19k
        }                                                     \
274
2.19k
    } while (0)
275
276
struct RowLocation {
277
2.36M
    RowLocation() : segment_id(0), row_id(0) {}
278
6.66M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {}
279
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
280
8.10M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
281
    RowsetId rowset_id;
282
    uint32_t segment_id;
283
    uint32_t row_id;
284
285
0
    bool operator==(const RowLocation& rhs) const {
286
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
287
0
    }
288
289
0
    bool operator<(const RowLocation& rhs) const {
290
0
        if (rowset_id != rhs.rowset_id) {
291
0
            return rowset_id < rhs.rowset_id;
292
0
        } else if (segment_id != rhs.segment_id) {
293
0
            return segment_id < rhs.segment_id;
294
0
        } else {
295
0
            return row_id < rhs.row_id;
296
0
        }
297
0
    }
298
};
299
using RowLocationSet = std::set<RowLocation>;
300
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
301
302
struct GlobalRowLoacation {
303
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
304
0
            : tablet_id(tid), row_location(rsid, sid, rid) {}
305
    int64_t tablet_id;
306
    RowLocation row_location;
307
308
0
    bool operator==(const GlobalRowLoacation& rhs) const {
309
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
310
0
    }
311
312
0
    bool operator<(const GlobalRowLoacation& rhs) const {
313
0
        if (tablet_id != rhs.tablet_id) {
314
0
            return tablet_id < rhs.tablet_id;
315
0
        } else {
316
0
            return row_location < rhs.row_location;
317
0
        }
318
0
    }
319
};
320
321
} // namespace doris