Coverage Report

Created: 2025-04-29 12:00

/root/doris/be/src/olap/utils.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <limits.h>
23
#include <stdint.h>
24
#include <sys/time.h>
25
26
#include <cstdio>
27
#include <cstdlib>
28
#include <iterator>
29
#include <limits>
30
#include <string>
31
#include <vector>
32
33
#include "common/status.h"
34
#include "olap/olap_common.h"
35
36
namespace doris {
37
// Reserved marker strings. They are `static const` at namespace scope in a header,
// so every translation unit that includes this file gets its own copy.
// NOTE(review): presumably the names of hidden/internal columns -- confirm against callers.
static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__";
38
static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
39
static const std::string VERSION_COL = "__DORIS_VERSION_COL__";
40
41
// Lookup table of the powers of ten 10^0 .. 10^9 (index i holds 10^i); kept as a
// table to speed up arithmetic that needs these values.
static constexpr int32_t g_power_table[] = {
        1,          // 10^0
        10,         // 10^1
        100,        // 10^2
        1000,       // 10^3
        10000,      // 10^4
        100000,     // 10^5
        1000000,    // 10^6
        10000000,   // 10^7
        100000000,  // 10^8
        1000000000, // 10^9
};
44
45
// Stopwatch used to measure how long a piece of code takes to run (performance tuning).
//
// Timing starts when the object is constructed and can be restarted with reset().
// The clock source is gettimeofday(), i.e. wall-clock time, so a reading can be
// affected if the system clock is adjusted while the watch is running.
class OlapStopWatch {
public:
    // Starts timing immediately.
    OlapStopWatch() { reset(); }

    // Restarts the measurement from the current time.
    void reset() { gettimeofday(&_begin_time, nullptr); }

    // Returns the number of microseconds elapsed since construction or the last
    // reset(). Returns 0 if the clock reads earlier than the start timestamp
    // (e.g. after a backwards clock adjustment).
    uint64_t get_elapse_time_us() const {
        struct timeval now;
        gettimeofday(&now, nullptr);
        // Pure integer arithmetic: converting a negative floating-point value to an
        // unsigned integer is undefined behaviour, so the difference is computed as a
        // signed 64-bit value and clamped before the conversion.
        const int64_t elapsed_us =
                (static_cast<int64_t>(now.tv_sec) - static_cast<int64_t>(_begin_time.tv_sec)) *
                        1000000 +
                (static_cast<int64_t>(now.tv_usec) - static_cast<int64_t>(_begin_time.tv_usec));
        return elapsed_us > 0 ? static_cast<uint64_t>(elapsed_us) : 0;
    }

    // Same reading as get_elapse_time_us(), expressed in seconds.
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }

private:
    struct timeval _begin_time; // timestamp taken at construction / last reset()
};
64
65
// @brief 切分字符串
66
// @param base 原串
67
// @param separator 分隔符
68
// @param result 切分结果
69
template <typename Str, typename T>
70
0
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
71
0
    if (!result) {
72
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
73
0
    }
74
75
    // 处理base为空的情况
76
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
77
0
    if (base.size() == 0) {
78
0
        result->push_back("");
79
0
        return Status::OK();
80
0
    }
81
82
0
    size_t offset = 0;
83
0
    while (offset < base.length()) {
84
0
        size_t next = base.find(separator, offset);
85
0
        if (next == std::string::npos) {
86
0
            result->emplace_back(base.substr(offset));
87
0
            break;
88
0
        } else {
89
0
            result->emplace_back(base.substr(offset, next - offset));
90
0
            offset = next + 1;
91
0
        }
92
0
    }
93
94
0
    return Status::OK();
95
0
}
Unexecuted instantiation: _ZN5doris12split_stringISt17basic_string_viewIcSt11char_traitsIcEEcEENS_6StatusERKT_T0_PSt6vectorINSt7__cxx1112basic_stringIcS3_SaIcEEESaISE_EE
Unexecuted instantiation: _ZN5doris12split_stringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEENS_6StatusERKT_T0_PSt6vectorIS6_SaIS6_EE
96
97
// Checksum helpers; only the declarations are visible here.
// NOTE(review): presumably olap_adler32_init() yields the seed value and olap_adler32()
// folds `len` bytes of `buf` into the running value `adler` -- confirm against the definitions.
uint32_t olap_adler32_init();
98
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
99
100
// Get the current system time and convert it to a string written to `out_string`.
101
Status gen_timestamp_string(std::string* out_string);
102
103
// NOTE(review): presumably verifies that the data directory `path` is readable and
// writable -- confirm against the definition.
Status check_datapath_rw(const std::string& path);
104
105
// NOTE(review): presumably performs a write-then-read check on `test_file_path` --
// confirm against the definition.
Status read_write_test_file(const std::string& test_file_path);
106
107
// Helper for reporting errno. All members are static; only declarations are visible here.
108
class Errno {
109
public:
110
    // Returns the error message that corresponds to errno; thread-safe.
111
    static const char* str();
112
    // Overload taking an explicit error number `no`.
    static const char* str(int no);
113
    // NOTE(review): presumably returns the current errno value -- confirm against the definition.
    static int no();
114
115
private:
116
    // Size in bytes of the per-thread message buffer below.
    static const int BUF_SIZE = 256;
117
    // Declared `__thread`, so each thread has its own buffer.
    static __thread char _buf[BUF_SIZE];
118
};
119
120
// Checks whether `value_str` is a base-10 integer that fits in the signed type T
// (int8_t, int16_t, int32_t or int64_t).
//
// Parsing follows strtoll(): leading whitespace and an optional sign are accepted.
// Returns false when
//   - no digits could be parsed (this includes the empty string),
//   - anything is left over after the number, including bytes that follow an
//     embedded NUL inside `value_str`,
//   - the value does not fit in 64 bits, or
//   - the value is outside [numeric_limits<T>::min(), numeric_limits<T>::max()].
template <typename T>
bool valid_signed_number(const std::string& value_str) {
    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0; // strtoll() reports overflow only through errno
    const int64_t value = std::strtoll(begin, &endptr, 10);

    // Compare pointers, not contents: parsing must have consumed at least one
    // character and must have stopped exactly at the end of the string.
    if (errno != 0 || endptr == begin || endptr != end) {
        return false;
    }

    return value >= std::numeric_limits<T>::min() && value <= std::numeric_limits<T>::max();
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
11
bool valid_signed_number(const std::string& value_str) {
123
11
    char* endptr = nullptr;
124
11
    errno = 0;
125
11
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
11
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
11
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
11
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
9
    return true;
137
11
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
13
bool valid_signed_number(const std::string& value_str) {
123
13
    char* endptr = nullptr;
124
13
    errno = 0;
125
13
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
13
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
13
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
13
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
11
    return true;
137
13
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
54
bool valid_signed_number(const std::string& value_str) {
123
54
    char* endptr = nullptr;
124
54
    errno = 0;
125
54
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
54
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
54
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
54
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
52
    return true;
137
54
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
11
bool valid_signed_number(const std::string& value_str) {
123
11
    char* endptr = nullptr;
124
11
    errno = 0;
125
11
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
11
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
11
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
2
        return false;
130
2
    }
131
132
9
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
0
        return false;
134
0
    }
135
136
9
    return true;
137
9
}
138
139
// Explicit specialization for int128_t; declared here, definition not visible in this header.
template <>
140
bool valid_signed_number<int128_t>(const std::string& value_str);
141
142
// Checks whether `value_str` is a base-10 integer that fits in the unsigned type T
// (uint8_t, uint16_t, uint32_t or uint64_t).
//
// Parsing follows strtoull(): leading whitespace and an optional '+' are accepted.
// Returns false when
//   - the string contains a minus sign,
//   - no digits could be parsed (this includes the empty string),
//   - anything is left over after the number, including bytes that follow an
//     embedded NUL inside `value_str`,
//   - the value does not fit in 64 bits, or
//   - the value is greater than numeric_limits<T>::max().
template <typename T>
bool valid_unsigned_number(const std::string& value_str) {
    // strtoull() accepts a minus sign and returns the negated value wrapped into the
    // unsigned range, so negative input has to be rejected up front. A '-' is checked
    // for anywhere in the string because strtoull() also skips leading whitespace,
    // and a '-' in any other position makes the string invalid anyway.
    if (value_str.find('-') != std::string::npos) {
        return false;
    }

    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0; // strtoull() reports overflow only through errno
    const uint64_t value = std::strtoull(begin, &endptr, 10);

    // Compare pointers, not contents: parsing must have consumed at least one
    // character and must have stopped exactly at the end of the string.
    if (errno != 0 || endptr == begin || endptr != end) {
        return false;
    }

    // No lower-bound check is needed: `value` is unsigned.
    return value <= std::numeric_limits<T>::max();
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
164
165
// String validators; only the declarations are visible here.
// NOTE(review): presumably checks `value_str` against a decimal with the given
// `precision` and `frac` digits -- confirm against the definition.
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
166
167
// Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss'
168
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
169
bool valid_datetime(const std::string& value_str, const uint32_t scale);
170
171
// NOTE(review): accepted spellings of a boolean are defined by the implementation -- confirm there.
bool valid_bool(const std::string& value_str);
172
173
// NOTE(review): presumably true when `value_str` is a well-formed IPv4 address -- confirm.
bool valid_ipv4(const std::string& value_str);
174
175
// NOTE(review): presumably true when `value_str` is a well-formed IPv6 address -- confirm.
bool valid_ipv6(const std::string& value_str);
176
177
2.96k
constexpr bool is_string_type(const FieldType& field_type) {
178
2.96k
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
179
2.96k
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
180
2.96k
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
181
2.96k
}
182
183
342
constexpr bool is_numeric_type(const FieldType& field_type) {
184
342
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
185
342
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
186
342
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
187
342
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
188
342
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
189
342
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
190
342
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
191
342
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
192
342
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
193
342
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
194
342
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
195
342
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
196
342
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
197
342
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
198
342
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
199
342
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
200
342
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
201
342
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
202
342
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
203
342
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
204
342
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
205
342
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
206
342
}
207
208
// Util used to get string name of thrift enum item.
// Looks `index` up in the map `_<enum_type>_VALUES_TO_NAMES` and assigns the mapped
// name to `out`; when `index` is not present, `out` is set to the literal "NULL".
// The body is wrapped in do { ... } while (0) so the macro can be used as a single statement.
209
#define EnumToString(enum_type, index, out)                   \
210
3.34k
    do {                                                      \
211
3.34k
        auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \
212
3.34k
        if (it == _##enum_type##_VALUES_TO_NAMES.end()) {     \
213
0
            out = "NULL";                                     \
214
3.34k
        } else {                                              \
215
3.34k
            out = it->second;                                 \
216
3.34k
        }                                                     \
217
3.34k
    } while (0)
218
219
struct RowLocation {
220
5.18M
    RowLocation() : segment_id(0), row_id(0) {}
221
6.68M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {}
222
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
223
8.12M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
224
    RowsetId rowset_id;
225
    uint32_t segment_id;
226
    uint32_t row_id;
227
228
0
    bool operator==(const RowLocation& rhs) const {
229
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
230
0
    }
231
232
0
    bool operator<(const RowLocation& rhs) const {
233
0
        if (rowset_id != rhs.rowset_id) {
234
0
            return rowset_id < rhs.rowset_id;
235
0
        } else if (segment_id != rhs.segment_id) {
236
0
            return segment_id < rhs.segment_id;
237
0
        } else {
238
0
            return row_id < rhs.row_id;
239
0
        }
240
0
    }
241
};
242
// Ordered set of row locations; ordering comes from RowLocation::operator<.
// NOTE(review): <set>, <list> and <utility> are not among the headers this file
// includes directly (as far as visible here); they are presumably pulled in
// transitively -- consider including them explicitly.
using RowLocationSet = std::set<RowLocation>;
243
// List of (RowLocation, RowLocation) pairs.
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
244
245
struct GlobalRowLoacation {
246
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
247
0
            : tablet_id(tid), row_location(rsid, sid, rid) {}
248
    int64_t tablet_id;
249
    RowLocation row_location;
250
251
0
    bool operator==(const GlobalRowLoacation& rhs) const {
252
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
253
0
    }
254
255
0
    bool operator<(const GlobalRowLoacation& rhs) const {
256
0
        if (tablet_id != rhs.tablet_id) {
257
0
            return tablet_id < rhs.tablet_id;
258
0
        } else {
259
0
            return row_location < rhs.row_location;
260
0
        }
261
0
    }
262
};
263
264
} // namespace doris