Coverage Report

Created: 2026-01-08 09:56

/root/doris/be/src/olap/utils.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
#include <errno.h> // IWYU pragma: keep
#include <limits.h>
#include <stdint.h>
#include <sys/time.h>

#include <cstdio>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <list>
#include <set>
#include <string>
#include <string_view>
#include <type_traits>
#include <vector>

#include "common/status.h"
#include "olap/olap_common.h"
35
36
namespace doris {
37
// Reserved marker names used by Doris.
// NOTE(review): presumably the names of hidden columns -- confirm against the users of these constants.
static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__";
static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
static const std::string VERSION_COL = "__DORIS_VERSION_COL__";

// Lookup table of powers of ten, used to speed up arithmetic:
// g_power_table[i] == 10^i for i in [0, 9]. 10^9 is the largest power of ten
// that fits in int32_t. constexpr so it can also be used in constant expressions.
static constexpr int32_t g_power_table[] = {1,      10,      100,      1000,      10000,
                                            100000, 1000000, 10000000, 100000000, 1000000000};
44
45
// Stopwatch used to measure how long a piece of code takes, for performance tuning.
//
// The start timestamp is taken with gettimeofday() when the object is constructed
// and again on every reset(); elapsed values are measured against that timestamp.
class OlapStopWatch {
public:
    // Starts timing immediately.
    OlapStopWatch() { reset(); }

    // Returns the number of microseconds between the start timestamp and now.
    // gettimeofday() reads the wall clock, which may be stepped backwards; in
    // that case 0 is returned instead of converting a negative value to an
    // unsigned type.
    uint64_t get_elapse_time_us() const {
        struct timeval now;
        gettimeofday(&now, nullptr);
        // Pure integer arithmetic: no round trip through double.
        const int64_t elapsed_us =
                (static_cast<int64_t>(now.tv_sec) - static_cast<int64_t>(_begin_time.tv_sec)) *
                        1000000 +
                (static_cast<int64_t>(now.tv_usec) - static_cast<int64_t>(_begin_time.tv_usec));
        return elapsed_us > 0 ? static_cast<uint64_t>(elapsed_us) : 0;
    }

    // Same interval as get_elapse_time_us(), expressed in seconds.
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }

    // Restarts the measurement from the current time.
    void reset() { gettimeofday(&_begin_time, nullptr); }

private:
    struct timeval _begin_time; // start timestamp
};
64
65
// @brief 切分字符串
66
// @param base 原串
67
// @param separator 分隔符
68
// @param result 切分结果
69
template <typename Str, typename T>
70
347k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
71
347k
    if (!result) {
72
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
73
0
    }
74
75
    // 处理base为空的情况
76
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
77
347k
    if (base.size() == 0) {
78
0
        result->push_back("");
79
0
        return Status::OK();
80
0
    }
81
82
347k
    size_t offset = 0;
83
1.03M
    while (offset < base.length()) {
84
1.03M
        size_t next = base.find(separator, offset);
85
1.03M
        if (next == std::string::npos) {
86
347k
            result->emplace_back(base.substr(offset));
87
347k
            break;
88
685k
        } else {
89
685k
            result->emplace_back(base.substr(offset, next - offset));
90
685k
            offset = next + 1;
91
685k
        }
92
1.03M
    }
93
94
347k
    return Status::OK();
95
347k
}
_ZN5doris12split_stringISt17basic_string_viewIcSt11char_traitsIcEEcEENS_6StatusERKT_T0_PSt6vectorINSt7__cxx1112basic_stringIcS3_SaIcEEESaISE_EE
Line
Count
Source
70
338k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
71
338k
    if (!result) {
72
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
73
0
    }
74
75
    // 处理base为空的情况
76
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
77
338k
    if (base.size() == 0) {
78
0
        result->push_back("");
79
0
        return Status::OK();
80
0
    }
81
82
338k
    size_t offset = 0;
83
1.01M
    while (offset < base.length()) {
84
1.01M
        size_t next = base.find(separator, offset);
85
1.01M
        if (next == std::string::npos) {
86
338k
            result->emplace_back(base.substr(offset));
87
338k
            break;
88
677k
        } else {
89
677k
            result->emplace_back(base.substr(offset, next - offset));
90
677k
            offset = next + 1;
91
677k
        }
92
1.01M
    }
93
94
338k
    return Status::OK();
95
338k
}
_ZN5doris12split_stringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEENS_6StatusERKT_T0_PSt6vectorIS6_SaIS6_EE
Line
Count
Source
70
8.48k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
71
8.48k
    if (!result) {
72
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
73
0
    }
74
75
    // 处理base为空的情况
76
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
77
8.48k
    if (base.size() == 0) {
78
0
        result->push_back("");
79
0
        return Status::OK();
80
0
    }
81
82
8.48k
    size_t offset = 0;
83
16.9k
    while (offset < base.length()) {
84
16.9k
        size_t next = base.find(separator, offset);
85
16.9k
        if (next == std::string::npos) {
86
8.48k
            result->emplace_back(base.substr(offset));
87
8.48k
            break;
88
8.48k
        } else {
89
8.48k
            result->emplace_back(base.substr(offset, next - offset));
90
8.48k
            offset = next + 1;
91
8.48k
        }
92
16.9k
    }
93
94
8.48k
    return Status::OK();
95
8.48k
}
96
97
uint32_t olap_adler32_init();
98
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
99
100
// Gets the current system time and converts it to a string
101
Status gen_timestamp_string(std::string* out_string);
102
103
Status check_datapath_rw(const std::string& path);
104
105
Status read_write_test_file(const std::string& test_file_path);
106
107
// Helper for printing errno.
// Only declarations are visible in this header; the definitions live elsewhere.
class Errno {
public:
    // NOTE(review): presumably returns the current errno value -- confirm in the definition.
    static int no();

    // Returns the error message corresponding to errno; described as
    // thread-safe by the original author.
    static const char* str();
    // NOTE(review): presumably the same lookup for the explicit error number `no` -- confirm.
    static const char* str(int no);

private:
    // Size in bytes of the per-thread buffer below.
    static const int BUF_SIZE = 256;
    // One buffer per thread (__thread storage).
    static __thread char _buf[BUF_SIZE];
};
119
120
// Checks whether `value_str` is a base-10 integer that fits in the signed
// type T (int8_t, int16_t, int32_t or int64_t).
//
// Returns true only when
//  - the parse reported no error (e.g. no out-of-range value),
//  - at least one digit was consumed,
//  - the whole string was consumed, including any bytes after an embedded NUL,
//  - and the value lies within [min, max] of T.
// As with strtoll, leading whitespace and an optional sign are accepted.
template <typename T>
bool valid_signed_number(const std::string& value_str) {
    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0;
    // strtoll rather than strtol: covers the full 64-bit range even on
    // platforms where `long` is only 32 bits wide.
    const long long value = strtoll(begin, &endptr, 10);

    // endptr == begin: nothing was parsed (also catches the empty string).
    // endptr != end:   trailing characters, or data hidden behind a NUL byte.
    if (errno != 0 || endptr == begin || endptr != end) {
        return false;
    }

    return value >= std::numeric_limits<T>::min() && value <= std::numeric_limits<T>::max();
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
39
bool valid_signed_number(const std::string& value_str) {
123
39
    char* endptr = nullptr;
124
39
    errno = 0;
125
39
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
39
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
39
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
39
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
37
    return true;
137
39
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
21
bool valid_signed_number(const std::string& value_str) {
123
21
    char* endptr = nullptr;
124
21
    errno = 0;
125
21
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
21
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
21
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
21
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
19
    return true;
137
21
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
1.58k
bool valid_signed_number(const std::string& value_str) {
123
1.58k
    char* endptr = nullptr;
124
1.58k
    errno = 0;
125
1.58k
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
1.58k
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
1.58k
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
0
        return false;
130
0
    }
131
132
1.58k
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
2
        return false;
134
2
    }
135
136
1.58k
    return true;
137
1.58k
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
122
107
bool valid_signed_number(const std::string& value_str) {
123
107
    char* endptr = nullptr;
124
107
    errno = 0;
125
107
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
126
127
107
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
128
107
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
129
2
        return false;
130
2
    }
131
132
105
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
133
0
        return false;
134
0
    }
135
136
105
    return true;
137
105
}
138
139
template <>
140
bool valid_signed_number<int128_t>(const std::string& value_str);
141
142
// Checks whether `value_str` is a base-10 integer that fits in the unsigned
// type T (uint8_t, uint16_t, uint32_t or uint64_t).
//
// Returns true only when the string contains no minus sign, the parse
// reported no error, at least one digit was consumed, the whole string was
// consumed (including any bytes after an embedded NUL), and the value does
// not exceed the maximum of T.
template <typename T>
bool valid_unsigned_number(const std::string& value_str) {
    // strtoull accepts a '-' (even after leading whitespace) and silently
    // negates the result, so " -1" would otherwise pass as the maximum value.
    // A valid unsigned literal never contains '-', so reject it anywhere.
    if (value_str.find('-') != std::string::npos) {
        return false;
    }

    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0;
    // strtoull rather than strtoul: covers the full 64-bit range even on
    // platforms where `long` is only 32 bits wide.
    const unsigned long long value = strtoull(begin, &endptr, 10);

    // endptr == begin: nothing was parsed (also catches the empty string).
    // endptr != end:   trailing characters, or data hidden behind a NUL byte.
    if (errno != 0 || endptr == begin || endptr != end) {
        return false;
    }

    // The lower bound of an unsigned type is 0, so only the upper bound matters.
    return value <= std::numeric_limits<T>::max();
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
164
165
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
166
167
// Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss'
168
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
169
bool valid_datetime(const std::string& value_str, const uint32_t scale);
170
171
bool valid_bool(const std::string& value_str);
172
173
bool valid_ipv4(const std::string& value_str);
174
175
bool valid_ipv6(const std::string& value_str);
176
177
108k
constexpr bool is_string_type(const FieldType& field_type) {
178
108k
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
179
108k
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
180
108k
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
181
108k
}
182
183
39.7k
constexpr bool is_numeric_type(const FieldType& field_type) {
184
39.7k
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
185
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
186
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
187
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
188
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
189
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
190
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
191
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
192
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
193
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
194
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
195
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
196
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
197
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
198
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
199
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
200
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
201
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
202
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
203
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
204
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
205
39.7k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
206
39.7k
}
207
208
// Util used to get string name of thrift enum item.
//
// Looks `index` up in the map named _<enum_type>_VALUES_TO_NAMES and assigns
// the mapped name to `out`; when `index` is not in the map, `out` is set to
// "NULL".
//
// The macro's own locals carry an `enum_to_string_` prefix so they cannot
// shadow a caller variable used inside `index` or `out` (the classic case
// being a caller variable called `it`), and both arguments are parenthesized
// so arbitrary expressions can be passed.
#define EnumToString(enum_type, index, out)                                        \
    do {                                                                           \
        const auto& enum_to_string_names_ = _##enum_type##_VALUES_TO_NAMES;        \
        const auto enum_to_string_it_ = enum_to_string_names_.find((index));       \
        if (enum_to_string_it_ == enum_to_string_names_.end()) {                   \
            (out) = "NULL";                                                        \
        } else {                                                                   \
            (out) = enum_to_string_it_->second;                                    \
        }                                                                          \
    } while (0)
218
219
struct RowLocation {
220
23.6M
    RowLocation() : segment_id(0), row_id(0) {}
221
7.61M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {}
222
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
223
27.7M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
224
    RowsetId rowset_id;
225
    uint32_t segment_id;
226
    uint32_t row_id;
227
228
0
    bool operator==(const RowLocation& rhs) const {
229
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
230
0
    }
231
232
121k
    bool operator<(const RowLocation& rhs) const {
233
121k
        if (rowset_id != rhs.rowset_id) {
234
7.99k
            return rowset_id < rhs.rowset_id;
235
113k
        } else if (segment_id != rhs.segment_id) {
236
0
            return segment_id < rhs.segment_id;
237
113k
        } else {
238
113k
            return row_id < rhs.row_id;
239
113k
        }
240
121k
    }
241
};
242
using RowLocationSet = std::set<RowLocation>;
243
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
244
245
struct GlobalRowLoacation {
246
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
247
18.8M
            : tablet_id(tid), row_location(rsid, sid, rid) {}
248
    int64_t tablet_id;
249
    RowLocation row_location;
250
251
0
    bool operator==(const GlobalRowLoacation& rhs) const {
252
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
253
0
    }
254
255
185k
    bool operator<(const GlobalRowLoacation& rhs) const {
256
185k
        if (tablet_id != rhs.tablet_id) {
257
64.2k
            return tablet_id < rhs.tablet_id;
258
121k
        } else {
259
121k
            return row_location < rhs.row_location;
260
121k
        }
261
185k
    }
262
};
263
264
} // namespace doris