Coverage Report

Created: 2026-06-23 14:23

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
#include <limits.h>
#include <stdint.h>
#include <sys/time.h>

#include <compare>
#include <cstdio>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <list>
#include <set>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
32
33
#include "common/status.h"
34
#include "storage/olap_common.h"
35
36
namespace doris {
37
// Reserved `__DORIS_*__` identifier strings (presumably names of internal
// columns - several are suffixed `_COL`; confirm against their users).
// Declared `inline` so that all translation units including this header share
// one object per name instead of each constructing its own private copy.
inline const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__";
inline const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
inline const std::string VERSION_COL = "__DORIS_VERSION_COL__";
inline const std::string SKIP_BITMAP_COL = "__DORIS_SKIP_BITMAP_COL__";
inline const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
inline const std::string BINLOG_LSN_COL = "__DORIS_BINLOG_LSN__";
inline const std::string BINLOG_OP_COL = "__DORIS_BINLOG_OP__";
inline const std::string BINLOG_TSO_COL = "__DORIS_BINLOG_TSO__";
45
46
// Precomputed powers of ten, 10^0 .. 10^9 (index == exponent), used to speed
// up arithmetic. `inline constexpr` gives the header a single shared table
// whose elements are usable in constant expressions.
inline constexpr int32_t g_power_table[] = {1,      10,      100,      1000,      10000,
                                            100000, 1000000, 10000000, 100000000, 1000000000};
49
50
// Timing helper: measures how long a piece of code takes to execute, for
// performance tuning. The start point is taken with gettimeofday() at
// construction and again on every reset().
class OlapStopWatch {
public:
    OlapStopWatch() { reset(); }

    // Restarts the measurement from the current time.
    void reset() { gettimeofday(&_begin_time, nullptr); }

    // Returns the number of microseconds elapsed since construction or the
    // last reset(). The difference is computed in 64-bit integer arithmetic;
    // if the clock reading is earlier than the start point (gettimeofday() is
    // not monotonic) the result is clamped to 0 rather than converting a
    // negative value to an unsigned one.
    uint64_t get_elapse_time_us() const {
        struct timeval now;
        gettimeofday(&now, nullptr);
        const int64_t delta_sec =
                static_cast<int64_t>(now.tv_sec) - static_cast<int64_t>(_begin_time.tv_sec);
        const int64_t delta_usec =
                static_cast<int64_t>(now.tv_usec) - static_cast<int64_t>(_begin_time.tv_usec);
        const int64_t elapsed_us = delta_sec * 1000000 + delta_usec;
        return elapsed_us > 0 ? static_cast<uint64_t>(elapsed_us) : 0;
    }

    // Same interval as get_elapse_time_us(), expressed in seconds.
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }

private:
    struct timeval _begin_time; // start timestamp
};
69
70
// @brief 切分字符串
71
// @param base 原串
72
// @param separator 分隔符
73
// @param result 切分结果
74
template <typename Str, typename T>
75
306k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
76
306k
    if (!result) {
77
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
78
0
    }
79
80
    // 处理base为空的情况
81
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
82
306k
    if (base.size() == 0) {
83
0
        result->push_back("");
84
0
        return Status::OK();
85
0
    }
86
87
306k
    size_t offset = 0;
88
910k
    while (offset < base.length()) {
89
910k
        size_t next = base.find(separator, offset);
90
910k
        if (next == std::string::npos) {
91
306k
            result->emplace_back(base.substr(offset));
92
306k
            break;
93
603k
        } else {
94
603k
            result->emplace_back(base.substr(offset, next - offset));
95
603k
            offset = next + 1;
96
603k
        }
97
910k
    }
98
99
306k
    return Status::OK();
100
306k
}
_ZN5doris12split_stringISt17basic_string_viewIcSt11char_traitsIcEEcEENS_6StatusERKT_T0_PSt6vectorINSt7__cxx1112basic_stringIcS3_SaIcEEESaISE_EE
Line
Count
Source
75
296k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
76
296k
    if (!result) {
77
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
78
0
    }
79
80
    // 处理base为空的情况
81
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
82
296k
    if (base.size() == 0) {
83
0
        result->push_back("");
84
0
        return Status::OK();
85
0
    }
86
87
296k
    size_t offset = 0;
88
891k
    while (offset < base.length()) {
89
891k
        size_t next = base.find(separator, offset);
90
891k
        if (next == std::string::npos) {
91
296k
            result->emplace_back(base.substr(offset));
92
296k
            break;
93
594k
        } else {
94
594k
            result->emplace_back(base.substr(offset, next - offset));
95
594k
            offset = next + 1;
96
594k
        }
97
891k
    }
98
99
296k
    return Status::OK();
100
296k
}
_ZN5doris12split_stringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEENS_6StatusERKT_T0_PSt6vectorIS6_SaIS6_EE
Line
Count
Source
75
9.53k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
76
9.53k
    if (!result) {
77
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
78
0
    }
79
80
    // 处理base为空的情况
81
    // 在删除功能中,当varchar类型列的过滤条件为空时,会出现这种情况
82
9.53k
    if (base.size() == 0) {
83
0
        result->push_back("");
84
0
        return Status::OK();
85
0
    }
86
87
9.53k
    size_t offset = 0;
88
19.0k
    while (offset < base.length()) {
89
19.0k
        size_t next = base.find(separator, offset);
90
19.0k
        if (next == std::string::npos) {
91
9.53k
            result->emplace_back(base.substr(offset));
92
9.53k
            break;
93
9.53k
        } else {
94
9.53k
            result->emplace_back(base.substr(offset, next - offset));
95
9.53k
            offset = next + 1;
96
9.53k
        }
97
19.0k
    }
98
99
9.53k
    return Status::OK();
100
9.53k
}
101
102
uint32_t olap_adler32_init();
103
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
104
105
// Get the current system time and convert it to a string
106
Status gen_timestamp_string(std::string* out_string);
107
108
Status check_datapath_rw(const std::string& path);
109
110
Status read_write_test_file(const std::string& test_file_path);
111
112
// Helper for printing errno.
113
class Errno {
114
public:
115
    // Returns the error message corresponding to errno; thread-safe.
116
    static const char* str();
117
    // Overload taking an explicit error number `no` (implementation not visible here).
    static const char* str(int no);
118
    // NOTE(review): presumably returns the current errno value - confirm in the .cpp.
    static int no();
119
120
private:
121
    // Size of _buf, in chars.
    static const int BUF_SIZE = 256;
122
    // One buffer per thread (__thread storage); presumably receives the
    // message text returned by str() - confirm in the .cpp.
    static __thread char _buf[BUF_SIZE];
123
};
124
125
// Checks whether `value_str` is a base-10 integer that fits in the signed
// type T (int8_t, int16_t, int32_t or int64_t) without overflow.
//
// Parsing is done by strtoll(), so leading whitespace and an optional sign
// are accepted. The whole string must be consumed: an empty string, trailing
// characters, or an embedded NUL byte make the value invalid.
template <typename T>
bool valid_signed_number(const std::string& value_str) {
    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0;
    const long long value = strtoll(begin, &endptr, 10);

    // Out of range for long long (ERANGE) or any other conversion failure.
    if (errno != 0) {
        return false;
    }
    // Compare pointers, not string contents: nothing was parsed, or parsing
    // stopped before the real end of the string.
    if (endptr == begin || endptr != end) {
        return false;
    }

    return value >= std::numeric_limits<T>::min() && value <= std::numeric_limits<T>::max();
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
127
25
bool valid_signed_number(const std::string& value_str) {
128
25
    char* endptr = nullptr;
129
25
    errno = 0;
130
25
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
131
132
25
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
133
25
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
134
0
        return false;
135
0
    }
136
137
25
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
138
2
        return false;
139
2
    }
140
141
23
    return true;
142
25
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
127
17
bool valid_signed_number(const std::string& value_str) {
128
17
    char* endptr = nullptr;
129
17
    errno = 0;
130
17
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
131
132
17
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
133
17
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
134
0
        return false;
135
0
    }
136
137
17
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
138
2
        return false;
139
2
    }
140
141
15
    return true;
142
17
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
127
796
bool valid_signed_number(const std::string& value_str) {
128
796
    char* endptr = nullptr;
129
796
    errno = 0;
130
796
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
131
132
796
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
133
802
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
134
0
        return false;
135
0
    }
136
137
797
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
138
2
        return false;
139
2
    }
140
141
794
    return true;
142
796
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
127
65
bool valid_signed_number(const std::string& value_str) {
128
65
    char* endptr = nullptr;
129
65
    errno = 0;
130
65
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
131
132
65
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
133
65
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
134
2
        return false;
135
2
    }
136
137
63
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
138
0
        return false;
139
0
    }
140
141
63
    return true;
142
63
}
143
144
template <>
145
bool valid_signed_number<int128_t>(const std::string& value_str);
146
147
// Checks whether `value_str` is a base-10 integer that fits in the unsigned
// type T (uint8_t, uint16_t, uint32_t or uint64_t) without overflow.
//
// Parsing is done by strtoull(), so leading whitespace and an optional '+'
// are accepted. The whole string must be consumed: an empty string, trailing
// characters, or an embedded NUL byte make the value invalid.
template <typename T>
bool valid_unsigned_number(const std::string& value_str) {
    // strtoull() accepts "-N" and returns the negated value without reporting
    // an error, so a minus sign is rejected up front - wherever it appears,
    // including after leading whitespace.
    if (value_str.find('-') != std::string::npos) {
        return false;
    }

    const char* const begin = value_str.c_str();
    const char* const end = begin + value_str.size();

    char* endptr = nullptr;
    errno = 0;
    const unsigned long long value = strtoull(begin, &endptr, 10);

    // Out of range for unsigned long long (ERANGE) or any other conversion failure.
    if (errno != 0) {
        return false;
    }
    // Compare pointers, not string contents: nothing was parsed, or parsing
    // stopped before the real end of the string.
    if (endptr == begin || endptr != end) {
        return false;
    }

    return value <= std::numeric_limits<T>::max();
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
169
170
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
171
172
// Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss'
173
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
174
bool valid_datetime(const std::string& value_str, const uint32_t scale);
175
176
bool valid_bool(const std::string& value_str);
177
178
bool valid_ipv4(const std::string& value_str);
179
180
bool valid_ipv6(const std::string& value_str);
181
182
18.4M
constexpr bool is_string_type(const FieldType& field_type) {
183
18.4M
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
184
18.4M
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
185
18.4M
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
186
18.4M
}
187
188
24.7k
constexpr bool is_numeric_type(const FieldType& field_type) {
189
24.7k
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
190
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
191
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
192
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
193
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
194
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
195
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
196
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
197
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
198
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
199
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
200
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
201
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
202
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ ||
203
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
204
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
205
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
206
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
207
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
208
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
209
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
210
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
211
24.7k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
212
24.7k
}
213
214
// Util used to get the string name of a thrift enum item.
//
// Looks `index` up in the map named `_<enum_type>_VALUES_TO_NAMES` and assigns
// the mapped name to `out`; assigns "NULL" when `index` is not in the map.
// The macro's own locals carry an `enum_to_string_` prefix so that an argument
// spelled `it` (or any other ordinary name) is not captured by them, and `out`
// is parenthesized so that any lvalue expression can be passed.
#define EnumToString(enum_type, index, out)                                    \
    do {                                                                       \
        const auto& enum_to_string_names_ = _##enum_type##_VALUES_TO_NAMES;    \
        const auto enum_to_string_it_ = enum_to_string_names_.find(index);     \
        if (enum_to_string_it_ == enum_to_string_names_.end()) {               \
            (out) = "NULL";                                                    \
        } else {                                                               \
            (out) = enum_to_string_it_->second;                                \
        }                                                                      \
    } while (0)
224
225
struct RowLocation {
226
25.6M
    RowLocation() : segment_id(0), row_id(0) {}
227
9.74M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {}
228
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
229
11.1M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
230
    RowsetId rowset_id;
231
    uint32_t segment_id;
232
    uint32_t row_id;
233
234
0
    bool operator==(const RowLocation& rhs) const {
235
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
236
0
    }
237
238
856
    bool operator<(const RowLocation& rhs) const {
239
856
        if (rowset_id != rhs.rowset_id) {
240
42
            return rowset_id < rhs.rowset_id;
241
814
        } else if (segment_id != rhs.segment_id) {
242
0
            return segment_id < rhs.segment_id;
243
814
        } else {
244
814
            return row_id < rhs.row_id;
245
814
        }
246
856
    }
247
};
248
using RowLocationSet = std::set<RowLocation>;
249
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
250
251
struct GlobalRowLoacation {
252
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
253
0
            : tablet_id(tid), row_location(rsid, sid, rid) {}
254
    int64_t tablet_id;
255
    RowLocation row_location;
256
257
0
    bool operator==(const GlobalRowLoacation& rhs) const {
258
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
259
0
    }
260
261
0
    bool operator<(const GlobalRowLoacation& rhs) const {
262
0
        if (tablet_id != rhs.tablet_id) {
263
0
            return tablet_id < rhs.tablet_id;
264
0
        } else {
265
0
            return row_location < rhs.row_location;
266
0
        }
267
0
    }
268
};
269
270
// Row locator made of a version tag, a backend id, a file id and a row id.
// All comparison operators come from the defaulted operator<=> below, which
// compares the members in declaration order.
struct GlobalRowLoacationV2 {
271
    // NOTE(review): `bid` is uint64_t but is stored in the int64_t member
    // `backend_id`, so it is implicitly converted - confirm this is intended.
    GlobalRowLoacationV2(uint8_t ver, uint64_t bid, uint32_t fid, uint32_t rid)
272
25.0M
            : version(ver), backend_id(bid), file_id(fid), row_id(rid) {}
273
    uint8_t version;
274
    int64_t backend_id;
275
    uint32_t file_id;
276
    uint32_t row_id;
277
278
    // Member-wise three-way comparison (C++20).
    auto operator<=>(const GlobalRowLoacationV2&) const = default;
279
};
280
281
} // namespace doris