Coverage Report

Created: 2026-06-11 15:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <limits.h>
23
#include <stdint.h>
24
#include <sys/time.h>
25
26
#include <cstdio>
27
#include <cstdlib>
28
#include <iterator>
29
#include <limits>
30
#include <string>
31
#include <vector>
32
33
#include "common/status.h"
34
#include "storage/olap_common.h"
35
36
namespace doris {
37
static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__"; // These "__DORIS_*__" strings are presumably names of internal columns — TODO confirm against callers.
38
static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
39
static const std::string VERSION_COL = "__DORIS_VERSION_COL__";
40
static const std::string SKIP_BITMAP_COL = "__DORIS_SKIP_BITMAP_COL__";
41
static const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
42
static const std::string BINLOG_TIMESTAMP_COL = "__DORIS_BINLOG_TIMESTAMP__";
43
static const std::string BINLOG_LSN_COL = "__DORIS_BINLOG_LSN__"; // NOTE(review): all of the above are namespace-scope `static` in a header, so each including translation unit gets its own copy.
44
45
// Powers of ten (10^0 through 10^9), precomputed to speed up arithmetic.
46
const static int32_t g_power_table[] = {1,      10,      100,      1000,      10000,
47
                                        100000, 1000000, 10000000, 100000000, 1000000000};
48
49
// Timing utility: measures how long a section of code takes to run, for performance tuning.
50
class OlapStopWatch {
51
public:
52
404k
    uint64_t get_elapse_time_us() const { // Microseconds elapsed since construction or the most recent reset().
53
404k
        struct timeval now;
54
404k
        gettimeofday(&now, nullptr); // NOTE(review): gettimeofday reports wall-clock time, which can step backwards; a negative delta would then be cast to uint64_t below — confirm this is acceptable.
55
404k
        return (uint64_t)((now.tv_sec - _begin_time.tv_sec) * 1e6 + // whole-second delta scaled to microseconds (double arithmetic because of 1e6)
56
404k
                          (now.tv_usec - _begin_time.tv_usec)); // plus the microsecond-field delta, which may be negative
57
404k
    }
58
59
7.85k
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; } // Same elapsed time, expressed in seconds.
60
61
188k
    void reset() { gettimeofday(&_begin_time, nullptr); } // Restarts the measurement from the current time.
62
63
188k
    OlapStopWatch() { reset(); } // Timing starts at construction.
64
65
private:
66
    struct timeval _begin_time; // Timestamp at which the current measurement started
67
};
68
69
// @brief Split a string into pieces around a separator.
70
// @param base the string to split
71
// @param separator the delimiter to search for
72
// @param result output vector; pieces are appended to it (it is not cleared first) and it must not be null
73
template <typename Str, typename T>
74
426k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
426k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input"); // a null output pointer is reported as an error rather than dereferenced
77
0
    }
78
79
    // Handle the case where base is empty: a single empty piece is produced.
80
    // This happens in the delete path when the filter condition on a varchar column is empty.
81
426k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
426k
    size_t offset = 0; // start of the piece currently being extracted
87
1.20M
    while (offset < base.length()) { // a separator at the very end therefore yields no trailing empty piece
88
1.20M
        size_t next = base.find(separator, offset);
89
1.20M
        if (next == std::string::npos) {
90
426k
            result->emplace_back(base.substr(offset)); // no further separator: the remainder is the last piece
91
426k
            break;
92
781k
        } else {
93
781k
            result->emplace_back(base.substr(offset, next - offset));
94
781k
            offset = next + 1; // NOTE(review): advances by one character, i.e. assumes a single-character separator — confirm no caller passes a longer one.
95
781k
        }
96
1.20M
    }
97
98
426k
    return Status::OK();
99
426k
}
_ZN5doris12split_stringISt17basic_string_viewIcSt11char_traitsIcEEcEENS_6StatusERKT_T0_PSt6vectorINSt7__cxx1112basic_stringIcS3_SaIcEEESaISE_EE
Line
Count
Source
74
354k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
354k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
77
0
    }
78
79
    // Handle the case where base is empty.
80
    // This happens in the delete path when the filter condition on a varchar column is empty.
81
354k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
354k
    size_t offset = 0;
87
1.06M
    while (offset < base.length()) {
88
1.06M
        size_t next = base.find(separator, offset);
89
1.06M
        if (next == std::string::npos) {
90
354k
            result->emplace_back(base.substr(offset));
91
354k
            break;
92
708k
        } else {
93
708k
            result->emplace_back(base.substr(offset, next - offset));
94
708k
            offset = next + 1;
95
708k
        }
96
1.06M
    }
97
98
354k
    return Status::OK();
99
354k
}
_ZN5doris12split_stringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEENS_6StatusERKT_T0_PSt6vectorIS6_SaIS6_EE
Line
Count
Source
74
72.1k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
72.1k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
77
0
    }
78
79
    // Handle the case where base is empty.
80
    // This happens in the delete path when the filter condition on a varchar column is empty.
81
72.1k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
72.1k
    size_t offset = 0;
87
144k
    while (offset < base.length()) {
88
144k
        size_t next = base.find(separator, offset);
89
144k
        if (next == std::string::npos) {
90
72.1k
            result->emplace_back(base.substr(offset));
91
72.1k
            break;
92
72.1k
        } else {
93
72.1k
            result->emplace_back(base.substr(offset, next - offset));
94
72.1k
            offset = next + 1;
95
72.1k
        }
96
144k
    }
97
98
72.1k
    return Status::OK();
99
72.1k
}
100
101
uint32_t olap_adler32_init();
102
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
103
104
// Get the current system time and convert it to a string.
105
Status gen_timestamp_string(std::string* out_string);
106
107
Status check_datapath_rw(const std::string& path);
108
109
Status read_write_test_file(const std::string& test_file_path);
110
111
// Helper for printing errno.
112
class Errno {
113
public:
114
    // Returns the error message corresponding to errno; thread-safe.
115
    static const char* str();
116
    static const char* str(int no); // presumably the message for the given error number — definition not visible here, TODO confirm
117
    static int no(); // presumably the current errno value — definition not visible here, TODO confirm
118
119
private:
120
    static const int BUF_SIZE = 256; // size of _buf, in chars
121
    static __thread char _buf[BUF_SIZE]; // one buffer per thread (__thread storage)
122
};
123
124
// Returns true if value_str parses completely as a base-10 integer that fits in T (int8_t, int16_t, int32_t or int64_t).
125
template <typename T>
126
931
bool valid_signed_number(const std::string& value_str) {
127
931
    char* endptr = nullptr;
128
931
    errno = 0; // cleared so that an error reported by strtol can be told apart from a stale value
129
931
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
931
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) || // out of range for long
132
931
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') { // NOTE(review): `endptr == value_str` compares a char* with a std::string, i.e. by content rather than by address — confirm this is the intended "nothing parsed" check. `*endptr != '\0'` rejects trailing characters.
133
2
        return false;
134
2
    }
135
136
929
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) { // parsed value must fit in T
137
6
        return false;
138
6
    }
139
140
923
    return true;
141
929
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
25
bool valid_signed_number(const std::string& value_str) {
127
25
    char* endptr = nullptr;
128
25
    errno = 0;
129
25
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
25
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
25
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
25
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
23
    return true;
141
25
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
17
bool valid_signed_number(const std::string& value_str) {
127
17
    char* endptr = nullptr;
128
17
    errno = 0;
129
17
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
17
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
17
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
17
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
15
    return true;
141
17
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
825
bool valid_signed_number(const std::string& value_str) {
127
825
    char* endptr = nullptr;
128
825
    errno = 0;
129
825
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
825
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
825
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
825
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
823
    return true;
141
825
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
64
bool valid_signed_number(const std::string& value_str) {
127
64
    char* endptr = nullptr;
128
64
    errno = 0;
129
64
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
64
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
64
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
2
        return false;
134
2
    }
135
136
62
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
0
        return false;
138
0
    }
139
140
62
    return true;
141
62
}
142
143
template <>
144
bool valid_signed_number<int128_t>(const std::string& value_str);
145
146
// Returns true if value_str parses completely as a base-10 integer that fits in T (uint8_t, uint16_t, uint32_t or uint64_t).
147
template <typename T>
148
0
bool valid_unsigned_number(const std::string& value_str) {
149
0
    if (value_str[0] == '-') { // NOTE(review): only a '-' at index 0 is rejected; strtoul skips leading whitespace and accepts a sign, so input such as " -1" is not caught here — confirm whether that can occur.
150
0
        return false;
151
0
    }
152
153
0
    char* endptr = nullptr;
154
0
    errno = 0; // cleared so that an error reported by strtoul can be told apart from a stale value
155
0
    uint64_t value = strtoul(value_str.c_str(), &endptr, 10);
156
157
0
    if ((errno == ERANGE && (value == ULONG_MAX)) || (errno != 0 && value == 0) || // out of range for unsigned long, or another conversion error
158
0
        endptr == value_str || *endptr != '\0') { // NOTE(review): `endptr == value_str` compares a char* with a std::string by content, not by address. `*endptr != '\0'` rejects trailing characters.
159
0
        return false;
160
0
    }
161
162
0
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) { // for unsigned T the min() test can never be true; only the max() test is effective
163
0
        return false;
164
0
    }
165
166
0
    return true;
167
0
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
168
169
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
170
171
// Roughly validate a date/datetime string. The expected format is 'yyyy-MM-dd HH:mm:ss'.
172
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
173
bool valid_datetime(const std::string& value_str, const uint32_t scale);
174
175
bool valid_bool(const std::string& value_str);
176
177
bool valid_ipv4(const std::string& value_str);
178
179
bool valid_ipv6(const std::string& value_str);
180
181
26.4M
constexpr bool is_string_type(const FieldType& field_type) { // True for the VARCHAR, CHAR and STRING field types only.
182
26.4M
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
183
26.4M
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
184
26.4M
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
185
26.4M
}
186
187
31.1k
constexpr bool is_numeric_type(const FieldType& field_type) { // True for the integer, floating-point, date/time, decimal, BOOL and IPV4/IPV6 field types listed below.
188
31.1k
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
189
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
190
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
191
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
192
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
193
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
194
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
195
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
196
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
197
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DATE || // date and time types are counted as numeric here
198
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
199
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
200
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
201
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ ||
202
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
203
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
204
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
205
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
206
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
207
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
208
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL || // BOOL and the IP address types are counted as numeric as well
209
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
210
31.1k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
211
31.1k
}
212
213
// Utility macro for thrift enums: looks up `index` in _<enum_type>_VALUES_TO_NAMES and assigns the matching name to `out`, or "NULL" when the value is not found.
214
#define EnumToString(enum_type, index, out)                   \
215
44.7M
    do {                                                      \
216
44.7M
        auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \
217
44.7M
        if (it == _##enum_type##_VALUES_TO_NAMES.end()) {     \
218
0
            out = "NULL";                                     \
219
44.7M
        } else {                                              \
220
44.7M
            out = it->second;                                 \
221
44.7M
        }                                                     \
222
44.7M
    } while (0)
223
224
struct RowLocation { // Identifies a row by the triple (rowset_id, segment_id, row_id).
225
35.6M
    RowLocation() : segment_id(0), row_id(0) {} // rowset_id is left default-constructed
226
10.1M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {} // rowset_id is left default-constructed
227
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
228
11.5M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
229
    RowsetId rowset_id;
230
    uint32_t segment_id;
231
    uint32_t row_id;
232
233
0
    bool operator==(const RowLocation& rhs) const { // equal only when all three fields match
234
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
235
0
    }
236
237
6.19k
    bool operator<(const RowLocation& rhs) const { // orders by rowset_id, then segment_id, then row_id
238
6.19k
        if (rowset_id != rhs.rowset_id) {
239
1.11k
            return rowset_id < rhs.rowset_id;
240
5.08k
        } else if (segment_id != rhs.segment_id) {
241
0
            return segment_id < rhs.segment_id;
242
5.08k
        } else {
243
5.08k
            return row_id < rhs.row_id;
244
5.08k
        }
245
6.19k
    }
246
};
247
using RowLocationSet = std::set<RowLocation>; // ordered by RowLocation::operator<. NOTE(review): <set> and <list> are not included directly by this header; presumably they arrive transitively — confirm.
248
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
249
250
struct GlobalRowLoacation { // A RowLocation qualified by a tablet id. NOTE(review): the name is spelled "Loacation" (sic); renaming it would affect callers.
251
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
252
0
            : tablet_id(tid), row_location(rsid, sid, rid) {}
253
    int64_t tablet_id;
254
    RowLocation row_location;
255
256
0
    bool operator==(const GlobalRowLoacation& rhs) const { // equal when both tablet_id and row_location match
257
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
258
0
    }
259
260
0
    bool operator<(const GlobalRowLoacation& rhs) const { // orders by tablet_id first, then by row_location
261
0
        if (tablet_id != rhs.tablet_id) {
262
0
            return tablet_id < rhs.tablet_id;
263
0
        } else {
264
0
            return row_location < rhs.row_location;
265
0
        }
266
0
    }
267
};
268
269
struct GlobalRowLoacationV2 { // Row location expressed as (version, backend_id, file_id, row_id).
270
    GlobalRowLoacationV2(uint8_t ver, uint64_t bid, uint32_t fid, uint32_t rid) // NOTE(review): bid is uint64_t but is stored in the int64_t member backend_id — confirm the implicit conversion is intended.
271
42.1M
            : version(ver), backend_id(bid), file_id(fid), row_id(rid) {}
272
    uint8_t version;
273
    int64_t backend_id;
274
    uint32_t file_id;
275
    uint32_t row_id;
276
277
    auto operator<=>(const GlobalRowLoacationV2&) const = default; // member-wise comparison in declaration order: version, backend_id, file_id, row_id
278
};
279
280
} // namespace doris