Coverage Report

Created: 2026-06-18 11:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <limits.h>
23
#include <stdint.h>
24
#include <sys/time.h>
25
26
#include <cstdio>
27
#include <cstdlib>
28
#include <iterator>
29
#include <limits>
30
#include <string>
31
#include <vector>
32
33
#include "common/status.h"
34
#include "storage/olap_common.h"
35
36
namespace doris {
37
// Reserved "__DORIS_*__" marker strings.
// NOTE(review): presumably the names of Doris-internal hidden columns — confirm against callers.
// NOTE(review): BINLOG_TIMESTAMP_COL holds "__DORIS_BINLOG_TSO__" (TSO, not TIMESTAMP) — confirm
// the mismatch between the constant's name and its value is intended.
// These are `static` at namespace scope in a header, so every translation unit that includes
// this file gets its own copy of each string.
static const std::string DELETE_SIGN = "__DORIS_DELETE_SIGN__";
38
static const std::string WHERE_SIGN = "__DORIS_WHERE_SIGN__";
39
static const std::string VERSION_COL = "__DORIS_VERSION_COL__";
40
static const std::string SKIP_BITMAP_COL = "__DORIS_SKIP_BITMAP_COL__";
41
static const std::string SEQUENCE_COL = "__DORIS_SEQUENCE_COL__";
42
static const std::string BINLOG_TIMESTAMP_COL = "__DORIS_BINLOG_TSO__";
43
static const std::string BINLOG_LSN_COL = "__DORIS_BINLOG_LSN__";
44
45
// Lookup table used to speed up computation: g_power_table[i] == 10^i for i in [0, 9].
46
const static int32_t g_power_table[] = {1,      10,      100,      1000,      10000,
47
                                        100000, 1000000, 10000000, 100000000, 1000000000};
48
49
// Stopwatch for measuring how long a piece of code takes to run (intended for performance tuning).
// The start time is captured with gettimeofday() in the constructor and again on every reset().
//   get_elapse_time_us(): microseconds elapsed since the last reset(); the seconds delta is
//                         scaled by 1e6 in double arithmetic and the sum is cast to uint64_t.
//   get_elapse_second():  the same interval expressed in seconds as a double.
// NOTE(review): gettimeofday() reports wall-clock time, so a system clock adjustment between
// reset() and a read can skew the result — confirm this is acceptable for the callers.
50
class OlapStopWatch {
51
public:
52
589k
    uint64_t get_elapse_time_us() const {
53
589k
        struct timeval now;
54
589k
        gettimeofday(&now, nullptr);
55
589k
        return (uint64_t)((now.tv_sec - _begin_time.tv_sec) * 1e6 +
56
589k
                          (now.tv_usec - _begin_time.tv_usec));
57
589k
    }
58
59
7.64k
    double get_elapse_second() const { return get_elapse_time_us() / 1000000.0; }
60
61
219k
    void reset() { gettimeofday(&_begin_time, nullptr); }
62
63
219k
    OlapStopWatch() { reset(); }
64
65
private:
66
    struct timeval _begin_time; // start timestamp
67
};
68
69
// @brief Split a string into pieces around a separator, appending the pieces to *result.
70
// @param base the string to split
71
// @param separator the delimiter, passed to base.find()
72
// @param result output vector; pieces are appended and existing elements are kept
// @return an INVALID_ARGUMENT error if result is nullptr, otherwise Status::OK()
//
// An empty base appends exactly one empty string. A separator at the very end of base does not
// produce a trailing empty piece, because the loop stops once offset reaches base.length().
// NOTE(review): the scan resumes at `next + 1`, which assumes a one-character separator (the
// instantiations listed in this report all use T = char) — confirm before passing longer ones.
73
template <typename Str, typename T>
74
401k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
401k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
77
0
    }
78
79
    // Handle the case where base is empty.
80
    // In the delete feature this happens when the filter condition of a varchar column is empty.
81
401k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
401k
    size_t offset = 0;
87
1.14M
    while (offset < base.length()) {
88
1.14M
        size_t next = base.find(separator, offset);
89
1.14M
        if (next == std::string::npos) {
90
401k
            result->emplace_back(base.substr(offset));
91
401k
            break;
92
742k
        } else {
93
742k
            result->emplace_back(base.substr(offset, next - offset));
94
742k
            offset = next + 1;
95
742k
        }
96
1.14M
    }
97
98
401k
    return Status::OK();
99
401k
}
_ZN5doris12split_stringISt17basic_string_viewIcSt11char_traitsIcEEcEENS_6StatusERKT_T0_PSt6vectorINSt7__cxx1112basic_stringIcS3_SaIcEEESaISE_EE
Line
Count
Source
74
341k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
341k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
77
0
    }
78
79
    // Handle the case where base is empty.
80
    // In the delete feature this happens when the filter condition of a varchar column is empty.
81
341k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
341k
    size_t offset = 0;
87
1.02M
    while (offset < base.length()) {
88
1.02M
        size_t next = base.find(separator, offset);
89
1.02M
        if (next == std::string::npos) {
90
341k
            result->emplace_back(base.substr(offset));
91
341k
            break;
92
683k
        } else {
93
683k
            result->emplace_back(base.substr(offset, next - offset));
94
683k
            offset = next + 1;
95
683k
        }
96
1.02M
    }
97
98
341k
    return Status::OK();
99
341k
}
_ZN5doris12split_stringINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEcEENS_6StatusERKT_T0_PSt6vectorIS6_SaIS6_EE
Line
Count
Source
74
59.2k
Status split_string(const Str& base, const T separator, std::vector<std::string>* result) {
75
59.2k
    if (!result) {
76
0
        return Status::Error<ErrorCode::INVALID_ARGUMENT>("split_string meet nullptr result input");
77
0
    }
78
79
    // Handle the case where base is empty.
80
    // In the delete feature this happens when the filter condition of a varchar column is empty.
81
59.2k
    if (base.size() == 0) {
82
0
        result->push_back("");
83
0
        return Status::OK();
84
0
    }
85
86
59.2k
    size_t offset = 0;
87
118k
    while (offset < base.length()) {
88
118k
        size_t next = base.find(separator, offset);
89
118k
        if (next == std::string::npos) {
90
59.2k
            result->emplace_back(base.substr(offset));
91
59.2k
            break;
92
59.2k
        } else {
93
59.2k
            result->emplace_back(base.substr(offset, next - offset));
94
59.2k
            offset = next + 1;
95
59.2k
        }
96
118k
    }
97
98
59.2k
    return Status::OK();
99
59.2k
}
100
101
uint32_t olap_adler32_init();
102
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len);
103
104
// Get the current system time and convert it to a string.
105
Status gen_timestamp_string(std::string* out_string);
106
107
Status check_datapath_rw(const std::string& path);
108
109
Status read_write_test_file(const std::string& test_file_path);
110
111
// Helper for printing errno.
// Only declarations are visible here; the member definitions live outside this header.
112
class Errno {
113
public:
114
    // Returns the error message corresponding to errno; thread-safe (per the original author).
    // NOTE(review): presumably the message is formatted into the per-thread _buf below — confirm
    // in the implementation file.
115
    static const char* str();
116
    static const char* str(int no);
117
    static int no();
118
119
private:
120
    static const int BUF_SIZE = 256;
121
    static __thread char _buf[BUF_SIZE];
122
};
123
124
// Checks whether value_str is a base-10 integer that fits in T without overflow
// (intended for int8_t, int16_t, int32_t and int64_t).
// The text is parsed with strtol(). The function returns false when strtol() signals an error
// through errno, when nothing was parsed, when characters remain after the number, or when the
// parsed value lies outside [numeric_limits<T>::min(), numeric_limits<T>::max()].
// NOTE(review): `endptr == value_str` compares a char* against a std::string, i.e. it compares
// string contents rather than pointers; `value_str.c_str()` was presumably intended — confirm.
// NOTE(review): strtol() yields a `long`, so covering the int64_t range relies on `long` being
// 64 bits wide — confirm for all build targets.
125
template <typename T>
126
915
bool valid_signed_number(const std::string& value_str) {
127
915
    char* endptr = nullptr;
128
915
    errno = 0;
129
915
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
915
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
927
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
2
        return false;
134
2
    }
135
136
921
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
6
        return false;
138
6
    }
139
140
907
    return true;
141
913
}
_ZN5doris19valid_signed_numberIaEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
25
bool valid_signed_number(const std::string& value_str) {
127
25
    char* endptr = nullptr;
128
25
    errno = 0;
129
25
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
25
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
25
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
25
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
23
    return true;
141
25
}
_ZN5doris19valid_signed_numberIsEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
17
bool valid_signed_number(const std::string& value_str) {
127
17
    char* endptr = nullptr;
128
17
    errno = 0;
129
17
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
17
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
17
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
17
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
15
    return true;
141
17
}
_ZN5doris19valid_signed_numberIiEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
810
bool valid_signed_number(const std::string& value_str) {
127
810
    char* endptr = nullptr;
128
810
    errno = 0;
129
810
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
810
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
823
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
0
        return false;
134
0
    }
135
136
820
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
2
        return false;
138
2
    }
139
140
808
    return true;
141
810
}
_ZN5doris19valid_signed_numberIlEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
126
63
bool valid_signed_number(const std::string& value_str) {
127
63
    char* endptr = nullptr;
128
63
    errno = 0;
129
63
    int64_t value = strtol(value_str.c_str(), &endptr, 10);
130
131
63
    if ((errno == ERANGE && (value == LONG_MAX || value == LONG_MIN)) ||
132
63
        (errno != 0 && value == 0) || endptr == value_str || *endptr != '\0') {
133
2
        return false;
134
2
    }
135
136
61
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
137
0
        return false;
138
0
    }
139
140
61
    return true;
141
61
}
142
143
template <>
144
bool valid_signed_number<int128_t>(const std::string& value_str);
145
146
// Checks whether value_str is a base-10 unsigned integer that fits in T without overflow
// (intended for uint8_t, uint16_t, uint32_t and uint64_t).
// A string whose first character is '-' is rejected up front. The text is then parsed with
// strtoul(); the function returns false when strtoul() signals an error through errno, when
// nothing was parsed, when characters remain after the number, or when the parsed value exceeds
// numeric_limits<T>::max(). For an unsigned T the lower-bound test can never be true.
// NOTE(review): only index 0 is checked for '-', while strtoul() itself skips leading whitespace
// and accepts a sign, so input such as " -1" is not caught by that check — confirm whether
// callers can pass such input.
// NOTE(review): `endptr == value_str` compares a char* against a std::string, i.e. it compares
// string contents rather than pointers; `value_str.c_str()` was presumably intended — confirm.
// This coverage report records no executions for any instantiation of this template.
147
template <typename T>
148
0
bool valid_unsigned_number(const std::string& value_str) {
149
0
    if (value_str[0] == '-') {
150
0
        return false;
151
0
    }
152
153
0
    char* endptr = nullptr;
154
0
    errno = 0;
155
0
    uint64_t value = strtoul(value_str.c_str(), &endptr, 10);
156
157
0
    if ((errno == ERANGE && (value == ULONG_MAX)) || (errno != 0 && value == 0) ||
158
0
        endptr == value_str || *endptr != '\0') {
159
0
        return false;
160
0
    }
161
162
0
    if (value < std::numeric_limits<T>::min() || value > std::numeric_limits<T>::max()) {
163
0
        return false;
164
0
    }
165
166
0
    return true;
167
0
}
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIhEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberItEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberIjEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZN5doris21valid_unsigned_numberImEEbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
168
169
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac);
170
171
// Validate for date/datetime roughly. The format is 'yyyy-MM-dd HH:mm:ss'
172
// TODO: support 'yyyy-MM-dd HH:mm:ss.SSS'
173
bool valid_datetime(const std::string& value_str, const uint32_t scale);
174
175
bool valid_bool(const std::string& value_str);
176
177
bool valid_ipv4(const std::string& value_str);
178
179
bool valid_ipv6(const std::string& value_str);
180
181
27.9M
// Returns true when field_type is OLAP_FIELD_TYPE_VARCHAR, OLAP_FIELD_TYPE_CHAR or
// OLAP_FIELD_TYPE_STRING, and false for every other FieldType.
constexpr bool is_string_type(const FieldType& field_type) {
182
27.9M
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
183
27.9M
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
184
27.9M
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
185
27.9M
}
186
187
33.4k
// Returns true when field_type is one of the types enumerated below, false otherwise.
// Besides the integer and floating-point types, the list also covers the date/datetime/
// timestamptz, decimal, bool and IPv4/IPv6 field types.
constexpr bool is_numeric_type(const FieldType& field_type) {
188
33.4k
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
189
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
190
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
191
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
192
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
193
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
194
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
195
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
196
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
197
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
198
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
199
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
200
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
201
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ ||
202
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
203
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
204
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
205
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
206
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
207
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
208
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
209
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
210
33.4k
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
211
33.4k
}
212
213
// Util used to get the string name of a thrift enum item.
// Looks `index` up in the map `_<enum_type>_VALUES_TO_NAMES` (the identifier is built by token
// pasting) and assigns the mapped name to `out`, or the literal "NULL" when the value is not in
// the map. The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement; the caller supplies the trailing semicolon.
214
#define EnumToString(enum_type, index, out)                   \
215
44.2M
    do {                                                      \
216
44.2M
        auto it = _##enum_type##_VALUES_TO_NAMES.find(index); \
217
44.2M
        if (it == _##enum_type##_VALUES_TO_NAMES.end()) {     \
218
0
            out = "NULL";                                     \
219
44.2M
        } else {                                              \
220
44.2M
            out = it->second;                                 \
221
44.2M
        }                                                     \
222
44.2M
    } while (0)
223
224
// Identifies a row by the triple (rowset_id, segment_id, row_id).
// Constructors: the default one zeroes segment_id and row_id; the two-argument one sets
// segment_id and row_id and leaves rowset_id default-constructed; the three-argument one sets
// all three members.
// operator== compares all three members; operator< orders lexicographically by rowset_id, then
// segment_id, then row_id.
struct RowLocation {
225
37.6M
    RowLocation() : segment_id(0), row_id(0) {}
226
9.92M
    RowLocation(uint32_t sid, uint32_t rid) : segment_id(sid), row_id(rid) {}
227
    RowLocation(RowsetId rsid, uint32_t sid, uint32_t rid)
228
11.3M
            : rowset_id(rsid), segment_id(sid), row_id(rid) {}
229
    RowsetId rowset_id;
230
    uint32_t segment_id;
231
    uint32_t row_id;
232
233
0
    bool operator==(const RowLocation& rhs) const {
234
0
        return rowset_id == rhs.rowset_id && segment_id == rhs.segment_id && row_id == rhs.row_id;
235
0
    }
236
237
5.45k
    bool operator<(const RowLocation& rhs) const {
238
5.45k
        if (rowset_id != rhs.rowset_id) {
239
1.19k
            return rowset_id < rhs.rowset_id;
240
4.26k
        } else if (segment_id != rhs.segment_id) {
241
0
            return segment_id < rhs.segment_id;
242
4.26k
        } else {
243
4.26k
            return row_id < rhs.row_id;
244
4.26k
        }
245
5.45k
    }
246
};
247
// RowLocationSet: ordered set of RowLocation, sorted by RowLocation::operator<.
// RowLocationPairList: list of (RowLocation, RowLocation) pairs.
// NOTE(review): <set>, <list> and <utility> are not among the includes at the top of this
// header — presumably they arrive transitively through another include; confirm.
using RowLocationSet = std::set<RowLocation>;
248
using RowLocationPairList = std::list<std::pair<RowLocation, RowLocation>>;
249
250
// A RowLocation qualified by a tablet id.
// operator== compares tablet_id and row_location; operator< orders by tablet_id first and falls
// back to RowLocation::operator< when the tablet ids are equal.
// NOTE(review): "Loacation" in the type name looks like a misspelling of "Location"; renaming
// it would affect every user of the type, so it is only flagged here.
struct GlobalRowLoacation {
251
    GlobalRowLoacation(int64_t tid, RowsetId rsid, uint32_t sid, uint32_t rid)
252
0
            : tablet_id(tid), row_location(rsid, sid, rid) {}
253
    int64_t tablet_id;
254
    RowLocation row_location;
255
256
0
    bool operator==(const GlobalRowLoacation& rhs) const {
257
0
        return tablet_id == rhs.tablet_id && row_location == rhs.row_location;
258
0
    }
259
260
0
    bool operator<(const GlobalRowLoacation& rhs) const {
261
0
        if (tablet_id != rhs.tablet_id) {
262
0
            return tablet_id < rhs.tablet_id;
263
0
        } else {
264
0
            return row_location < rhs.row_location;
265
0
        }
266
0
    }
267
};
268
269
// Row address made of (version, backend_id, file_id, row_id).
// All comparisons come from the defaulted operator<=>, which compares the members one by one in
// declaration order: version, backend_id, file_id, row_id.
// NOTE(review): the constructor takes `bid` as uint64_t but stores it in the int64_t member
// backend_id, an implicit unsigned-to-signed conversion — confirm which type is intended.
struct GlobalRowLoacationV2 {
270
    GlobalRowLoacationV2(uint8_t ver, uint64_t bid, uint32_t fid, uint32_t rid)
271
42.5M
            : version(ver), backend_id(bid), file_id(fid), row_id(rid) {}
272
    uint8_t version;
273
    int64_t backend_id;
274
    uint32_t file_id;
275
    uint32_t row_id;
276
277
    auto operator<=>(const GlobalRowLoacationV2&) const = default;
278
};
279
280
} // namespace doris