Coverage Report

Created: 2025-07-23 14:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/utils.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/utils.h"
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <stdarg.h>
23
#include <time.h>
24
#include <unistd.h>
25
#include <zconf.h>
26
#include <zlib.h>
27
28
#include <cmath>
29
#include <cstring>
30
#include <memory>
31
#include <regex>
32
#include <set>
33
#include <sstream>
34
#include <string>
35
#include <vector>
36
37
#include "common/logging.h"
38
#include "common/status.h"
39
#include "io/fs/file_reader.h"
40
#include "io/fs/file_writer.h"
41
#include "io/fs/local_file_system.h"
42
#include "olap/olap_common.h"
43
#include "util/sse_util.hpp"
44
#include "util/string_parser.hpp"
45
#include "vec/runtime/ipv4_value.h"
46
#include "vec/runtime/ipv6_value.h"
47
48
namespace doris {
49
#include "common/compile_check_begin.h"
50
using namespace ErrorCode;
51
52
3.16k
uint32_t olap_adler32_init() {
53
3.16k
    return (uint32_t)adler32(0, Z_NULL, 0);
54
3.16k
}
55
56
3.16k
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len) {
57
3.16k
    return (uint32_t)adler32(adler, reinterpret_cast<const Bytef*>(buf), (uint32_t)len);
58
3.16k
}
59
60
2
// Writes the current local time, formatted as "%Y%m%d%H%M%S", into *out_string.
// Returns an OS_ERROR status when localtime_r() or strftime() fails; in that
// case *out_string is left untouched.
Status gen_timestamp_string(std::string* out_string) {
    const time_t current = time(nullptr);

    tm broken_down;
    if (nullptr == localtime_r(&current, &broken_down)) {
        return Status::Error<OS_ERROR>("fail to localtime_r time. time={}", current);
    }

    // A value such as 20150706111404 is 14 characters plus the terminating NUL.
    char formatted[16] = {0};
    const size_t written = strftime(formatted, sizeof(formatted), "%Y%m%d%H%M%S", &broken_down);
    if (written == 0) {
        return Status::Error<OS_ERROR>("fail to strftime time. time={}", current);
    }

    out_string->assign(formatted);
    return Status::OK();
}
75
76
573
// Checks that `test_file_path` can be written and read back intact.
//
// Steps: remove any stale file at that path, write 4096 pseudo-random bytes
// through the local filesystem, read them back, compare, then delete the file.
// Returns the first error encountered, or the status of the final delete.
// On a failure after the file was created, the file is left in place.
Status read_write_test_file(const std::string& test_file_path) {
    // Start from a clean slate. errno is captured once so the number and the
    // message in the error always describe the same failure (function-argument
    // evaluation order is unspecified).
    if (access(test_file_path.c_str(), F_OK) == 0) {
        if (remove(test_file_path.c_str()) != 0) {
            const int saved_errno = errno;
            char errmsg[64];
            return Status::IOError("fail to remove test file. path={}, errno={}, err={}",
                                   test_file_path, saved_errno,
                                   strerror_r(saved_errno, errmsg, sizeof(errmsg)));
        }
    } else if (errno != ENOENT) {
        // access() failed for a reason other than "file does not exist".
        const int saved_errno = errno;
        char errmsg[64];
        return Status::IOError("fail to access test file. path={}, errno={}, err={}",
                               test_file_path, saved_errno,
                               strerror_r(saved_errno, errmsg, sizeof(errmsg)));
    }

    const size_t TEST_FILE_BUF_SIZE = 4096;
    // NOTE(review): the 512-byte alignment presumably keeps the buffers usable
    // for direct I/O, as the constant name suggests — confirm against the
    // filesystem layer.
    const size_t DIRECT_IO_ALIGNMENT = 512;

    // Each buffer is handed to a unique_ptr right after allocation so every
    // early return below releases it.
    char* write_test_buff = nullptr;
    if (posix_memalign((void**)&write_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        return Status::Error<MEM_ALLOC_FAILED>("fail to allocate write buffer memory. size={}",
                                               TEST_FILE_BUF_SIZE);
    }
    std::unique_ptr<char, decltype(&std::free)> write_buff(write_test_buff, &std::free);

    char* read_test_buff = nullptr;
    if (posix_memalign((void**)&read_test_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        return Status::Error<MEM_ALLOC_FAILED>("fail to allocate read buffer memory. size={}",
                                               TEST_FILE_BUF_SIZE);
    }
    std::unique_ptr<char, decltype(&std::free)> read_buff(read_test_buff, &std::free);

    // Fill the write buffer with pseudo-random bytes seeded from the clock.
    uint32_t rand_seed = static_cast<uint32_t>(time(nullptr));
    for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) {
        int32_t tmp_value = rand_r(&rand_seed);
        write_test_buff[i] = static_cast<char>(tmp_value);
    }

    // write file
    io::FileWriterPtr file_writer;
    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(test_file_path, &file_writer));
    RETURN_IF_ERROR(file_writer->append({write_buff.get(), TEST_FILE_BUF_SIZE}));
    RETURN_IF_ERROR(file_writer->close());

    // read file
    io::FileReaderSPtr file_reader;
    RETURN_IF_ERROR(io::global_local_filesystem()->open_file(test_file_path, &file_reader));
    size_t bytes_read = 0;
    RETURN_IF_ERROR(file_reader->read_at(0, {read_buff.get(), TEST_FILE_BUF_SIZE}, &bytes_read));

    // A short read leaves the tail of the read buffer uninitialised; report it
    // explicitly instead of comparing against indeterminate memory.
    if (bytes_read != TEST_FILE_BUF_SIZE) {
        return Status::IOError(
                "the test file was not fully read back, file_name={}, expected={}, actual={}.",
                test_file_path, TEST_FILE_BUF_SIZE, bytes_read);
    }
    if (memcmp(write_buff.get(), read_buff.get(), TEST_FILE_BUF_SIZE) != 0) {
        return Status::IOError("the test file write_buf and read_buf not equal, file_name={}.",
                               test_file_path);
    }

    // delete file
    return io::global_local_filesystem()->delete_file(test_file_path);
}
129
130
22
// Verifies that the directory `path` exists and is readable and writable by
// running read_write_test_file() on "<path>/.read_write_test_file".
// Returns an IOError when the path is missing, or any error from the
// existence check or the read/write probe.
Status check_datapath_rw(const std::string& path) {
    bool path_present = true;
    RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &path_present));

    if (path_present) {
        const std::string probe_file = path + "/.read_write_test_file";
        return read_write_test_file(probe_file);
    }
    return Status::IOError("path does not exist: {}", path);
}
139
140
// Out-of-class definition of Errno's per-thread (`__thread`) message buffer.
// Errno::str(int) writes the error text into it and returns a pointer to it.
__thread char Errno::_buf[BUF_SIZE]; ///< buffer instance
141
142
0
const char* Errno::str() {
143
0
    return str(no());
144
0
}
145
146
0
const char* Errno::str(int no) {
147
0
    if (0 != strerror_r(no, _buf, BUF_SIZE)) {
148
0
        LOG(WARNING) << "fail to get errno string. [no='" << no << "', errno='" << errno << "']";
149
0
        snprintf(_buf, BUF_SIZE, "unknown errno");
150
0
    }
151
152
0
    return _buf;
153
0
}
154
155
0
// Returns the calling thread's current errno value.
int Errno::no() {
    const int current_errno = errno;
    return current_errno;
}
158
159
template <>
160
161
bool valid_signed_number<int128_t>(const std::string& value_str) {
161
161
    char* endptr = nullptr;
162
161
    const char* value_string = value_str.c_str();
163
161
    int64_t value = strtol(value_string, &endptr, 10);
164
161
    if (*endptr != 0) {
165
0
        return false;
166
161
    } else if (value > LONG_MIN && value < LONG_MAX) {
167
158
        return true;
168
158
    } else {
169
3
        bool sign = false;
170
3
        if (*value_string == '-' || *value_string == '+') {
171
1
            if (*(value_string++) == '-') {
172
1
                sign = true;
173
1
            }
174
1
        }
175
176
3
        uint128_t current = 0;
177
3
        uint128_t max_int128 = std::numeric_limits<int128_t>::max();
178
81
        while (*value_string != 0) {
179
78
            if (current > max_int128 / 10) {
180
0
                return false;
181
0
            }
182
183
78
            current = current * 10 + (*(value_string++) - '0');
184
78
        }
185
186
3
        if ((!sign && current > max_int128) || (sign && current > max_int128 + 1)) {
187
2
            return false;
188
2
        }
189
190
1
        return true;
191
3
    }
192
161
}
193
194
34
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac) {
195
34
    const char* decimal_pattern = "-?(\\d+)(.\\d+)?";
196
34
    std::regex e(decimal_pattern);
197
34
    std::smatch what;
198
34
    if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
199
1
        LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]";
200
1
        return false;
201
1
    }
202
203
33
    size_t number_length = value_str.size();
204
33
    bool is_negative = value_str[0] == '-';
205
33
    if (is_negative) {
206
10
        --number_length;
207
10
    }
208
209
33
    size_t integer_len = 0;
210
33
    size_t fractional_len = 0;
211
33
    size_t point_pos = value_str.find('.');
212
33
    if (point_pos == std::string::npos) {
213
2
        integer_len = number_length;
214
2
        fractional_len = 0;
215
31
    } else {
216
31
        integer_len = point_pos - (is_negative ? 1 : 0);
217
31
        fractional_len = number_length - point_pos - 1;
218
31
    }
219
220
    /// For value likes "0.xxxxxx", the integer_len should actually be 0.
221
33
    if (integer_len == 1 && precision - frac == 0) {
222
8
        if (what[1].str() == "0") {
223
8
            integer_len = 0;
224
8
        }
225
8
    }
226
227
33
    return (integer_len <= (precision - frac) && fractional_len <= frac);
228
34
}
229
230
116
bool valid_datetime(const std::string& value_str, const uint32_t scale) {
231
116
    const char* datetime_pattern =
232
116
            "((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*"
233
116
            "(((?:\\d){2}):((?:\\d){2}):((?:\\d){2})([.]*((?:\\d){0,6})))?";
234
116
    std::regex e(datetime_pattern);
235
116
    std::smatch what;
236
237
116
    if (std::regex_match(value_str, what, e)) {
238
111
        if (what[0].str().size() != value_str.size()) {
239
0
            LOG(WARNING) << "datetime str does not fully match. [value_str=" << value_str
240
0
                         << " match=" << what[0].str() << "]";
241
0
            return false;
242
0
        }
243
244
111
        int64_t month = strtol(what[2].str().c_str(), nullptr, 10);
245
111
        if (month < 1 || month > 12) {
246
2
            LOG(WARNING) << "invalid month. [month=" << month << "]";
247
2
            return false;
248
2
        }
249
250
109
        int64_t day = strtol(what[3].str().c_str(), nullptr, 10);
251
111
        if (day < 1 || day > 31) {
252
2
            LOG(WARNING) << "invalid day. [day=" << day << "]";
253
2
            return false;
254
2
        }
255
256
107
        if (what[4].length()) {
257
65
            int64_t hour = strtol(what[5].str().c_str(), nullptr, 10);
258
65
            if (hour < 0 || hour > 23) {
259
1
                LOG(WARNING) << "invalid hour. [hour=" << hour << "]";
260
1
                return false;
261
1
            }
262
263
64
            int64_t minute = strtol(what[6].str().c_str(), nullptr, 10);
264
66
            if (minute < 0 || minute > 59) {
265
1
                LOG(WARNING) << "invalid minute. [minute=" << minute << "]";
266
1
                return false;
267
1
            }
268
269
63
            int64_t second = strtol(what[7].str().c_str(), nullptr, 10);
270
63
            if (second < 0 || second > 59) {
271
1
                LOG(WARNING) << "invalid second. [second=" << second << "]";
272
1
                return false;
273
1
            }
274
62
            if (what[8].length()) {
275
14
                if (what[9].str().size() > 6) {
276
0
                    LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() << "]";
277
0
                    return false;
278
0
                }
279
14
                auto s9 = what[9].str();
280
14
                s9.resize(6, '0');
281
14
                if (const long ms = strtol(s9.c_str(), nullptr, 10);
282
14
                    ms % static_cast<long>(std::pow(10, 6 - scale)) != 0) {
283
0
                    LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str()
284
0
                                 << ", scale = " << scale << "]";
285
0
                    return false;
286
0
                }
287
14
            }
288
62
        }
289
290
104
        return true;
291
107
    } else {
292
5
        LOG(WARNING) << "datetime string does not match";
293
5
        return false;
294
5
    }
295
116
}
296
297
5
bool valid_bool(const std::string& value_str) {
298
5
    if (value_str == "0" || value_str == "1") {
299
5
        return true;
300
5
    }
301
0
    StringParser::ParseResult result;
302
0
    StringParser::string_to_bool(value_str.c_str(), value_str.length(), &result);
303
0
    return result == StringParser::PARSE_SUCCESS;
304
5
}
305
306
4
bool valid_ipv4(const std::string& value_str) {
307
4
    return IPv4Value::is_valid_string(value_str.c_str(), value_str.size());
308
4
}
309
310
4
bool valid_ipv6(const std::string& value_str) {
311
4
    return IPv6Value::is_valid_string(value_str.c_str(), value_str.size());
312
4
}
313
#include "common/compile_check_end.h"
314
} // namespace doris