Coverage Report

Created: 2025-04-30 06:07

/root/doris/be/src/olap/utils.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/utils.h"
19
20
// IWYU pragma: no_include <bthread/errno.h>
21
#include <errno.h> // IWYU pragma: keep
22
#include <stdarg.h>
23
#include <time.h>
24
#include <unistd.h>
25
#include <zconf.h>
26
#include <zlib.h>
27
28
#include <cmath>
29
#include <cstring>
30
#include <memory>
31
#include <regex>
32
#include <set>
33
#include <sstream>
34
#include <string>
35
#include <vector>
36
37
#include "common/logging.h"
38
#include "common/status.h"
39
#include "io/fs/file_reader.h"
40
#include "io/fs/file_writer.h"
41
#include "io/fs/local_file_system.h"
42
#include "olap/olap_common.h"
43
#include "util/sse_util.hpp"
44
#include "util/string_parser.hpp"
45
#include "vec/runtime/ipv4_value.h"
46
#include "vec/runtime/ipv6_value.h"
47
48
namespace doris {
49
using namespace ErrorCode;
50
51
201k
uint32_t olap_adler32_init() {
52
201k
    return adler32(0L, Z_NULL, 0);
53
201k
}
54
55
201k
uint32_t olap_adler32(uint32_t adler, const char* buf, size_t len) {
56
201k
    return adler32(adler, reinterpret_cast<const Bytef*>(buf), len);
57
201k
}
58
59
3.90k
// Writes the current local time, formatted as "%Y%m%d%H%M%S", into *out_string.
//
// Returns OS_ERROR if the time cannot be converted to local time or cannot be
// formatted; otherwise returns OK.
Status gen_timestamp_string(std::string* out_string) {
    const time_t now = time(nullptr);

    tm broken_down;
    const bool converted = localtime_r(&now, &broken_down) != nullptr;
    if (!converted) {
        return Status::Error<OS_ERROR>("fail to localtime_r time. time={}", now);
    }

    // A stamp such as "20150706111404" is 14 characters; 16 bytes leaves room
    // for the terminating NUL.
    char formatted[16] = {0};
    const size_t written = strftime(formatted, sizeof(formatted), "%Y%m%d%H%M%S", &broken_down);
    if (written == 0) {
        return Status::Error<OS_ERROR>("fail to strftime time. time={}", now);
    }

    out_string->assign(formatted);
    return Status::OK();
}
74
75
6.40k
// Checks that `test_file_path` can be written and then read back intact.
//
// Steps: remove any file already at that path, write 4096 pseudo-random bytes
// through the local file system, read them back, compare both buffers, and
// finally delete the file.
//
// Returns:
//   - IOError if the existing file cannot be probed or removed, if fewer bytes
//     are read back than were written, or if the contents differ;
//   - MEM_ALLOC_FAILED if an aligned buffer cannot be allocated;
//   - the first error returned by a file system call;
//   - otherwise the status of the final delete.
Status read_write_test_file(const std::string& test_file_path) {
    if (access(test_file_path.c_str(), F_OK) == 0) {
        if (remove(test_file_path.c_str()) != 0) {
            // Snapshot errno once so the code and its text always agree.
            const int err = errno;
            char errmsg[64];
            return Status::IOError("fail to remove test file. path={}, errno={}, err={}",
                                   test_file_path, err, strerror_r(err, errmsg, sizeof(errmsg)));
        }
    } else if (errno != ENOENT) {
        // access() failed for a reason other than "file does not exist".
        const int err = errno;
        char errmsg[64];
        return Status::IOError("fail to access test file. path={}, errno={}, err={}",
                               test_file_path, err, strerror_r(err, errmsg, sizeof(errmsg)));
    }

    const size_t TEST_FILE_BUF_SIZE = 4096;
    const size_t DIRECT_IO_ALIGNMENT = 512;

    void* raw_write_buff = nullptr;
    if (posix_memalign(&raw_write_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        return Status::Error<MEM_ALLOC_FAILED>("fail to allocate write buffer memory. size={}",
                                               TEST_FILE_BUF_SIZE);
    }
    std::unique_ptr<char, decltype(&std::free)> write_buff(static_cast<char*>(raw_write_buff),
                                                           &std::free);

    void* raw_read_buff = nullptr;
    if (posix_memalign(&raw_read_buff, DIRECT_IO_ALIGNMENT, TEST_FILE_BUF_SIZE) != 0) {
        return Status::Error<MEM_ALLOC_FAILED>("fail to allocate read buffer memory. size={}",
                                               TEST_FILE_BUF_SIZE);
    }
    std::unique_ptr<char, decltype(&std::free)> read_buff(static_cast<char*>(raw_read_buff),
                                                          &std::free);

    // generate random numbers
    uint32_t rand_seed = static_cast<uint32_t>(time(nullptr));
    for (size_t i = 0; i < TEST_FILE_BUF_SIZE; ++i) {
        write_buff.get()[i] = static_cast<char>(rand_r(&rand_seed));
    }

    // write file
    io::FileWriterPtr file_writer;
    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(test_file_path, &file_writer));
    RETURN_IF_ERROR(file_writer->append({write_buff.get(), TEST_FILE_BUF_SIZE}));
    RETURN_IF_ERROR(file_writer->close());

    // read file
    io::FileReaderSPtr file_reader;
    RETURN_IF_ERROR(io::global_local_filesystem()->open_file(test_file_path, &file_reader));
    size_t bytes_read = 0;
    RETURN_IF_ERROR(file_reader->read_at(0, {read_buff.get(), TEST_FILE_BUF_SIZE}, &bytes_read));
    // A short read leaves the tail of read_buff uninitialised; report it
    // explicitly instead of comparing indeterminate bytes.
    if (bytes_read != TEST_FILE_BUF_SIZE) {
        return Status::IOError(
                "the test file was not fully read back, file_name={}, expected={}, actual={}.",
                test_file_path, TEST_FILE_BUF_SIZE, bytes_read);
    }
    if (memcmp(write_buff.get(), read_buff.get(), TEST_FILE_BUF_SIZE) != 0) {
        return Status::IOError("the test file write_buf and read_buf not equal, file_name={}.",
                               test_file_path);
    }

    // delete file
    return io::global_local_filesystem()->delete_file(test_file_path);
}
128
129
12
// Verifies that the directory `path` exists and then runs
// read_write_test_file() on "<path>/.read_write_test_file".
//
// Returns the error from the existence check if it fails, IOError if the path
// does not exist, otherwise whatever read_write_test_file() returns.
Status check_datapath_rw(const std::string& path) {
    bool path_present = true;
    RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &path_present));
    if (path_present) {
        const std::string probe_file = path + "/.read_write_test_file";
        return read_write_test_file(probe_file);
    }
    return Status::IOError("path does not exist: {}", path);
}
138
139
// Definition of the thread-local (`__thread`) buffer that Errno::str(int)
// fills with the error text and returns a pointer to.
__thread char Errno::_buf[BUF_SIZE]; ///< buffer instance
140
141
0
const char* Errno::str() {
142
0
    return str(no());
143
0
}
144
145
0
const char* Errno::str(int no) {
146
0
    if (0 != strerror_r(no, _buf, BUF_SIZE)) {
147
0
        LOG(WARNING) << "fail to get errno string. [no='" << no << "', errno='" << errno << "']";
148
0
        snprintf(_buf, BUF_SIZE, "unknown errno");
149
0
    }
150
151
0
    return _buf;
152
0
}
153
154
0
// Returns the current value of errno.
int Errno::no() {
    const int current = errno;
    return current;
}
157
158
template <>
159
320
bool valid_signed_number<int128_t>(const std::string& value_str) {
160
320
    char* endptr = nullptr;
161
320
    const char* value_string = value_str.c_str();
162
320
    int64_t value = strtol(value_string, &endptr, 10);
163
320
    if (*endptr != 0) {
164
0
        return false;
165
320
    } else if (value > LONG_MIN && value < LONG_MAX) {
166
318
        return true;
167
318
    } else {
168
2
        bool sign = false;
169
2
        if (*value_string == '-' || *value_string == '+') {
170
1
            if (*(value_string++) == '-') {
171
1
                sign = true;
172
1
            }
173
1
        }
174
175
2
        uint128_t current = 0;
176
2
        uint128_t max_int128 = std::numeric_limits<int128_t>::max();
177
80
        while (*value_string != 0) {
178
78
            if (current > max_int128 / 10) {
179
0
                return false;
180
0
            }
181
182
78
            current = current * 10 + (*(value_string++) - '0');
183
78
        }
184
185
2
        if ((!sign && current > max_int128) || (sign && current > max_int128 + 1)) {
186
2
            return false;
187
2
        }
188
189
0
        return true;
190
2
    }
191
320
}
192
193
61
bool valid_decimal(const std::string& value_str, const uint32_t precision, const uint32_t frac) {
194
61
    const char* decimal_pattern = "-?(\\d+)(.\\d+)?";
195
61
    std::regex e(decimal_pattern);
196
61
    std::smatch what;
197
61
    if (!std::regex_match(value_str, what, e) || what[0].str().size() != value_str.size()) {
198
1
        LOG(WARNING) << "invalid decimal value. [value=" << value_str << "]";
199
1
        return false;
200
1
    }
201
202
60
    size_t number_length = value_str.size();
203
60
    bool is_negative = value_str[0] == '-';
204
60
    if (is_negative) {
205
18
        --number_length;
206
18
    }
207
208
60
    size_t integer_len = 0;
209
60
    size_t fractional_len = 0;
210
60
    size_t point_pos = value_str.find('.');
211
60
    if (point_pos == std::string::npos) {
212
2
        integer_len = number_length;
213
2
        fractional_len = 0;
214
58
    } else {
215
58
        integer_len = point_pos - (is_negative ? 1 : 0);
216
58
        fractional_len = number_length - point_pos - 1;
217
58
    }
218
219
    /// For value likes "0.xxxxxx", the integer_len should actually be 0.
220
60
    if (integer_len == 1 && precision - frac == 0) {
221
16
        if (what[1].str() == "0") {
222
16
            integer_len = 0;
223
16
        }
224
16
    }
225
226
60
    return (integer_len <= (precision - frac) && fractional_len <= frac);
227
61
}
228
229
223
bool valid_datetime(const std::string& value_str, const uint32_t scale) {
230
223
    const char* datetime_pattern =
231
223
            "((?:\\d){4})-((?:\\d){2})-((?:\\d){2})[ ]*"
232
223
            "(((?:\\d){2}):((?:\\d){2}):((?:\\d){2})([.]*((?:\\d){0,6})))?";
233
223
    std::regex e(datetime_pattern);
234
223
    std::smatch what;
235
236
223
    if (std::regex_match(value_str, what, e)) {
237
221
        if (what[0].str().size() != value_str.size()) {
238
0
            LOG(WARNING) << "datetime str does not fully match. [value_str=" << value_str
239
0
                         << " match=" << what[0].str() << "]";
240
0
            return false;
241
0
        }
242
243
221
        int month = strtol(what[2].str().c_str(), nullptr, 10);
244
221
        if (month < 1 || month > 12) {
245
2
            LOG(WARNING) << "invalid month. [month=" << month << "]";
246
2
            return false;
247
2
        }
248
249
219
        int day = strtol(what[3].str().c_str(), nullptr, 10);
250
219
        if (day < 1 || day > 31) {
251
2
            LOG(WARNING) << "invalid day. [day=" << day << "]";
252
2
            return false;
253
2
        }
254
255
217
        if (what[4].length()) {
256
132
            int hour = strtol(what[5].str().c_str(), nullptr, 10);
257
132
            if (hour < 0 || hour > 23) {
258
1
                LOG(WARNING) << "invalid hour. [hour=" << hour << "]";
259
1
                return false;
260
1
            }
261
262
131
            int minute = strtol(what[6].str().c_str(), nullptr, 10);
263
131
            if (minute < 0 || minute > 59) {
264
1
                LOG(WARNING) << "invalid minute. [minute=" << minute << "]";
265
1
                return false;
266
1
            }
267
268
130
            int second = strtol(what[7].str().c_str(), nullptr, 10);
269
130
            if (second < 0 || second > 59) {
270
1
                LOG(WARNING) << "invalid second. [second=" << second << "]";
271
1
                return false;
272
1
            }
273
129
            if (what[8].length()) {
274
14
                if (what[9].str().size() > 6) {
275
0
                    LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str() << "]";
276
0
                    return false;
277
0
                }
278
14
                auto s9 = what[9].str();
279
14
                s9.resize(6, '0');
280
14
                if (const long ms = strtol(s9.c_str(), nullptr, 10);
281
14
                    ms % static_cast<long>(std::pow(10, 6 - scale)) != 0) {
282
0
                    LOG(WARNING) << "invalid microsecond. [microsecond=" << what[9].str()
283
0
                                 << ", scale = " << scale << "]";
284
0
                    return false;
285
0
                }
286
14
            }
287
129
        }
288
289
214
        return true;
290
217
    } else {
291
2
        LOG(WARNING) << "datetime string does not match";
292
2
        return false;
293
2
    }
294
223
}
295
296
10
bool valid_bool(const std::string& value_str) {
297
10
    if (value_str == "0" || value_str == "1") {
298
10
        return true;
299
10
    }
300
0
    StringParser::ParseResult result;
301
0
    StringParser::string_to_bool(value_str.c_str(), value_str.length(), &result);
302
0
    return result == StringParser::PARSE_SUCCESS;
303
10
}
304
305
8
bool valid_ipv4(const std::string& value_str) {
306
8
    return IPv4Value::is_valid_string(value_str.c_str(), value_str.size());
307
8
}
308
309
8
bool valid_ipv6(const std::string& value_str) {
310
8
    return IPv6Value::is_valid_string(value_str.c_str(), value_str.size());
311
8
}
312
313
} // namespace doris