Coverage Report

Created: 2026-03-11 11:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/string_parser.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
#include <sys/types.h>
27
28
#include <algorithm>
29
#include <cstdlib>
30
// IWYU pragma: no_include <bits/std_abs.h>
31
#include <cmath> // IWYU pragma: keep
32
#include <cstdint>
33
#include <limits>
34
#include <map>
35
#include <string>
36
#include <type_traits>
37
#include <utility>
38
39
#include "common/compiler_util.h" // IWYU pragma: keep
40
#include "common/status.h"
41
#include "core/data_type/number_traits.h"
42
#include "core/data_type/primitive_type.h"
43
#include "core/extended_types.h"
44
#include "core/value/large_int_value.h"
45
#include "exec/common/int_exp.h"
46
#include "exec/common/string_utils/string_utils.h"
47
48
namespace doris {
49
#include "common/compile_check_avoid_begin.h"
50
template <DecimalNativeTypeConcept T>
51
struct Decimal;
52
53
// they rely on the template parameter `IS_STRICT`. in strict mode, it will set error code and otherwise it will not.
54
#ifndef SET_PARAMS_RET_FALSE_IFN
55
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
56
3.93M
    do {                                                              \
57
3.93M
        if (!(stmt)) [[unlikely]] {                                   \
58
36.1k
            if constexpr (IsStrict) {                                 \
59
123
                params.status = Status::InvalidArgument(__VA_ARGS__); \
60
123
            }                                                         \
61
36.1k
            return false;                                             \
62
36.1k
        }                                                             \
63
3.93M
    } while (false)
64
#endif
65
66
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
67
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
68
157
    do {                                          \
69
157
        try {                                     \
70
157
            { stmt; }                             \
71
157
        } catch (const doris::Exception& e) {     \
72
15
            if constexpr (IsStrict) {             \
73
5
                params.status = e.to_status();    \
74
5
            }                                     \
75
15
            return false;                         \
76
15
        }                                         \
77
157
    } while (false)
78
#endif
79
80
// skip leading and trailing ascii whitespaces,
81
// return the pointer to the first non-whitespace char,
82
// and update the len to the new length, which does not include
83
// leading and trailing whitespaces
84
template <typename T>
85
544k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
1.01M
    while (len > 0 && is_whitespace_ascii(*s)) {
87
469k
        ++s;
88
469k
        --len;
89
469k
    }
90
91
1.01M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
465k
        --len;
93
465k
    }
94
95
544k
    return s;
96
544k
}
_ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_
Line
Count
Source
85
515k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
909k
    while (len > 0 && is_whitespace_ascii(*s)) {
87
393k
        ++s;
88
393k
        --len;
89
393k
    }
90
91
905k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
390k
        --len;
93
390k
    }
94
95
515k
    return s;
96
515k
}
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_
Line
Count
Source
85
1.37k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
4.90k
    while (len > 0 && is_whitespace_ascii(*s)) {
87
3.52k
        ++s;
88
3.52k
        --len;
89
3.52k
    }
90
91
4.90k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
3.52k
        --len;
93
3.52k
    }
94
95
1.37k
    return s;
96
1.37k
}
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_
Line
Count
Source
85
27.8k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
100k
    while (len > 0 && is_whitespace_ascii(*s)) {
87
72.4k
        ++s;
88
72.4k
        --len;
89
72.4k
    }
90
91
99.8k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
72.0k
        --len;
93
72.0k
    }
94
95
27.8k
    return s;
96
27.8k
}
97
98
template <typename T>
99
53.8k
inline const char* skip_leading_whitespace(const char* __restrict s, T& len) {
100
157k
    while (len > 0 && is_whitespace_ascii(*s)) {
101
103k
        ++s;
102
103k
        --len;
103
103k
    }
104
105
53.8k
    return s;
106
53.8k
}
107
108
// skip trailing ascii whitespaces,
109
// return the pointer to the first char,
110
// and update the len to the new length, which does not include
111
// trailing whitespaces
112
template <typename T>
113
44.3k
inline const char* skip_trailing_whitespaces(const char* s, T& len) {
114
160k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
115
115k
        --len;
116
115k
    }
117
118
44.3k
    return s;
119
44.3k
}
120
121
template <bool (*Pred)(char)>
122
437k
bool range_suite(const char* s, const char* end) {
123
437k
    return std::ranges::all_of(s, end, Pred);
124
437k
}
_ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_
Line
Count
Source
122
434k
bool range_suite(const char* s, const char* end) {
123
434k
    return std::ranges::all_of(s, end, Pred);
124
434k
}
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_
Line
Count
Source
122
2.28k
bool range_suite(const char* s, const char* end) {
123
2.28k
    return std::ranges::all_of(s, end, Pred);
124
2.28k
}
125
126
inline auto is_digit_range = range_suite<is_numeric_ascii>;
127
inline auto is_space_range = range_suite<is_whitespace_ascii>;
128
129
// combine in_bound and range_suite is ok. won't lead to duplicated calculation.
130
462k
inline bool in_bound(const char* s, const char* end, size_t offset) {
131
462k
    if (s + offset >= end) [[unlikely]] {
132
3.12k
        return false;
133
3.12k
    }
134
459k
    return true;
135
462k
}
136
137
// LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more.
138
// if need result, use StringRef{origin_s, s} outside
139
template <int LEN, bool (*Pred)(char)>
140
1.50M
bool skip_qualified_char(const char*& s, const char* end) {
141
1.50M
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
1.19M
        while (s != end && Pred(*s)) {
144
693k
            ++s;
145
693k
        }
146
998k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
1.98M
        for (int i = 0; i < LEN; ++i, ++s) {
149
998k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
10.8k
                return false;
151
10.8k
            }
152
998k
        }
153
998k
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
54
        int count = 0;
156
360
        while (s != end && Pred(*s)) {
157
306
            ++s;
158
306
            ++count;
159
306
        }
160
54
        if (count < -LEN) [[unlikely]] {
161
0
            return false;
162
0
        }
163
54
    }
164
987k
    return true;
165
1.50M
}
_ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
140
198k
bool skip_qualified_char(const char*& s, const char* end) {
141
198k
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
201k
        while (s != end && Pred(*s)) {
144
3.03k
            ++s;
145
3.03k
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
198k
    return true;
165
198k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_
Line
Count
Source
140
308k
bool skip_qualified_char(const char*& s, const char* end) {
141
308k
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
998k
        while (s != end && Pred(*s)) {
144
690k
            ++s;
145
690k
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
308k
    return true;
165
308k
}
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
140
54
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
54
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
54
        int count = 0;
156
360
        while (s != end && Pred(*s)) {
157
306
            ++s;
158
306
            ++count;
159
306
        }
160
54
        if (count < -LEN) [[unlikely]] {
161
0
            return false;
162
0
        }
163
54
    }
164
54
    return true;
165
54
}
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_
_ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_
Line
Count
Source
140
35.3k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
35.3k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
59.9k
        for (int i = 0; i < LEN; ++i, ++s) {
149
35.3k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
10.6k
                return false;
151
10.6k
            }
152
35.3k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
24.6k
    return true;
165
35.3k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_
Line
Count
Source
140
176k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
176k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
352k
        for (int i = 0; i < LEN; ++i, ++s) {
149
176k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
48
                return false;
151
48
            }
152
176k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
176k
    return true;
165
176k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_
Line
Count
Source
140
439k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
439k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
878k
        for (int i = 0; i < LEN; ++i, ++s) {
149
439k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
42
                return false;
151
42
            }
152
439k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
439k
    return true;
165
439k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_
Line
Count
Source
140
347k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
347k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
694k
        for (int i = 0; i < LEN; ++i, ++s) {
149
347k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
24
                return false;
151
24
            }
152
347k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
347k
    return true;
165
347k
}
166
167
inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
168
inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
169
inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
170
inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
171
inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;
172
173
176k
inline bool is_delimiter(char c) {
174
176k
    return c == ' ' || c == 'T' || c == ':';
175
176k
}
176
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
177
178
664k
inline bool is_date_sep(char c) {
179
664k
    return c == '-' || c == '/';
180
664k
}
181
inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;
182
183
347k
inline bool is_colon(char c) {
184
347k
    return c == ':';
185
347k
}
186
inline auto consume_one_colon = skip_qualified_char<1, is_colon>;
187
188
// only consume a string of digit, not include sign.
189
// when has MAX_LEN > 0, do greedy match but at most MAX_LEN.
190
// LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits.
191
template <typename T, int LEN = 0, int MAX_LEN = -1>
192
20
bool consume_digit(const char*& s, const char* end, T& out) {
193
20
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
20
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
20
        out = 0;
215
85
        for (int i = 0; i < LEN; ++i, ++s) {
216
65
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
65
            out = out * 10 + (*s - '0');
220
65
        }
221
20
    }
222
20
    return true;
223
20
}
_ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_
Line
Count
Source
192
15
bool consume_digit(const char*& s, const char* end, T& out) {
193
15
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
15
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
15
        out = 0;
215
75
        for (int i = 0; i < LEN; ++i, ++s) {
216
60
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
60
            out = out * 10 + (*s - '0');
220
60
        }
221
15
    }
222
15
    return true;
223
15
}
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_
Line
Count
Source
192
5
bool consume_digit(const char*& s, const char* end, T& out) {
193
5
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
5
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
5
        out = 0;
215
10
        for (int i = 0; i < LEN; ++i, ++s) {
216
5
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
5
            out = out * 10 + (*s - '0');
220
5
        }
221
5
    }
222
5
    return true;
223
5
}
224
225
// specialized version for 2 digits, which is used very often in date/time parsing.
226
template <>
227
518k
inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) {
228
518k
    out = 0;
229
518k
    if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1)))
230
18.2k
            [[unlikely]] {
231
18.2k
        return false;
232
18.2k
    }
233
500k
    out = (s[0] - '0') * 10 + (s[1] - '0');
234
500k
    s += 2; // consume 2 digits
235
500k
    return true;
236
518k
}
237
238
// specialized version for 1 or 2 digits, which is used very often in date/time parsing.
239
template <>
240
987k
inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) {
241
987k
    out = 0;
242
987k
    if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
243
480
        return false;
244
987k
    } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) {
245
        // consume 2 digits
246
970k
        out = (*s - '0') * 10 + (*(s + 1) - '0');
247
970k
        s += 2;
248
970k
    } else {
249
        // consume 1 digit
250
16.6k
        out = *s - '0';
251
16.6k
        ++s;
252
16.6k
    }
253
987k
    return true;
254
987k
}
255
256
template <bool (*Pred)(char)>
257
148
uint32_t count_valid_length(const char* s, const char* end) {
258
148
    DCHECK(s <= end) << "s: " << s << ", end: " << end;
259
148
    uint32_t count = 0;
260
449
    while (s != end && Pred(*s)) {
261
301
        ++count;
262
301
        ++s;
263
301
    }
264
148
    return count;
265
148
}
266
267
inline auto count_digits = count_valid_length<is_numeric_ascii>;
268
269
136
inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
270
136
    std::string result(6, '0');
271
136
    result[0] = sign;
272
136
    result[1] = '0' + (hour_offset / 10);
273
136
    result[2] = '0' + (hour_offset % 10);
274
136
    result[3] = ':';
275
136
    result[4] = '0' + (minute_offset / 10);
276
136
    result[5] = '0' + (minute_offset % 10);
277
136
    DCHECK_EQ(result.size(), 6);
278
136
    return result;
279
136
}
280
281
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
282
// this is significantly faster than libc (atoi/strtol and atof/strtod).
283
//
284
// Strings with leading and trailing whitespaces are accepted.
285
// Branching is heavily optimized for the non-whitespace successful case.
286
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
287
// If that first attempt was unsuccessful, these functions retry the parsing after removing
288
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
289
//
290
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
291
// data type.  This is different from hive, which returns NULL for overflow slots for int types
292
// and inf/-inf for float types.
293
//
294
// Things we tried that did not work:
295
//  - lookup table for converting character to digit
296
// Improvements (TODO):
297
//  - Validate input using _simd_compare_ranges
298
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
299
class StringParser {
300
public:
301
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
302
303
    template <typename T>
304
484k
    static T numeric_limits(bool negative) {
305
484k
        if constexpr (std::is_same_v<T, __int128>) {
306
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
307
436k
        } else {
308
436k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
436k
        }
310
484k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
304
48.3k
    static T numeric_limits(bool negative) {
305
48.3k
        if constexpr (std::is_same_v<T, __int128>) {
306
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
307
        } else {
308
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
        }
310
48.3k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
304
165k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
165k
        } else {
308
165k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
165k
        }
310
165k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
304
76.9k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
76.9k
        } else {
308
76.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
76.9k
        }
310
76.9k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
304
105k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
105k
        } else {
308
105k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
105k
        }
310
105k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
304
87.9k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
87.9k
        } else {
308
87.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
87.9k
        }
310
87.9k
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
304
147
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
147
        } else {
308
147
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
147
        }
310
147
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
304
21
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
21
        } else {
308
21
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
21
        }
310
21
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
304
4
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
4
        } else {
308
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
4
        }
310
4
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
304
4
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
4
        } else {
308
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
4
        }
310
4
    }
311
312
    template <typename T>
313
873k
    static T get_scale_multiplier(int scale) {
314
873k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
873k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
873k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
873k
        if constexpr (std::is_same_v<T, int32_t>) {
318
133k
            return common::exp10_i32(scale);
319
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
182k
            return common::exp10_i64(scale);
321
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
322
237k
            return common::exp10_i128(scale);
323
320k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
320k
            return common::exp10_i256(scale);
325
320k
        }
326
873k
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
313
133k
    static T get_scale_multiplier(int scale) {
314
133k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
133k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
133k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
133k
        if constexpr (std::is_same_v<T, int32_t>) {
318
133k
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
133k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
313
182k
    static T get_scale_multiplier(int scale) {
314
182k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
182k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
182k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
182k
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
182k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
313
237k
    static T get_scale_multiplier(int scale) {
314
237k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
237k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
237k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
322
237k
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
237k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
313
320k
    static T get_scale_multiplier(int scale) {
314
320k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
320k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
320k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
320k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
320k
            return common::exp10_i256(scale);
325
320k
        }
326
320k
    }
327
328
    // This is considerably faster than glibc's implementation (25x).
329
    // Assumes s represents a decimal number.
330
    template <typename T, bool enable_strict_mode = false>
331
391k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
391k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
391k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
337k
            return ans;
335
337k
        }
336
53.8k
        s = skip_leading_whitespace(s, len);
337
53.8k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
391k
    }
_ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
45.2k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
45.2k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
45.2k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
43.9k
            return ans;
335
43.9k
        }
336
1.33k
        s = skip_leading_whitespace(s, len);
337
1.33k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
45.2k
    }
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
95.4k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
95.4k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
95.4k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
66.2k
            return ans;
335
66.2k
        }
336
29.2k
        s = skip_leading_whitespace(s, len);
337
29.2k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
95.4k
    }
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
1.00k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
1.00k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
1.00k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
88
            return ans;
335
88
        }
336
912
        s = skip_leading_whitespace(s, len);
337
912
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
1.00k
    }
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
66.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
66.3k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
66.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
58.1k
            return ans;
335
58.1k
        }
336
8.12k
        s = skip_leading_whitespace(s, len);
337
8.12k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
66.3k
    }
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
984
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
984
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
984
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
88
            return ans;
335
88
        }
336
896
        s = skip_leading_whitespace(s, len);
337
896
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
984
    }
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
98.8k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
98.8k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
98.8k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
92.5k
            return ans;
335
92.5k
        }
336
6.24k
        s = skip_leading_whitespace(s, len);
337
6.24k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
98.8k
    }
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
968
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
968
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
968
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
88
            return ans;
335
88
        }
336
880
        s = skip_leading_whitespace(s, len);
337
880
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
968
    }
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
81.0k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
81.0k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
81.0k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
76.4k
            return ans;
335
76.4k
        }
336
4.50k
        s = skip_leading_whitespace(s, len);
337
4.50k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
81.0k
    }
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
961
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
961
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
961
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
94
            return ans;
335
94
        }
336
867
        s = skip_leading_whitespace(s, len);
337
867
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
961
    }
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
936
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
936
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
936
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
88
            return ans;
335
88
        }
336
848
        s = skip_leading_whitespace(s, len);
337
848
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
936
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
20
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
20
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
20
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
20
            return ans;
335
20
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
20
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
4
            return ans;
335
4
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
4
    }
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
4
            return ans;
335
4
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
4
    }
339
340
    // This is considerably faster than glibc's implementation.
341
    // In the case of overflow, the max/min value for the data type will be returned.
342
    // Assumes s represents a decimal number.
343
    template <typename T>
344
1.37k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
1.37k
        s = skip_ascii_whitespaces(s, len);
346
1.37k
        return string_to_unsigned_int_internal<T>(s, len, result);
347
1.37k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
343
        s = skip_ascii_whitespaces(s, len);
346
343
        return string_to_unsigned_int_internal<T>(s, len, result);
347
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
343
        s = skip_ascii_whitespaces(s, len);
346
343
        return string_to_unsigned_int_internal<T>(s, len, result);
347
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
343
        s = skip_ascii_whitespaces(s, len);
346
343
        return string_to_unsigned_int_internal<T>(s, len, result);
347
343
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
343
        s = skip_ascii_whitespaces(s, len);
346
343
        return string_to_unsigned_int_internal<T>(s, len, result);
347
343
    }
348
349
    // Convert a string s representing a number in given base into a decimal number.
350
    template <typename T>
351
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
352
27.8k
                                  ParseResult* result) {
353
27.8k
        s = skip_ascii_whitespaces(s, len);
354
27.8k
        return string_to_int_internal<T>(s, len, base, result);
355
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
26.4k
                                  ParseResult* result) {
353
26.4k
        s = skip_ascii_whitespaces(s, len);
354
26.4k
        return string_to_int_internal<T>(s, len, base, result);
355
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
490
                                  ParseResult* result) {
353
490
        s = skip_ascii_whitespaces(s, len);
354
490
        return string_to_int_internal<T>(s, len, base, result);
355
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
441
                                  ParseResult* result) {
353
441
        s = skip_ascii_whitespaces(s, len);
354
441
        return string_to_int_internal<T>(s, len, base, result);
355
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
441
                                  ParseResult* result) {
353
441
        s = skip_ascii_whitespaces(s, len);
354
441
        return string_to_int_internal<T>(s, len, base, result);
355
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
1
                                  ParseResult* result) {
353
1
        s = skip_ascii_whitespaces(s, len);
354
1
        return string_to_int_internal<T>(s, len, base, result);
355
1
    }
356
357
    template <typename T>
358
152k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
152k
        s = skip_ascii_whitespaces(s, len);
360
152k
        return string_to_float_internal<T>(s, len, result);
361
152k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
358
87.6k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
87.6k
        s = skip_ascii_whitespaces(s, len);
360
87.6k
        return string_to_float_internal<T>(s, len, result);
361
87.6k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
358
65.1k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
65.1k
        s = skip_ascii_whitespaces(s, len);
360
65.1k
        return string_to_float_internal<T>(s, len, result);
361
65.1k
    }
362
363
    // Parses a string for 'true' or 'false', case insensitive.
364
11.3k
    static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
365
11.3k
        s = skip_ascii_whitespaces(s, len);
366
11.3k
        return string_to_bool_internal(s, len, result);
367
11.3k
    }
368
369
    template <PrimitiveType P>
370
    static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
371
            const char* __restrict s, size_t len, int type_precision, int type_scale,
372
            ParseResult* result);
373
374
    template <typename T>
375
    static Status split_string_to_map(const std::string& base, const T element_separator,
376
                                      const T key_value_separator,
377
                                      std::map<std::string, std::string>* result) {
378
        int key_pos = 0;
379
        int key_end;
380
        int val_pos;
381
        int val_end;
382
383
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
384
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
385
                std::string::npos) {
386
                break;
387
            }
388
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
389
                val_end = base.size();
390
            }
391
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
392
                                          base.substr(val_pos, val_end - val_pos)));
393
            key_pos = val_end;
394
            if (key_pos != std::string::npos) {
395
                ++key_pos;
396
            }
397
        }
398
399
        return Status::OK();
400
    }
401
402
    // This is considerably faster than glibc's implementation.
403
    // In the case of overflow, the max/min value for the data type will be returned.
404
    // Assumes s represents a decimal number.
405
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
406
    template <typename T, bool enable_strict_mode = false>
407
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
408
409
    // This is considerably faster than glibc's implementation.
410
    // In the case of overflow, the max/min value for the data type will be returned.
411
    // Assumes s represents a decimal number.
412
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
413
    template <typename T>
414
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
415
                                                    ParseResult* result);
416
417
    // Convert a string s representing a number in given base into a decimal number.
418
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
419
    template <typename T>
420
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
421
                                           ParseResult* result);
422
423
    // Converts an ascii string to an integer of type T assuming it cannot overflow
424
    // and the number is positive.
425
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
426
    template <typename T, bool enable_strict_mode = false>
427
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
428
                                              ParseResult* result);
429
430
    // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
431
    // char is not a digit.
432
    template <typename T>
433
    static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
434
                                                      ParseResult* result);
435
436
    // This is considerably faster than glibc's implementation (>100x why???)
437
    // No special case handling needs to be done for overflows, the floating point spec
438
    // already does it and will cap the values to -inf/inf
439
    // To avoid inaccurate conversions this function falls back to strtod for
440
    // scientific notation.
441
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
442
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
443
    template <typename T>
444
    static inline T string_to_float_internal(const char* __restrict s, int len,
445
                                             ParseResult* result);
446
447
    // parses a string for 'true' or 'false', case insensitive
448
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
449
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
450
                                               ParseResult* result);
451
452
    // Returns true if s only contains whitespace.
453
3.54k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
454
6.44k
        for (int i = 0; i < len; ++i) {
455
6.00k
            if (!LIKELY(is_whitespace_ascii(s[i]))) {
456
3.10k
                return false;
457
3.10k
            }
458
6.00k
        }
459
440
        return true;
460
3.54k
    }
461
462
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
463
3.62k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
464
3.62k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
465
3.62k
    }
466
467
2.67k
    static inline bool is_all_digit(const char* __restrict s, int len) {
468
5.57k
        for (int i = 0; i < len; ++i) {
469
3.05k
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
470
151
                return false;
471
151
            }
472
3.05k
        }
473
2.52k
        return true;
474
2.67k
    }
475
}; // end of class StringParser
476
477
template <typename T, bool enable_strict_mode>
478
445k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
445k
    if (UNLIKELY(len <= 0)) {
480
2.13k
        *result = PARSE_FAILURE;
481
2.13k
        return 0;
482
2.13k
    }
483
484
443k
    using UnsignedT = MakeUnsignedT<T>;
485
443k
    UnsignedT val = 0;
486
443k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
443k
    bool negative = false;
488
443k
    int i = 0;
489
443k
    switch (*s) {
490
102k
    case '-':
491
102k
        negative = true;
492
102k
        max_val += 1;
493
102k
        [[fallthrough]];
494
106k
    case '+':
495
106k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
106k
        if (UNLIKELY(len == 1)) {
498
2
            *result = PARSE_FAILURE;
499
2
            return 0;
500
2
        }
501
443k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
443k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
277k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
277k
        return static_cast<T>(negative ? -val : val);
507
277k
    }
508
509
166k
    const T max_div_10 = max_val / 10;
510
166k
    const T max_mod_10 = max_val % 10;
511
512
166k
    int first = i;
513
1.68M
    for (; i < len; ++i) {
514
1.61M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
1.57M
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
1.57M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
48.4k
                *result = PARSE_OVERFLOW;
519
48.4k
                return negative ? -max_val : max_val;
520
48.4k
            }
521
1.52M
            val = val * 10 + digit;
522
1.52M
        } else {
523
45.9k
            if constexpr (enable_strict_mode) {
524
4.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
3.78k
                    *result = PARSE_FAILURE;
527
3.78k
                    return 0;
528
3.78k
                }
529
41.8k
            } else {
530
                // Save original position where non-digit was found
531
41.8k
                int remaining_len = len - i;
532
41.8k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
41.8k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
41.8k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
41.8k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
28.9k
                    *result = PARSE_FAILURE;
540
28.9k
                    return 0;
541
28.9k
                }
542
41.8k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
13.1k
            *result = PARSE_SUCCESS;
545
45.9k
            return static_cast<T>(negative ? -val : val);
546
45.9k
        }
547
1.61M
    }
548
71.7k
    *result = PARSE_SUCCESS;
549
71.7k
    return static_cast<T>(negative ? -val : val);
550
166k
}
_ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
46.6k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
46.6k
    if (UNLIKELY(len <= 0)) {
480
44
        *result = PARSE_FAILURE;
481
44
        return 0;
482
44
    }
483
484
46.5k
    using UnsignedT = MakeUnsignedT<T>;
485
46.5k
    UnsignedT val = 0;
486
46.5k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
46.5k
    bool negative = false;
488
46.5k
    int i = 0;
489
46.5k
    switch (*s) {
490
3.54k
    case '-':
491
3.54k
        negative = true;
492
3.54k
        max_val += 1;
493
3.54k
        [[fallthrough]];
494
3.82k
    case '+':
495
3.82k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
3.82k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
46.5k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
46.5k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
41.9k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
41.9k
        return static_cast<T>(negative ? -val : val);
507
41.9k
    }
508
509
4.65k
    const T max_div_10 = max_val / 10;
510
4.65k
    const T max_mod_10 = max_val % 10;
511
512
4.65k
    int first = i;
513
172k
    for (; i < len; ++i) {
514
169k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
168k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
168k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
512
                *result = PARSE_OVERFLOW;
519
512
                return negative ? -max_val : max_val;
520
512
            }
521
168k
            val = val * 10 + digit;
522
168k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
536
            } else {
530
                // Save original position where non-digit was found
531
536
                int remaining_len = len - i;
532
536
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
536
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
376
                    *result = PARSE_FAILURE;
540
376
                    return 0;
541
376
                }
542
536
            }
543
            // Returning here is slightly faster than breaking the loop.
544
160
            *result = PARSE_SUCCESS;
545
536
            return static_cast<T>(negative ? -val : val);
546
536
        }
547
169k
    }
548
3.60k
    *result = PARSE_SUCCESS;
549
3.60k
    return static_cast<T>(negative ? -val : val);
550
4.65k
}
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
124k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
124k
    if (UNLIKELY(len <= 0)) {
480
218
        *result = PARSE_FAILURE;
481
218
        return 0;
482
218
    }
483
484
124k
    using UnsignedT = MakeUnsignedT<T>;
485
124k
    UnsignedT val = 0;
486
124k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
124k
    bool negative = false;
488
124k
    int i = 0;
489
124k
    switch (*s) {
490
22.4k
    case '-':
491
22.4k
        negative = true;
492
22.4k
        max_val += 1;
493
22.4k
        [[fallthrough]];
494
22.9k
    case '+':
495
22.9k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
22.9k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
124k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
124k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
60.4k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
60.4k
        return static_cast<T>(negative ? -val : val);
507
60.4k
    }
508
509
64.0k
    const T max_div_10 = max_val / 10;
510
64.0k
    const T max_mod_10 = max_val % 10;
511
512
64.0k
    int first = i;
513
154k
    for (; i < len; ++i) {
514
147k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
111k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
111k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
21.2k
                *result = PARSE_OVERFLOW;
519
21.2k
                return negative ? -max_val : max_val;
520
21.2k
            }
521
90.2k
            val = val * 10 + digit;
522
90.2k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
35.9k
            } else {
530
                // Save original position where non-digit was found
531
35.9k
                int remaining_len = len - i;
532
35.9k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
35.9k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
35.9k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
35.9k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
24.5k
                    *result = PARSE_FAILURE;
540
24.5k
                    return 0;
541
24.5k
                }
542
35.9k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
11.4k
            *result = PARSE_SUCCESS;
545
35.9k
            return static_cast<T>(negative ? -val : val);
546
35.9k
        }
547
147k
    }
548
6.80k
    *result = PARSE_SUCCESS;
549
6.80k
    return static_cast<T>(negative ? -val : val);
550
64.0k
}
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
1.91k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
1.91k
    if (UNLIKELY(len <= 0)) {
480
8
        *result = PARSE_FAILURE;
481
8
        return 0;
482
8
    }
483
484
1.90k
    using UnsignedT = MakeUnsignedT<T>;
485
1.90k
    UnsignedT val = 0;
486
1.90k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
1.90k
    bool negative = false;
488
1.90k
    int i = 0;
489
1.90k
    switch (*s) {
490
632
    case '-':
491
632
        negative = true;
492
632
        max_val += 1;
493
632
        [[fallthrough]];
494
988
    case '+':
495
988
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
988
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
1.90k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
1.90k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
48
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
48
        return static_cast<T>(negative ? -val : val);
507
48
    }
508
509
1.85k
    const T max_div_10 = max_val / 10;
510
1.85k
    const T max_mod_10 = max_val % 10;
511
512
1.85k
    int first = i;
513
6.58k
    for (; i < len; ++i) {
514
6.51k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
5.32k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
5.32k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
600
                *result = PARSE_OVERFLOW;
519
600
                return negative ? -max_val : max_val;
520
600
            }
521
4.72k
            val = val * 10 + digit;
522
4.72k
        } else {
523
1.18k
            if constexpr (enable_strict_mode) {
524
1.18k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
1.10k
                    *result = PARSE_FAILURE;
527
1.10k
                    return 0;
528
1.10k
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
88
            *result = PARSE_SUCCESS;
545
1.18k
            return static_cast<T>(negative ? -val : val);
546
1.18k
        }
547
6.51k
    }
548
68
    *result = PARSE_SUCCESS;
549
68
    return static_cast<T>(negative ? -val : val);
550
1.85k
}
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
74.4k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
74.4k
    if (UNLIKELY(len <= 0)) {
480
8
        *result = PARSE_FAILURE;
481
8
        return 0;
482
8
    }
483
484
74.4k
    using UnsignedT = MakeUnsignedT<T>;
485
74.4k
    UnsignedT val = 0;
486
74.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
74.4k
    bool negative = false;
488
74.4k
    int i = 0;
489
74.4k
    switch (*s) {
490
12.8k
    case '-':
491
12.8k
        negative = true;
492
12.8k
        max_val += 1;
493
12.8k
        [[fallthrough]];
494
13.1k
    case '+':
495
13.1k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
13.1k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
74.4k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
74.4k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
50.8k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
50.8k
        return static_cast<T>(negative ? -val : val);
507
50.8k
    }
508
509
23.6k
    const T max_div_10 = max_val / 10;
510
23.6k
    const T max_mod_10 = max_val % 10;
511
512
23.6k
    int first = i;
513
123k
    for (; i < len; ++i) {
514
114k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
112k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
112k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
12.9k
                *result = PARSE_OVERFLOW;
519
12.9k
                return negative ? -max_val : max_val;
520
12.9k
            }
521
99.5k
            val = val * 10 + digit;
522
99.5k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
1.90k
            } else {
530
                // Save original position where non-digit was found
531
1.90k
                int remaining_len = len - i;
532
1.90k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
1.90k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
1.90k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
1.90k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
1.29k
                    *result = PARSE_FAILURE;
540
1.29k
                    return 0;
541
1.29k
                }
542
1.90k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
610
            *result = PARSE_SUCCESS;
545
1.90k
            return static_cast<T>(negative ? -val : val);
546
1.90k
        }
547
114k
    }
548
8.80k
    *result = PARSE_SUCCESS;
549
8.80k
    return static_cast<T>(negative ? -val : val);
550
23.6k
}
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
1.88k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
1.88k
    if (UNLIKELY(len <= 0)) {
480
8
        *result = PARSE_FAILURE;
481
8
        return 0;
482
8
    }
483
484
1.87k
    using UnsignedT = MakeUnsignedT<T>;
485
1.87k
    UnsignedT val = 0;
486
1.87k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
1.87k
    bool negative = false;
488
1.87k
    int i = 0;
489
1.87k
    switch (*s) {
490
620
    case '-':
491
620
        negative = true;
492
620
        max_val += 1;
493
620
        [[fallthrough]];
494
970
    case '+':
495
970
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
970
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
1.87k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
1.87k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
168
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
168
        return static_cast<T>(negative ? -val : val);
507
168
    }
508
509
1.70k
    const T max_div_10 = max_val / 10;
510
1.70k
    const T max_mod_10 = max_val % 10;
511
512
1.70k
    int first = i;
513
7.87k
    for (; i < len; ++i) {
514
7.83k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
6.74k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
6.74k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
576
                *result = PARSE_OVERFLOW;
519
576
                return negative ? -max_val : max_val;
520
576
            }
521
6.17k
            val = val * 10 + digit;
522
6.17k
        } else {
523
1.08k
            if constexpr (enable_strict_mode) {
524
1.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
1.00k
                    *result = PARSE_FAILURE;
527
1.00k
                    return 0;
528
1.00k
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
88
            *result = PARSE_SUCCESS;
545
1.08k
            return static_cast<T>(negative ? -val : val);
546
1.08k
        }
547
7.83k
    }
548
40
    *result = PARSE_SUCCESS;
549
40
    return static_cast<T>(negative ? -val : val);
550
1.70k
}
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
105k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
105k
    if (UNLIKELY(len <= 0)) {
480
1.80k
        *result = PARSE_FAILURE;
481
1.80k
        return 0;
482
1.80k
    }
483
484
103k
    using UnsignedT = MakeUnsignedT<T>;
485
103k
    UnsignedT val = 0;
486
103k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
103k
    bool negative = false;
488
103k
    int i = 0;
489
103k
    switch (*s) {
490
10.6k
    case '-':
491
10.6k
        negative = true;
492
10.6k
        max_val += 1;
493
10.6k
        [[fallthrough]];
494
11.0k
    case '+':
495
11.0k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
11.0k
        if (UNLIKELY(len == 1)) {
498
2
            *result = PARSE_FAILURE;
499
2
            return 0;
500
2
        }
501
103k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
103k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
89.7k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
89.7k
        return static_cast<T>(negative ? -val : val);
507
89.7k
    }
508
509
13.5k
    const T max_div_10 = max_val / 10;
510
13.5k
    const T max_mod_10 = max_val % 10;
511
512
13.5k
    int first = i;
513
128k
    for (; i < len; ++i) {
514
122k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
120k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
120k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
5.79k
                *result = PARSE_OVERFLOW;
519
5.79k
                return negative ? -max_val : max_val;
520
5.79k
            }
521
115k
            val = val * 10 + digit;
522
115k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
1.93k
            } else {
530
                // Save original position where non-digit was found
531
1.93k
                int remaining_len = len - i;
532
1.93k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
1.93k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
1.93k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
1.93k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
1.54k
                    *result = PARSE_FAILURE;
540
1.54k
                    return 0;
541
1.54k
                }
542
1.93k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
386
            *result = PARSE_SUCCESS;
545
1.93k
            return static_cast<T>(negative ? -val : val);
546
1.93k
        }
547
122k
    }
548
5.82k
    *result = PARSE_SUCCESS;
549
5.82k
    return static_cast<T>(negative ? -val : val);
550
13.5k
}
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
1.84k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
1.84k
    if (UNLIKELY(len <= 0)) {
480
8
        *result = PARSE_FAILURE;
481
8
        return 0;
482
8
    }
483
484
1.84k
    using UnsignedT = MakeUnsignedT<T>;
485
1.84k
    UnsignedT val = 0;
486
1.84k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
1.84k
    bool negative = false;
488
1.84k
    int i = 0;
489
1.84k
    switch (*s) {
490
608
    case '-':
491
608
        negative = true;
492
608
        max_val += 1;
493
608
        [[fallthrough]];
494
952
    case '+':
495
952
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
952
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
1.84k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
1.84k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
461
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
461
        return static_cast<T>(negative ? -val : val);
507
461
    }
508
509
1.37k
    const T max_div_10 = max_val / 10;
510
1.37k
    const T max_mod_10 = max_val % 10;
511
512
1.37k
    int first = i;
513
10.7k
    for (; i < len; ++i) {
514
10.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
9.90k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
9.90k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
552
                *result = PARSE_OVERFLOW;
519
552
                return negative ? -max_val : max_val;
520
552
            }
521
9.34k
            val = val * 10 + digit;
522
9.34k
        } else {
523
795
            if constexpr (enable_strict_mode) {
524
795
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
735
                    *result = PARSE_FAILURE;
527
735
                    return 0;
528
735
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
60
            *result = PARSE_SUCCESS;
545
795
            return static_cast<T>(negative ? -val : val);
546
795
        }
547
10.6k
    }
548
32
    *result = PARSE_SUCCESS;
549
32
    return static_cast<T>(negative ? -val : val);
550
1.37k
}
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
85.5k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
85.5k
    if (UNLIKELY(len <= 0)) {
480
14
        *result = PARSE_FAILURE;
481
14
        return 0;
482
14
    }
483
484
85.4k
    using UnsignedT = MakeUnsignedT<T>;
485
85.4k
    UnsignedT val = 0;
486
85.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
85.4k
    bool negative = false;
488
85.4k
    int i = 0;
489
85.4k
    switch (*s) {
490
50.2k
    case '-':
491
50.2k
        negative = true;
492
50.2k
        max_val += 1;
493
50.2k
        [[fallthrough]];
494
50.5k
    case '+':
495
50.5k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
50.5k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
85.4k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
85.4k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
32.3k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
32.3k
        return static_cast<T>(negative ? -val : val);
507
32.3k
    }
508
509
53.1k
    const T max_div_10 = max_val / 10;
510
53.1k
    const T max_mod_10 = max_val % 10;
511
512
53.1k
    int first = i;
513
1.03M
    for (; i < len; ++i) {
514
989k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
988k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
988k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
5.22k
                *result = PARSE_OVERFLOW;
519
5.22k
                return negative ? -max_val : max_val;
520
5.22k
            }
521
983k
            val = val * 10 + digit;
522
983k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
1.49k
            } else {
530
                // Save original position where non-digit was found
531
1.49k
                int remaining_len = len - i;
532
1.49k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
1.49k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
1.49k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
1.49k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
1.24k
                    *result = PARSE_FAILURE;
540
1.24k
                    return 0;
541
1.24k
                }
542
1.49k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
244
            *result = PARSE_SUCCESS;
545
1.49k
            return static_cast<T>(negative ? -val : val);
546
1.49k
        }
547
989k
    }
548
46.4k
    *result = PARSE_SUCCESS;
549
46.4k
    return static_cast<T>(negative ? -val : val);
550
53.1k
}
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
1.82k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
1.82k
    if (UNLIKELY(len <= 0)) {
480
10
        *result = PARSE_FAILURE;
481
10
        return 0;
482
10
    }
483
484
1.81k
    using UnsignedT = MakeUnsignedT<T>;
485
1.81k
    UnsignedT val = 0;
486
1.81k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
1.81k
    bool negative = false;
488
1.81k
    int i = 0;
489
1.81k
    switch (*s) {
490
596
    case '-':
491
596
        negative = true;
492
596
        max_val += 1;
493
596
        [[fallthrough]];
494
934
    case '+':
495
934
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
934
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
1.81k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
1.81k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
735
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
735
        return static_cast<T>(negative ? -val : val);
507
735
    }
508
509
1.08k
    const T max_div_10 = max_val / 10;
510
1.08k
    const T max_mod_10 = max_val % 10;
511
512
1.08k
    int first = i;
513
16.9k
    for (; i < len; ++i) {
514
16.8k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
16.3k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
16.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
528
                *result = PARSE_OVERFLOW;
519
528
                return negative ? -max_val : max_val;
520
528
            }
521
15.8k
            val = val * 10 + digit;
522
15.8k
        } else {
523
523
            if constexpr (enable_strict_mode) {
524
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
491
                    *result = PARSE_FAILURE;
527
491
                    return 0;
528
491
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
32
            *result = PARSE_SUCCESS;
545
523
            return static_cast<T>(negative ? -val : val);
546
523
        }
547
16.8k
    }
548
32
    *result = PARSE_SUCCESS;
549
32
    return static_cast<T>(negative ? -val : val);
550
1.08k
}
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
1.78k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
1.78k
    if (UNLIKELY(len <= 0)) {
480
8
        *result = PARSE_FAILURE;
481
8
        return 0;
482
8
    }
483
484
1.77k
    using UnsignedT = MakeUnsignedT<T>;
485
1.77k
    UnsignedT val = 0;
486
1.77k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
1.77k
    bool negative = false;
488
1.77k
    int i = 0;
489
1.77k
    switch (*s) {
490
584
    case '-':
491
584
        negative = true;
492
584
        max_val += 1;
493
584
        [[fallthrough]];
494
916
    case '+':
495
916
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
916
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
1.77k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
1.77k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
752
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
752
        return static_cast<T>(negative ? -val : val);
507
752
    }
508
509
1.02k
    const T max_div_10 = max_val / 10;
510
1.02k
    const T max_mod_10 = max_val % 10;
511
512
1.02k
    int first = i;
513
31.3k
    for (; i < len; ++i) {
514
31.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
30.7k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
30.7k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
504
                *result = PARSE_OVERFLOW;
519
504
                return negative ? -max_val : max_val;
520
504
            }
521
30.2k
            val = val * 10 + digit;
522
30.2k
        } else {
523
488
            if constexpr (enable_strict_mode) {
524
488
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
456
                    *result = PARSE_FAILURE;
527
456
                    return 0;
528
456
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
32
            *result = PARSE_SUCCESS;
545
488
            return static_cast<T>(negative ? -val : val);
546
488
        }
547
31.2k
    }
548
32
    *result = PARSE_SUCCESS;
549
32
    return static_cast<T>(negative ? -val : val);
550
1.02k
}
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
149
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
149
    if (UNLIKELY(len <= 0)) {
480
2
        *result = PARSE_FAILURE;
481
2
        return 0;
482
2
    }
483
484
147
    using UnsignedT = MakeUnsignedT<T>;
485
147
    UnsignedT val = 0;
486
147
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
147
    bool negative = false;
488
147
    int i = 0;
489
147
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
147
    }
502
503
    // This is the fast path where the string cannot overflow.
504
147
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
147
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
147
        return static_cast<T>(negative ? -val : val);
507
147
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
0
            if constexpr (enable_strict_mode) {
524
0
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
0
                    *result = PARSE_FAILURE;
527
0
                    return 0;
528
0
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
20
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
20
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
20
    using UnsignedT = MakeUnsignedT<T>;
485
20
    UnsignedT val = 0;
486
20
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
20
    bool negative = false;
488
20
    int i = 0;
489
20
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
20
    }
502
503
    // This is the fast path where the string cannot overflow.
504
20
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
20
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
20
        return static_cast<T>(negative ? -val : val);
507
20
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
4
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
4
    using UnsignedT = MakeUnsignedT<T>;
485
4
    UnsignedT val = 0;
486
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
4
    bool negative = false;
488
4
    int i = 0;
489
4
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
4
    }
502
503
    // This is the fast path where the string cannot overflow.
504
4
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
4
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
4
        return static_cast<T>(negative ? -val : val);
507
4
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
4
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
4
    using UnsignedT = MakeUnsignedT<T>;
485
4
    UnsignedT val = 0;
486
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
4
    bool negative = false;
488
4
    int i = 0;
489
4
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
4
    }
502
503
    // This is the fast path where the string cannot overflow.
504
4
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
0
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
0
        return static_cast<T>(negative ? -val : val);
507
0
    }
508
509
4
    const T max_div_10 = max_val / 10;
510
4
    const T max_mod_10 = max_val % 10;
511
512
4
    int first = i;
513
84
    for (; i < len; ++i) {
514
80
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
80
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
80
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
80
            val = val * 10 + digit;
522
80
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
80
    }
548
4
    *result = PARSE_SUCCESS;
549
4
    return static_cast<T>(negative ? -val : val);
550
4
}
551
552
template <typename T>
553
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
554
1.37k
                                                ParseResult* result) {
555
1.37k
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
1.37k
    T val = 0;
561
1.37k
    T max_val = std::numeric_limits<T>::max();
562
1.37k
    int i = 0;
563
564
1.37k
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
1.37k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
784
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
784
        return val;
569
784
    }
570
571
588
    const T max_div_10 = max_val / 10;
572
588
    const T max_mod_10 = max_val % 10;
573
574
588
    int first = i;
575
4.65k
    for (; i < len; ++i) {
576
4.31k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
4.26k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
4.26k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
196
                *result = PARSE_OVERFLOW;
581
196
                return max_val;
582
196
            }
583
4.06k
            val = val * 10 + digit;
584
4.06k
        } else {
585
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
49
                *result = PARSE_FAILURE;
589
49
                return 0;
590
49
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
49
        }
595
4.31k
    }
596
343
    *result = PARSE_SUCCESS;
597
343
    return val;
598
588
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
343
                                                ParseResult* result) {
555
343
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
343
    T val = 0;
561
343
    T max_val = std::numeric_limits<T>::max();
562
343
    int i = 0;
563
564
343
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
98
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
98
        return val;
569
98
    }
570
571
245
    const T max_div_10 = max_val / 10;
572
245
    const T max_mod_10 = max_val % 10;
573
574
245
    int first = i;
575
784
    for (; i < len; ++i) {
576
637
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
588
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
588
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
49
                *result = PARSE_OVERFLOW;
581
49
                return max_val;
582
49
            }
583
539
            val = val * 10 + digit;
584
539
        } else {
585
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
49
                *result = PARSE_FAILURE;
589
49
                return 0;
590
49
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
49
        }
595
637
    }
596
147
    *result = PARSE_SUCCESS;
597
147
    return val;
598
245
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
343
                                                ParseResult* result) {
555
343
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
343
    T val = 0;
561
343
    T max_val = std::numeric_limits<T>::max();
562
343
    int i = 0;
563
564
343
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
196
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
196
        return val;
569
196
    }
570
571
147
    const T max_div_10 = max_val / 10;
572
147
    const T max_mod_10 = max_val % 10;
573
574
147
    int first = i;
575
833
    for (; i < len; ++i) {
576
735
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
735
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
735
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
49
                *result = PARSE_OVERFLOW;
581
49
                return max_val;
582
49
            }
583
686
            val = val * 10 + digit;
584
686
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
735
    }
596
98
    *result = PARSE_SUCCESS;
597
98
    return val;
598
147
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
343
                                                ParseResult* result) {
555
343
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
343
    T val = 0;
561
343
    T max_val = std::numeric_limits<T>::max();
562
343
    int i = 0;
563
564
343
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
245
        return val;
569
245
    }
570
571
98
    const T max_div_10 = max_val / 10;
572
98
    const T max_mod_10 = max_val % 10;
573
574
98
    int first = i;
575
1.02k
    for (; i < len; ++i) {
576
980
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
980
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
980
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
49
                *result = PARSE_OVERFLOW;
581
49
                return max_val;
582
49
            }
583
931
            val = val * 10 + digit;
584
931
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
980
    }
596
49
    *result = PARSE_SUCCESS;
597
49
    return val;
598
98
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
343
                                                ParseResult* result) {
555
343
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
343
    T val = 0;
561
343
    T max_val = std::numeric_limits<T>::max();
562
343
    int i = 0;
563
564
343
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
245
        return val;
569
245
    }
570
571
98
    const T max_div_10 = max_val / 10;
572
98
    const T max_mod_10 = max_val % 10;
573
574
98
    int first = i;
575
2.00k
    for (; i < len; ++i) {
576
1.96k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
1.96k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
1.96k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
49
                *result = PARSE_OVERFLOW;
581
49
                return max_val;
582
49
            }
583
1.91k
            val = val * 10 + digit;
584
1.91k
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
1.96k
    }
596
49
    *result = PARSE_SUCCESS;
597
49
    return val;
598
98
}
599
600
template <typename T>
601
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
602
27.8k
                                       ParseResult* result) {
603
27.8k
    using UnsignedT = MakeUnsignedT<T>;
604
27.8k
    UnsignedT val = 0;
605
27.8k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
27.8k
    bool negative = false;
607
27.8k
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
27.8k
    int i = 0;
612
27.8k
    switch (*s) {
613
13.4k
    case '-':
614
13.4k
        negative = true;
615
13.4k
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
13.4k
        [[fallthrough]];
617
13.7k
    case '+':
618
13.7k
        i = 1;
619
27.8k
    }
620
621
27.8k
    const T max_div_base = max_val / base;
622
27.8k
    const T max_mod_base = max_val % base;
623
624
27.8k
    int first = i;
625
90.9k
    for (; i < len; ++i) {
626
76.6k
        T digit;
627
76.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
75.7k
            digit = s[i] - '0';
629
75.7k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
639
            digit = (s[i] - 'a' + 10);
631
639
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
98
            digit = (s[i] - 'A' + 10);
633
147
        } else {
634
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
147
                *result = PARSE_FAILURE;
638
147
                return 0;
639
147
            }
640
            // skip trailing whitespace.
641
0
            break;
642
147
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
76.4k
        if (digit >= base) {
646
392
            break;
647
392
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
76.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
12.9k
            *result = PARSE_OVERFLOW;
652
12.9k
            return static_cast<T>(negative ? -max_val : max_val);
653
12.9k
        }
654
63.1k
        val = val * base + digit;
655
63.1k
    }
656
14.7k
    *result = PARSE_SUCCESS;
657
14.7k
    return static_cast<T>(negative ? -val : val);
658
27.8k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
26.4k
                                       ParseResult* result) {
603
26.4k
    using UnsignedT = MakeUnsignedT<T>;
604
26.4k
    UnsignedT val = 0;
605
26.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
26.4k
    bool negative = false;
607
26.4k
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
26.4k
    int i = 0;
612
26.4k
    switch (*s) {
613
12.8k
    case '-':
614
12.8k
        negative = true;
615
12.8k
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
12.8k
        [[fallthrough]];
617
12.9k
    case '+':
618
12.9k
        i = 1;
619
26.4k
    }
620
621
26.4k
    const T max_div_base = max_val / base;
622
26.4k
    const T max_mod_base = max_val % base;
623
624
26.4k
    int first = i;
625
80.7k
    for (; i < len; ++i) {
626
67.4k
        T digit;
627
67.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
66.6k
            digit = s[i] - '0';
629
66.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
539
            digit = (s[i] - 'a' + 10);
631
539
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
98
            digit = (s[i] - 'A' + 10);
633
147
        } else {
634
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
147
                *result = PARSE_FAILURE;
638
147
                return 0;
639
147
            }
640
            // skip trailing whitespace.
641
0
            break;
642
147
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
67.3k
        if (digit >= base) {
646
392
            break;
647
392
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
66.9k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
12.6k
            *result = PARSE_OVERFLOW;
652
12.6k
            return static_cast<T>(negative ? -max_val : max_val);
653
12.6k
        }
654
54.2k
        val = val * base + digit;
655
54.2k
    }
656
13.6k
    *result = PARSE_SUCCESS;
657
13.6k
    return static_cast<T>(negative ? -val : val);
658
26.4k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
490
                                       ParseResult* result) {
603
490
    using UnsignedT = MakeUnsignedT<T>;
604
490
    UnsignedT val = 0;
605
490
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
490
    bool negative = false;
607
490
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
490
    int i = 0;
612
490
    switch (*s) {
613
196
    case '-':
614
196
        negative = true;
615
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
196
        [[fallthrough]];
617
245
    case '+':
618
245
        i = 1;
619
490
    }
620
621
490
    const T max_div_base = max_val / base;
622
490
    const T max_mod_base = max_val % base;
623
624
490
    int first = i;
625
2.10k
    for (; i < len; ++i) {
626
1.71k
        T digit;
627
1.71k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
1.61k
            digit = s[i] - '0';
629
1.61k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
98
            digit = (s[i] - 'a' + 10);
631
98
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
1.71k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
1.71k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
98
            *result = PARSE_OVERFLOW;
652
98
            return static_cast<T>(negative ? -max_val : max_val);
653
98
        }
654
1.61k
        val = val * base + digit;
655
1.61k
    }
656
392
    *result = PARSE_SUCCESS;
657
392
    return static_cast<T>(negative ? -val : val);
658
490
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
441
                                       ParseResult* result) {
603
441
    using UnsignedT = MakeUnsignedT<T>;
604
441
    UnsignedT val = 0;
605
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
441
    bool negative = false;
607
441
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
441
    int i = 0;
612
441
    switch (*s) {
613
147
    case '-':
614
147
        negative = true;
615
147
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
147
        [[fallthrough]];
617
245
    case '+':
618
245
        i = 1;
619
441
    }
620
621
441
    const T max_div_base = max_val / base;
622
441
    const T max_mod_base = max_val % base;
623
624
441
    int first = i;
625
3.03k
    for (; i < len; ++i) {
626
2.69k
        T digit;
627
2.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
2.69k
            digit = s[i] - '0';
629
2.69k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
0
            digit = (s[i] - 'a' + 10);
631
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
2.69k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
2.69k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
98
            *result = PARSE_OVERFLOW;
652
98
            return static_cast<T>(negative ? -max_val : max_val);
653
98
        }
654
2.59k
        val = val * base + digit;
655
2.59k
    }
656
343
    *result = PARSE_SUCCESS;
657
343
    return static_cast<T>(negative ? -val : val);
658
441
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
441
                                       ParseResult* result) {
603
441
    using UnsignedT = MakeUnsignedT<T>;
604
441
    UnsignedT val = 0;
605
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
441
    bool negative = false;
607
441
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
441
    int i = 0;
612
441
    switch (*s) {
613
196
    case '-':
614
196
        negative = true;
615
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
196
        [[fallthrough]];
617
245
    case '+':
618
245
        i = 1;
619
441
    }
620
621
441
    const T max_div_base = max_val / base;
622
441
    const T max_mod_base = max_val % base;
623
624
441
    int first = i;
625
5.09k
    for (; i < len; ++i) {
626
4.75k
        T digit;
627
4.75k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
4.75k
            digit = s[i] - '0';
629
4.75k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
0
            digit = (s[i] - 'a' + 10);
631
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
4.75k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
4.75k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
98
            *result = PARSE_OVERFLOW;
652
98
            return static_cast<T>(negative ? -max_val : max_val);
653
98
        }
654
4.65k
        val = val * base + digit;
655
4.65k
    }
656
343
    *result = PARSE_SUCCESS;
657
343
    return static_cast<T>(negative ? -val : val);
658
441
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
1
                                       ParseResult* result) {
603
1
    using UnsignedT = MakeUnsignedT<T>;
604
1
    UnsignedT val = 0;
605
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
1
    bool negative = false;
607
1
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
1
    int i = 0;
612
1
    switch (*s) {
613
0
    case '-':
614
0
        negative = true;
615
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
0
        [[fallthrough]];
617
0
    case '+':
618
0
        i = 1;
619
1
    }
620
621
1
    const T max_div_base = max_val / base;
622
1
    const T max_mod_base = max_val % base;
623
624
1
    int first = i;
625
3
    for (; i < len; ++i) {
626
2
        T digit;
627
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
0
            digit = s[i] - '0';
629
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
2
            digit = (s[i] - 'a' + 10);
631
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
2
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
0
            *result = PARSE_OVERFLOW;
652
0
            return static_cast<T>(negative ? -max_val : max_val);
653
0
        }
654
2
        val = val * base + digit;
655
2
    }
656
1
    *result = PARSE_SUCCESS;
657
1
    return static_cast<T>(negative ? -val : val);
658
1
}
659
660
template <typename T, bool enable_strict_mode>
661
278k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
278k
    T val = 0;
663
278k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
278k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
272k
        val = s[0] - '0';
670
272k
    } else {
671
6.29k
        *result = PARSE_FAILURE;
672
6.29k
        return 0;
673
6.29k
    }
674
552k
    for (int i = 1; i < len; ++i) {
675
283k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
280k
            T digit = s[i] - '0';
677
280k
            val = val * 10 + digit;
678
280k
        } else {
679
3.78k
            if constexpr (enable_strict_mode) {
680
1.31k
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
1.17k
                    *result = PARSE_FAILURE;
682
1.17k
                    return 0;
683
1.17k
                }
684
2.46k
            } else {
685
                // Save original position where non-digit was found
686
2.46k
                int remaining_len = len - i;
687
2.46k
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
2.46k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
2.46k
                if ((UNLIKELY(remaining_len != 0 &&
691
2.46k
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
465
                    *result = PARSE_FAILURE;
693
465
                    return 0;
694
465
                }
695
2.46k
            }
696
2.14k
            *result = PARSE_SUCCESS;
697
3.78k
            return val;
698
3.78k
        }
699
283k
    }
700
268k
    *result = PARSE_SUCCESS;
701
268k
    return val;
702
272k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
41.9k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
41.9k
    T val = 0;
663
41.9k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
41.9k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
41.0k
        val = s[0] - '0';
670
41.0k
    } else {
671
914
        *result = PARSE_FAILURE;
672
914
        return 0;
673
914
    }
674
57.7k
    for (int i = 1; i < len; ++i) {
675
17.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
16.6k
            T digit = s[i] - '0';
677
16.6k
            val = val * 10 + digit;
678
16.6k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
378
            } else {
685
                // Save original position where non-digit was found
686
378
                int remaining_len = len - i;
687
378
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
378
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
378
                if ((UNLIKELY(remaining_len != 0 &&
691
378
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
98
                    *result = PARSE_FAILURE;
693
98
                    return 0;
694
98
                }
695
378
            }
696
280
            *result = PARSE_SUCCESS;
697
378
            return val;
698
378
        }
699
17.0k
    }
700
40.6k
    *result = PARSE_SUCCESS;
701
40.6k
    return val;
702
41.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
60.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
60.5k
    T val = 0;
663
60.5k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
60.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
59.9k
        val = s[0] - '0';
670
59.9k
    } else {
671
582
        *result = PARSE_FAILURE;
672
582
        return 0;
673
582
    }
674
85.3k
    for (int i = 1; i < len; ++i) {
675
25.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
25.3k
            T digit = s[i] - '0';
677
25.3k
            val = val * 10 + digit;
678
25.3k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
2
            } else {
685
                // Save original position where non-digit was found
686
2
                int remaining_len = len - i;
687
2
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
2
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
2
                if ((UNLIKELY(remaining_len != 0 &&
691
2
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
2
                    *result = PARSE_FAILURE;
693
2
                    return 0;
694
2
                }
695
2
            }
696
0
            *result = PARSE_SUCCESS;
697
2
            return val;
698
2
        }
699
25.3k
    }
700
59.9k
    *result = PARSE_SUCCESS;
701
59.9k
    return val;
702
59.9k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
48
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
48
    T val = 0;
663
48
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
48
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
22
        val = s[0] - '0';
670
26
    } else {
671
26
        *result = PARSE_FAILURE;
672
26
        return 0;
673
26
    }
674
22
    for (int i = 1; i < len; ++i) {
675
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
0
            T digit = s[i] - '0';
677
0
            val = val * 10 + digit;
678
2
        } else {
679
2
            if constexpr (enable_strict_mode) {
680
2
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
2
                    *result = PARSE_FAILURE;
682
2
                    return 0;
683
2
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
0
            *result = PARSE_SUCCESS;
697
2
            return val;
698
2
        }
699
2
    }
700
20
    *result = PARSE_SUCCESS;
701
20
    return val;
702
22
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
51.0k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
51.0k
    T val = 0;
663
51.0k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
51.0k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
50.0k
        val = s[0] - '0';
670
50.0k
    } else {
671
918
        *result = PARSE_FAILURE;
672
918
        return 0;
673
918
    }
674
72.9k
    for (int i = 1; i < len; ++i) {
675
23.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
22.8k
            T digit = s[i] - '0';
677
22.8k
            val = val * 10 + digit;
678
22.8k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
957
            } else {
685
                // Save original position where non-digit was found
686
957
                int remaining_len = len - i;
687
957
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
957
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
957
                if ((UNLIKELY(remaining_len != 0 &&
691
957
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
74
                    *result = PARSE_FAILURE;
693
74
                    return 0;
694
74
                }
695
957
            }
696
883
            *result = PARSE_SUCCESS;
697
957
            return val;
698
957
        }
699
23.7k
    }
700
49.1k
    *result = PARSE_SUCCESS;
701
49.1k
    return val;
702
50.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
168
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
168
    T val = 0;
663
168
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
168
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
130
        val = s[0] - '0';
670
130
    } else {
671
38
        *result = PARSE_FAILURE;
672
38
        return 0;
673
38
    }
674
206
    for (int i = 1; i < len; ++i) {
675
158
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
76
            T digit = s[i] - '0';
677
76
            val = val * 10 + digit;
678
82
        } else {
679
82
            if constexpr (enable_strict_mode) {
680
82
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
82
                    *result = PARSE_FAILURE;
682
82
                    return 0;
683
82
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
0
            *result = PARSE_SUCCESS;
697
82
            return val;
698
82
        }
699
158
    }
700
48
    *result = PARSE_SUCCESS;
701
48
    return val;
702
130
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
89.9k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
89.9k
    T val = 0;
663
89.9k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
89.9k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
87.7k
        val = s[0] - '0';
670
87.7k
    } else {
671
2.19k
        *result = PARSE_FAILURE;
672
2.19k
        return 0;
673
2.19k
    }
674
265k
    for (int i = 1; i < len; ++i) {
675
178k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
177k
            T digit = s[i] - '0';
677
177k
            val = val * 10 + digit;
678
177k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
515
            } else {
685
                // Save original position where non-digit was found
686
515
                int remaining_len = len - i;
687
515
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
515
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
515
                if ((UNLIKELY(remaining_len != 0 &&
691
515
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
167
                    *result = PARSE_FAILURE;
693
167
                    return 0;
694
167
                }
695
515
            }
696
348
            *result = PARSE_SUCCESS;
697
515
            return val;
698
515
        }
699
178k
    }
700
87.2k
    *result = PARSE_SUCCESS;
701
87.2k
    return val;
702
87.7k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
608
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
608
    T val = 0;
663
608
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
608
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
523
        val = s[0] - '0';
670
523
    } else {
671
85
        *result = PARSE_FAILURE;
672
85
        return 0;
673
85
    }
674
1.28k
    for (int i = 1; i < len; ++i) {
675
1.08k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
764
            T digit = s[i] - '0';
677
764
            val = val * 10 + digit;
678
764
        } else {
679
320
            if constexpr (enable_strict_mode) {
680
320
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
292
                    *result = PARSE_FAILURE;
682
292
                    return 0;
683
292
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
28
            *result = PARSE_SUCCESS;
697
320
            return val;
698
320
        }
699
1.08k
    }
700
203
    *result = PARSE_SUCCESS;
701
203
    return val;
702
523
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
32.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
32.5k
    T val = 0;
663
32.5k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
32.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
31.5k
        val = s[0] - '0';
670
31.5k
    } else {
671
1.07k
        *result = PARSE_FAILURE;
672
1.07k
        return 0;
673
1.07k
    }
674
66.1k
    for (int i = 1; i < len; ++i) {
675
35.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
34.6k
            T digit = s[i] - '0';
677
34.6k
            val = val * 10 + digit;
678
34.6k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
615
            } else {
685
                // Save original position where non-digit was found
686
615
                int remaining_len = len - i;
687
615
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
615
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
615
                if ((UNLIKELY(remaining_len != 0 &&
691
615
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
124
                    *result = PARSE_FAILURE;
693
124
                    return 0;
694
124
                }
695
615
            }
696
491
            *result = PARSE_SUCCESS;
697
615
            return val;
698
615
        }
699
35.3k
    }
700
30.8k
    *result = PARSE_SUCCESS;
701
30.8k
    return val;
702
31.5k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
735
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
735
    T val = 0;
663
735
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
735
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
518
        val = s[0] - '0';
670
518
    } else {
671
217
        *result = PARSE_FAILURE;
672
217
        return 0;
673
217
    }
674
1.51k
    for (int i = 1; i < len; ++i) {
675
1.45k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
999
            T digit = s[i] - '0';
677
999
            val = val * 10 + digit;
678
999
        } else {
679
456
            if constexpr (enable_strict_mode) {
680
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
400
                    *result = PARSE_FAILURE;
682
400
                    return 0;
683
400
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
56
            *result = PARSE_SUCCESS;
697
456
            return val;
698
456
        }
699
1.45k
    }
700
62
    *result = PARSE_SUCCESS;
701
62
    return val;
702
518
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
752
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
752
    T val = 0;
663
752
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
752
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
512
        val = s[0] - '0';
670
512
    } else {
671
240
        *result = PARSE_FAILURE;
672
240
        return 0;
673
240
    }
674
1.49k
    for (int i = 1; i < len; ++i) {
675
1.44k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
986
            T digit = s[i] - '0';
677
986
            val = val * 10 + digit;
678
986
        } else {
679
456
            if constexpr (enable_strict_mode) {
680
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
400
                    *result = PARSE_FAILURE;
682
400
                    return 0;
683
400
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
56
            *result = PARSE_SUCCESS;
697
456
            return val;
698
456
        }
699
1.44k
    }
700
56
    *result = PARSE_SUCCESS;
701
56
    return val;
702
512
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
4
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
4
    T val = 0;
663
4
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
4
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
4
        val = s[0] - '0';
670
4
    } else {
671
0
        *result = PARSE_FAILURE;
672
0
        return 0;
673
0
    }
674
4
    for (int i = 1; i < len; ++i) {
675
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
0
            T digit = s[i] - '0';
677
0
            val = val * 10 + digit;
678
0
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
0
            } else {
685
                // Save original position where non-digit was found
686
0
                int remaining_len = len - i;
687
0
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
0
                if ((UNLIKELY(remaining_len != 0 &&
691
0
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
0
                    *result = PARSE_FAILURE;
693
0
                    return 0;
694
0
                }
695
0
            }
696
0
            *result = PARSE_SUCCESS;
697
0
            return val;
698
0
        }
699
0
    }
700
4
    *result = PARSE_SUCCESS;
701
4
    return val;
702
4
}
703
704
// at least the first char(if any) must be a digit.
705
template <typename T>
706
T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
707
136k
                                                  ParseResult* result) {
708
136k
    T val = 0;
709
136k
    if (max_len == 0) [[unlikely]] {
710
135k
        *result = PARSE_SUCCESS;
711
135k
        return val;
712
135k
    }
713
    // Factor out the first char for error handling speeds up the loop.
714
1.14k
    if (is_numeric_ascii(s[0])) [[likely]] {
715
1.14k
        val = s[0] - '0';
716
1.14k
    } else {
717
0
        *result = PARSE_FAILURE;
718
0
        return 0;
719
0
    }
720
5.13k
    for (int i = 1; i < max_len; ++i) {
721
3.99k
        if (is_numeric_ascii(s[i])) [[likely]] {
722
3.99k
            T digit = s[i] - '0';
723
3.99k
            val = val * 10 + digit;
724
3.99k
        } else {
725
            // 123abc, return 123
726
0
            *result = PARSE_SUCCESS;
727
0
            return val;
728
0
        }
729
3.99k
    }
730
1.14k
    *result = PARSE_SUCCESS;
731
1.14k
    return val;
732
1.14k
}
733
734
template <typename T>
735
152k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
152k
    int i = 0;
737
    // skip leading spaces
738
152k
    for (; i < len; ++i) {
739
152k
        if (!is_whitespace_ascii(s[i])) {
740
152k
            break;
741
152k
        }
742
152k
    }
743
744
    // skip back spaces
745
152k
    int j = len - 1;
746
152k
    for (; j >= i; j--) {
747
152k
        if (!is_whitespace_ascii(s[j])) {
748
152k
            break;
749
152k
        }
750
152k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
152k
    if (i < len && s[i] == '+') {
754
7.08k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
20
            *result = PARSE_FAILURE;
766
20
            return 0;
767
20
        }
768
7.08k
    }
769
152k
    if (UNLIKELY(i > j)) {
770
32
        *result = PARSE_FAILURE;
771
32
        return 0;
772
32
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
152k
    double val = 0;
776
152k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
152k
    if (res.ptr == s + j + 1) {
779
148k
        *result = PARSE_SUCCESS;
780
148k
        return val;
781
148k
    } else {
782
4.61k
        *result = PARSE_FAILURE;
783
4.61k
    }
784
4.61k
    return 0;
785
152k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
735
87.6k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
87.6k
    int i = 0;
737
    // skip leading spaces
738
87.6k
    for (; i < len; ++i) {
739
87.6k
        if (!is_whitespace_ascii(s[i])) {
740
87.6k
            break;
741
87.6k
        }
742
87.6k
    }
743
744
    // skip back spaces
745
87.6k
    int j = len - 1;
746
87.6k
    for (; j >= i; j--) {
747
87.6k
        if (!is_whitespace_ascii(s[j])) {
748
87.6k
            break;
749
87.6k
        }
750
87.6k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
87.6k
    if (i < len && s[i] == '+') {
754
3.54k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
10
            *result = PARSE_FAILURE;
766
10
            return 0;
767
10
        }
768
3.54k
    }
769
87.6k
    if (UNLIKELY(i > j)) {
770
18
        *result = PARSE_FAILURE;
771
18
        return 0;
772
18
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
87.6k
    double val = 0;
776
87.6k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
87.6k
    if (res.ptr == s + j + 1) {
779
85.3k
        *result = PARSE_SUCCESS;
780
85.3k
        return val;
781
85.3k
    } else {
782
2.32k
        *result = PARSE_FAILURE;
783
2.32k
    }
784
2.32k
    return 0;
785
87.6k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
735
65.1k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
65.1k
    int i = 0;
737
    // skip leading spaces
738
65.1k
    for (; i < len; ++i) {
739
65.1k
        if (!is_whitespace_ascii(s[i])) {
740
65.1k
            break;
741
65.1k
        }
742
65.1k
    }
743
744
    // skip back spaces
745
65.1k
    int j = len - 1;
746
65.1k
    for (; j >= i; j--) {
747
65.1k
        if (!is_whitespace_ascii(s[j])) {
748
65.1k
            break;
749
65.1k
        }
750
65.1k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
65.1k
    if (i < len && s[i] == '+') {
754
3.54k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
10
            *result = PARSE_FAILURE;
766
10
            return 0;
767
10
        }
768
3.54k
    }
769
65.1k
    if (UNLIKELY(i > j)) {
770
14
        *result = PARSE_FAILURE;
771
14
        return 0;
772
14
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
65.1k
    double val = 0;
776
65.1k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
65.1k
    if (res.ptr == s + j + 1) {
779
62.8k
        *result = PARSE_SUCCESS;
780
62.8k
        return val;
781
62.8k
    } else {
782
2.28k
        *result = PARSE_FAILURE;
783
2.28k
    }
784
2.28k
    return 0;
785
65.1k
}
786
787
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
788
11.3k
                                                  ParseResult* result) {
789
11.3k
    *result = PARSE_SUCCESS;
790
791
11.3k
    if (len == 1) {
792
2.66k
        if (s[0] == '1' || s[0] == 't' || s[0] == 'T') {
793
333
            return true;
794
333
        }
795
2.32k
        if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') {
796
934
            return false;
797
934
        }
798
1.39k
        *result = PARSE_FAILURE;
799
1.39k
        return false;
800
2.32k
    }
801
802
8.71k
    if (len == 2) {
803
975
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
804
10
            return true;
805
10
        }
806
965
        if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) {
807
9
            return false;
808
9
        }
809
965
    }
810
811
8.69k
    if (len == 3) {
812
42
        if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') &&
813
42
            (s[2] == 's' || s[2] == 'S')) {
814
10
            return true;
815
10
        }
816
32
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') &&
817
32
            (s[2] == 'f' || s[2] == 'F')) {
818
9
            return false;
819
9
        }
820
32
    }
821
822
8.67k
    if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
823
8.67k
        (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) {
824
3.38k
        return true;
825
3.38k
    }
826
827
5.29k
    if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
828
5.29k
        (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
829
5.29k
        (s[4] == 'e' || s[4] == 'E')) {
830
3.42k
        return false;
831
3.42k
    }
832
833
    // No valid boolean value found
834
1.87k
    *result = PARSE_FAILURE;
835
1.87k
    return false;
836
5.29k
}
837
#include "common/compile_check_avoid_end.h"
838
} // end namespace doris