Coverage Report

Created: 2026-01-28 00:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/string_parser.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
#include <sys/types.h>
27
28
#include <algorithm>
29
#include <cstdlib>
30
// IWYU pragma: no_include <bits/std_abs.h>
31
#include <cmath> // IWYU pragma: keep
32
#include <cstdint>
33
#include <limits>
34
#include <map>
35
#include <string>
36
#include <type_traits>
37
#include <utility>
38
39
#include "common/compiler_util.h" // IWYU pragma: keep
40
#include "common/status.h"
41
#include "runtime/large_int_value.h"
42
#include "runtime/primitive_type.h"
43
#include "vec/common/int_exp.h"
44
#include "vec/common/string_utils/string_utils.h"
45
#include "vec/core/extended_types.h"
46
#include "vec/data_types/number_traits.h"
47
48
namespace doris {
49
#include "common/compile_check_avoid_begin.h"
50
namespace vectorized {
51
template <DecimalNativeTypeConcept T>
52
struct Decimal;
53
} // namespace vectorized
54
55
// they rely on the template parameter `IS_STRICT`. in strict mode, it will set error code and otherwise it will not.
56
#ifndef SET_PARAMS_RET_FALSE_IFN
57
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
58
3.93M
    do {                                                              \
59
3.93M
        if (!(stmt)) [[unlikely]] {                                   \
60
36.1k
            if constexpr (IsStrict) {                                 \
61
123
                params.status = Status::InvalidArgument(__VA_ARGS__); \
62
123
            }                                                         \
63
36.1k
            return false;                                             \
64
36.1k
        }                                                             \
65
3.93M
    } while (false)
66
#endif
67
68
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
69
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
70
157
    do {                                          \
71
157
        try {                                     \
72
157
            { stmt; }                             \
73
157
        } catch (const doris::Exception& e) {     \
74
15
            if constexpr (IsStrict) {             \
75
5
                params.status = e.to_status();    \
76
5
            }                                     \
77
15
            return false;                         \
78
15
        }                                         \
79
157
    } while (false)
80
#endif
81
82
// skip leading and trailing ascii whitespaces,
83
// return the pointer to the first non-whitespace char,
84
// and update the len to the new length, which does not include
85
// leading and trailing whitespaces
86
template <typename T>
87
544k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
88
1.01M
    while (len > 0 && is_whitespace_ascii(*s)) {
89
469k
        ++s;
90
469k
        --len;
91
469k
    }
92
93
1.01M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
94
465k
        --len;
95
465k
    }
96
97
544k
    return s;
98
544k
}
_ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_
Line
Count
Source
87
515k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
88
909k
    while (len > 0 && is_whitespace_ascii(*s)) {
89
393k
        ++s;
90
393k
        --len;
91
393k
    }
92
93
905k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
94
390k
        --len;
95
390k
    }
96
97
515k
    return s;
98
515k
}
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_
Line
Count
Source
87
1.37k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
88
4.90k
    while (len > 0 && is_whitespace_ascii(*s)) {
89
3.52k
        ++s;
90
3.52k
        --len;
91
3.52k
    }
92
93
4.90k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
94
3.52k
        --len;
95
3.52k
    }
96
97
1.37k
    return s;
98
1.37k
}
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_
Line
Count
Source
87
27.8k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
88
100k
    while (len > 0 && is_whitespace_ascii(*s)) {
89
72.4k
        ++s;
90
72.4k
        --len;
91
72.4k
    }
92
93
99.8k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
94
72.0k
        --len;
95
72.0k
    }
96
97
27.8k
    return s;
98
27.8k
}
99
100
template <typename T>
101
53.9k
inline const char* skip_leading_whitespace(const char* __restrict s, T& len) {
102
157k
    while (len > 0 && is_whitespace_ascii(*s)) {
103
103k
        ++s;
104
103k
        --len;
105
103k
    }
106
107
53.9k
    return s;
108
53.9k
}
109
110
// skip trailing ascii whitespaces,
111
// return the pointer to the first char,
112
// and update the len to the new length, which does not include
113
// trailing whitespaces
114
template <typename T>
115
44.3k
inline const char* skip_trailing_whitespaces(const char* s, T& len) {
116
160k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
117
115k
        --len;
118
115k
    }
119
120
44.3k
    return s;
121
44.3k
}
122
123
template <bool (*Pred)(char)>
124
436k
bool range_suite(const char* s, const char* end) {
125
436k
    return std::ranges::all_of(s, end, Pred);
126
436k
}
_ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_
Line
Count
Source
124
434k
bool range_suite(const char* s, const char* end) {
125
434k
    return std::ranges::all_of(s, end, Pred);
126
434k
}
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_
Line
Count
Source
124
2.28k
bool range_suite(const char* s, const char* end) {
125
2.28k
    return std::ranges::all_of(s, end, Pred);
126
2.28k
}
127
128
inline auto is_digit_range = range_suite<is_numeric_ascii>;
129
inline auto is_space_range = range_suite<is_whitespace_ascii>;
130
131
// combine in_bound and range_suite is ok. won't lead to duplicated calculation.
132
462k
inline bool in_bound(const char* s, const char* end, size_t offset) {
133
462k
    if (s + offset >= end) [[unlikely]] {
134
3.12k
        return false;
135
3.12k
    }
136
459k
    return true;
137
462k
}
138
139
// LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more.
140
// if need result, use StringRef{origin_s, s} outside
141
template <int LEN, bool (*Pred)(char)>
142
1.50M
bool skip_qualified_char(const char*& s, const char* end) {
143
1.50M
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
1.19M
        while (s != end && Pred(*s)) {
146
692k
            ++s;
147
692k
        }
148
997k
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
1.98M
        for (int i = 0; i < LEN; ++i, ++s) {
151
997k
            if (s == end || !Pred(*s)) [[unlikely]] {
152
10.8k
                return false;
153
10.8k
            }
154
997k
        }
155
997k
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
54
        int count = 0;
158
360
        while (s != end && Pred(*s)) {
159
306
            ++s;
160
306
            ++count;
161
306
        }
162
54
        if (count < -LEN) [[unlikely]] {
163
0
            return false;
164
0
        }
165
54
    }
166
987k
    return true;
167
1.50M
}
_ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
142
198k
bool skip_qualified_char(const char*& s, const char* end) {
143
198k
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
201k
        while (s != end && Pred(*s)) {
146
3.03k
            ++s;
147
3.03k
        }
148
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
        for (int i = 0; i < LEN; ++i, ++s) {
151
            if (s == end || !Pred(*s)) [[unlikely]] {
152
                return false;
153
            }
154
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
198k
    return true;
167
198k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_
Line
Count
Source
142
308k
bool skip_qualified_char(const char*& s, const char* end) {
143
308k
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
997k
        while (s != end && Pred(*s)) {
146
689k
            ++s;
147
689k
        }
148
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
        for (int i = 0; i < LEN; ++i, ++s) {
151
            if (s == end || !Pred(*s)) [[unlikely]] {
152
                return false;
153
            }
154
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
308k
    return true;
167
308k
}
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
142
54
bool skip_qualified_char(const char*& s, const char* end) {
143
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
        while (s != end && Pred(*s)) {
146
            ++s;
147
        }
148
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
        for (int i = 0; i < LEN; ++i, ++s) {
151
            if (s == end || !Pred(*s)) [[unlikely]] {
152
                return false;
153
            }
154
        }
155
54
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
54
        int count = 0;
158
360
        while (s != end && Pred(*s)) {
159
306
            ++s;
160
306
            ++count;
161
306
        }
162
54
        if (count < -LEN) [[unlikely]] {
163
0
            return false;
164
0
        }
165
54
    }
166
54
    return true;
167
54
}
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_
_ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_
Line
Count
Source
142
35.3k
bool skip_qualified_char(const char*& s, const char* end) {
143
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
        while (s != end && Pred(*s)) {
146
            ++s;
147
        }
148
35.3k
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
59.9k
        for (int i = 0; i < LEN; ++i, ++s) {
151
35.3k
            if (s == end || !Pred(*s)) [[unlikely]] {
152
10.6k
                return false;
153
10.6k
            }
154
35.3k
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
24.6k
    return true;
167
35.3k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_
Line
Count
Source
142
176k
bool skip_qualified_char(const char*& s, const char* end) {
143
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
        while (s != end && Pred(*s)) {
146
            ++s;
147
        }
148
176k
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
352k
        for (int i = 0; i < LEN; ++i, ++s) {
151
176k
            if (s == end || !Pred(*s)) [[unlikely]] {
152
48
                return false;
153
48
            }
154
176k
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
176k
    return true;
167
176k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_
Line
Count
Source
142
439k
bool skip_qualified_char(const char*& s, const char* end) {
143
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
        while (s != end && Pred(*s)) {
146
            ++s;
147
        }
148
439k
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
878k
        for (int i = 0; i < LEN; ++i, ++s) {
151
439k
            if (s == end || !Pred(*s)) [[unlikely]] {
152
42
                return false;
153
42
            }
154
439k
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
439k
    return true;
167
439k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_
Line
Count
Source
142
347k
bool skip_qualified_char(const char*& s, const char* end) {
143
    if constexpr (LEN == 0) {
144
        // Consume any length of characters that match the predicate.
145
        while (s != end && Pred(*s)) {
146
            ++s;
147
        }
148
347k
    } else if constexpr (LEN > 0) {
149
        // Consume exactly LEN characters that match the predicate.
150
694k
        for (int i = 0; i < LEN; ++i, ++s) {
151
347k
            if (s == end || !Pred(*s)) [[unlikely]] {
152
24
                return false;
153
24
            }
154
347k
        }
155
    } else { // LEN < 0
156
        // Consume at least -LEN characters that match the predicate.
157
        int count = 0;
158
        while (s != end && Pred(*s)) {
159
            ++s;
160
            ++count;
161
        }
162
        if (count < -LEN) [[unlikely]] {
163
            return false;
164
        }
165
    }
166
347k
    return true;
167
347k
}
168
169
inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
170
inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
171
inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
172
inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
173
inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;
174
175
176k
inline bool is_delimiter(char c) {
176
176k
    return c == ' ' || c == 'T' || c == ':';
177
176k
}
178
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
179
180
664k
inline bool is_date_sep(char c) {
181
664k
    return c == '-' || c == '/';
182
664k
}
183
inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;
184
185
347k
inline bool is_colon(char c) {
186
347k
    return c == ':';
187
347k
}
188
inline auto consume_one_colon = skip_qualified_char<1, is_colon>;
189
190
// only consume a string of digit, not include sign.
191
// when has MAX_LEN > 0, do greedy match but at most MAX_LEN.
192
// LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits.
193
template <typename T, int LEN = 0, int MAX_LEN = -1>
194
20
bool consume_digit(const char*& s, const char* end, T& out) {
195
20
    static_assert(LEN >= 0);
196
    if constexpr (MAX_LEN > 0) {
197
        out = 0;
198
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
199
            if (s == end || !is_numeric_ascii(*s)) {
200
                if (i < LEN) [[unlikely]] {
201
                    return false;
202
                }
203
                break; // stop consuming if we have consumed enough digits.
204
            }
205
            out = out * 10 + (*s - '0');
206
        }
207
    } else if constexpr (LEN == 0) {
208
        // Consume any length of digits.
209
        out = 0;
210
        while (s != end && is_numeric_ascii(*s)) {
211
            out = out * 10 + (*s - '0');
212
            ++s;
213
        }
214
20
    } else if constexpr (LEN > 0) {
215
        // Consume exactly LEN digits.
216
20
        out = 0;
217
85
        for (int i = 0; i < LEN; ++i, ++s) {
218
65
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
219
0
                return false;
220
0
            }
221
65
            out = out * 10 + (*s - '0');
222
65
        }
223
20
    }
224
20
    return true;
225
20
}
_ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_
Line
Count
Source
194
15
bool consume_digit(const char*& s, const char* end, T& out) {
195
15
    static_assert(LEN >= 0);
196
    if constexpr (MAX_LEN > 0) {
197
        out = 0;
198
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
199
            if (s == end || !is_numeric_ascii(*s)) {
200
                if (i < LEN) [[unlikely]] {
201
                    return false;
202
                }
203
                break; // stop consuming if we have consumed enough digits.
204
            }
205
            out = out * 10 + (*s - '0');
206
        }
207
    } else if constexpr (LEN == 0) {
208
        // Consume any length of digits.
209
        out = 0;
210
        while (s != end && is_numeric_ascii(*s)) {
211
            out = out * 10 + (*s - '0');
212
            ++s;
213
        }
214
15
    } else if constexpr (LEN > 0) {
215
        // Consume exactly LEN digits.
216
15
        out = 0;
217
75
        for (int i = 0; i < LEN; ++i, ++s) {
218
60
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
219
0
                return false;
220
0
            }
221
60
            out = out * 10 + (*s - '0');
222
60
        }
223
15
    }
224
15
    return true;
225
15
}
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_
Line
Count
Source
194
5
bool consume_digit(const char*& s, const char* end, T& out) {
195
5
    static_assert(LEN >= 0);
196
    if constexpr (MAX_LEN > 0) {
197
        out = 0;
198
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
199
            if (s == end || !is_numeric_ascii(*s)) {
200
                if (i < LEN) [[unlikely]] {
201
                    return false;
202
                }
203
                break; // stop consuming if we have consumed enough digits.
204
            }
205
            out = out * 10 + (*s - '0');
206
        }
207
    } else if constexpr (LEN == 0) {
208
        // Consume any length of digits.
209
        out = 0;
210
        while (s != end && is_numeric_ascii(*s)) {
211
            out = out * 10 + (*s - '0');
212
            ++s;
213
        }
214
5
    } else if constexpr (LEN > 0) {
215
        // Consume exactly LEN digits.
216
5
        out = 0;
217
10
        for (int i = 0; i < LEN; ++i, ++s) {
218
5
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
219
0
                return false;
220
0
            }
221
5
            out = out * 10 + (*s - '0');
222
5
        }
223
5
    }
224
5
    return true;
225
5
}
226
227
// specialized version for 2 digits, which is used very often in date/time parsing.
228
template <>
229
518k
inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) {
230
518k
    out = 0;
231
518k
    if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1)))
232
18.2k
            [[unlikely]] {
233
18.2k
        return false;
234
18.2k
    }
235
500k
    out = (s[0] - '0') * 10 + (s[1] - '0');
236
500k
    s += 2; // consume 2 digits
237
500k
    return true;
238
518k
}
239
240
// specialized version for 1 or 2 digits, which is used very often in date/time parsing.
241
template <>
242
987k
inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) {
243
987k
    out = 0;
244
987k
    if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
245
480
        return false;
246
986k
    } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) {
247
        // consume 2 digits
248
970k
        out = (*s - '0') * 10 + (*(s + 1) - '0');
249
970k
        s += 2;
250
970k
    } else {
251
        // consume 1 digit
252
16.6k
        out = *s - '0';
253
16.6k
        ++s;
254
16.6k
    }
255
986k
    return true;
256
987k
}
257
258
template <bool (*Pred)(char)>
259
148
uint32_t count_valid_length(const char* s, const char* end) {
260
148
    DCHECK(s <= end) << "s: " << s << ", end: " << end;
261
148
    uint32_t count = 0;
262
449
    while (s != end && Pred(*s)) {
263
301
        ++count;
264
301
        ++s;
265
301
    }
266
148
    return count;
267
148
}
268
269
inline auto count_digits = count_valid_length<is_numeric_ascii>;
270
271
136
inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
272
136
    std::string result(6, '0');
273
136
    result[0] = sign;
274
136
    result[1] = '0' + (hour_offset / 10);
275
136
    result[2] = '0' + (hour_offset % 10);
276
136
    result[3] = ':';
277
136
    result[4] = '0' + (minute_offset / 10);
278
136
    result[5] = '0' + (minute_offset % 10);
279
136
    DCHECK_EQ(result.size(), 6);
280
136
    return result;
281
136
}
282
283
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
284
// this is significantly faster than libc (atoi/strtol and atof/strtod).
285
//
286
// Strings with leading and trailing whitespaces are accepted.
287
// Branching is heavily optimized for the non-whitespace successful case.
288
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
289
// If that first attempt was unsuccessful, these functions retry the parsing after removing
290
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
291
//
292
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
293
// data type.  This is different from hive, which returns NULL for overflow slots for int types
294
// and inf/-inf for float types.
295
//
296
// Things we tried that did not work:
297
//  - lookup table for converting character to digit
298
// Improvements (TODO):
299
//  - Validate input using _simd_compare_ranges
300
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
301
class StringParser {
302
public:
303
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
304
305
    template <typename T>
306
447k
    static T numeric_limits(bool negative) {
307
447k
        if constexpr (std::is_same_v<T, __int128>) {
308
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
309
399k
        } else {
310
399k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
399k
        }
312
447k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
306
48.3k
    static T numeric_limits(bool negative) {
307
48.3k
        if constexpr (std::is_same_v<T, __int128>) {
308
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
309
        } else {
310
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
        }
312
48.3k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
306
165k
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
165k
        } else {
310
165k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
165k
        }
312
165k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
306
76.9k
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
76.9k
        } else {
310
76.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
76.9k
        }
312
76.9k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
306
68.2k
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
68.2k
        } else {
310
68.2k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
68.2k
        }
312
68.2k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
306
87.9k
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
87.9k
        } else {
310
87.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
87.9k
        }
312
87.9k
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
306
147
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
147
        } else {
310
147
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
147
        }
312
147
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
306
4
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
4
        } else {
310
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
4
        }
312
4
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
306
4
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
4
        } else {
310
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
4
        }
312
4
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
306
21
    static T numeric_limits(bool negative) {
307
        if constexpr (std::is_same_v<T, __int128>) {
308
            return negative ? MIN_INT128 : MAX_INT128;
309
21
        } else {
310
21
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
311
21
        }
312
21
    }
313
314
    template <typename T>
315
873k
    static T get_scale_multiplier(int scale) {
316
873k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
317
873k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
318
873k
                      "You can only instantiate as int32_t, int64_t, __int128.");
319
873k
        if constexpr (std::is_same_v<T, int32_t>) {
320
133k
            return common::exp10_i32(scale);
321
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
322
182k
            return common::exp10_i64(scale);
323
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
324
237k
            return common::exp10_i128(scale);
325
320k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
326
320k
            return common::exp10_i256(scale);
327
320k
        }
328
873k
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
315
133k
    static T get_scale_multiplier(int scale) {
316
133k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
317
133k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
318
133k
                      "You can only instantiate as int32_t, int64_t, __int128.");
319
133k
        if constexpr (std::is_same_v<T, int32_t>) {
320
133k
            return common::exp10_i32(scale);
321
        } else if constexpr (std::is_same_v<T, int64_t>) {
322
            return common::exp10_i64(scale);
323
        } else if constexpr (std::is_same_v<T, __int128>) {
324
            return common::exp10_i128(scale);
325
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
326
            return common::exp10_i256(scale);
327
        }
328
133k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
315
182k
    static T get_scale_multiplier(int scale) {
316
182k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
317
182k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
318
182k
                      "You can only instantiate as int32_t, int64_t, __int128.");
319
        if constexpr (std::is_same_v<T, int32_t>) {
320
            return common::exp10_i32(scale);
321
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
322
182k
            return common::exp10_i64(scale);
323
        } else if constexpr (std::is_same_v<T, __int128>) {
324
            return common::exp10_i128(scale);
325
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
326
            return common::exp10_i256(scale);
327
        }
328
182k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
315
237k
    static T get_scale_multiplier(int scale) {
316
237k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
317
237k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
318
237k
                      "You can only instantiate as int32_t, int64_t, __int128.");
319
        if constexpr (std::is_same_v<T, int32_t>) {
320
            return common::exp10_i32(scale);
321
        } else if constexpr (std::is_same_v<T, int64_t>) {
322
            return common::exp10_i64(scale);
323
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
324
237k
            return common::exp10_i128(scale);
325
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
326
            return common::exp10_i256(scale);
327
        }
328
237k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
315
320k
    static T get_scale_multiplier(int scale) {
316
320k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
317
320k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
318
320k
                      "You can only instantiate as int32_t, int64_t, __int128.");
319
        if constexpr (std::is_same_v<T, int32_t>) {
320
            return common::exp10_i32(scale);
321
        } else if constexpr (std::is_same_v<T, int64_t>) {
322
            return common::exp10_i64(scale);
323
        } else if constexpr (std::is_same_v<T, __int128>) {
324
            return common::exp10_i128(scale);
325
320k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
326
320k
            return common::exp10_i256(scale);
327
320k
        }
328
320k
    }
329
330
    // This is considerably faster than glibc's implementation (25x).
331
    // Assumes s represents a decimal number.
332
    template <typename T, bool enable_strict_mode = false>
333
354k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
354k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
354k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
300k
            return ans;
337
300k
        }
338
53.9k
        s = skip_leading_whitespace(s, len);
339
53.9k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
354k
    }
_ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
45.2k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
45.2k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
45.2k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
43.9k
            return ans;
337
43.9k
        }
338
1.33k
        s = skip_leading_whitespace(s, len);
339
1.33k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
45.2k
    }
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
95.4k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
95.4k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
95.4k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
66.2k
            return ans;
337
66.2k
        }
338
29.2k
        s = skip_leading_whitespace(s, len);
339
29.2k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
95.4k
    }
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
66.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
66.3k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
66.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
58.1k
            return ans;
337
58.1k
        }
338
8.12k
        s = skip_leading_whitespace(s, len);
339
8.12k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
66.3k
    }
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
61.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
61.3k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
61.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
55.0k
            return ans;
337
55.0k
        }
338
6.33k
        s = skip_leading_whitespace(s, len);
339
6.33k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
61.3k
    }
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
81.0k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
81.0k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
81.0k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
76.4k
            return ans;
337
76.4k
        }
338
4.50k
        s = skip_leading_whitespace(s, len);
339
4.50k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
81.0k
    }
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
1.00k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
1.00k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
1.00k
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
88
            return ans;
337
88
        }
338
912
        s = skip_leading_whitespace(s, len);
339
912
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
1.00k
    }
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
984
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
984
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
984
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
88
            return ans;
337
88
        }
338
896
        s = skip_leading_whitespace(s, len);
339
896
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
984
    }
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
968
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
968
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
968
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
88
            return ans;
337
88
        }
338
880
        s = skip_leading_whitespace(s, len);
339
880
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
968
    }
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
961
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
961
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
961
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
94
            return ans;
337
94
        }
338
867
        s = skip_leading_whitespace(s, len);
339
867
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
961
    }
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
936
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
936
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
936
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
88
            return ans;
337
88
        }
338
848
        s = skip_leading_whitespace(s, len);
339
848
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
936
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
4
            return ans;
337
4
        }
338
0
        s = skip_leading_whitespace(s, len);
339
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
4
    }
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
4
            return ans;
337
4
        }
338
0
        s = skip_leading_whitespace(s, len);
339
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
4
    }
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
333
20
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
334
20
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
335
20
        if (LIKELY(*result == PARSE_SUCCESS)) {
336
20
            return ans;
337
20
        }
338
0
        s = skip_leading_whitespace(s, len);
339
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
340
20
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE
341
342
    // This is considerably faster than glibc's implementation.
343
    // In the case of overflow, the max/min value for the data type will be returned.
344
    // Assumes s represents a decimal number.
345
    template <typename T>
346
1.37k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
347
1.37k
        s = skip_ascii_whitespaces(s, len);
348
1.37k
        return string_to_unsigned_int_internal<T>(s, len, result);
349
1.37k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
346
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
347
343
        s = skip_ascii_whitespaces(s, len);
348
343
        return string_to_unsigned_int_internal<T>(s, len, result);
349
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
346
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
347
343
        s = skip_ascii_whitespaces(s, len);
348
343
        return string_to_unsigned_int_internal<T>(s, len, result);
349
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
346
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
347
343
        s = skip_ascii_whitespaces(s, len);
348
343
        return string_to_unsigned_int_internal<T>(s, len, result);
349
343
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
346
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
347
343
        s = skip_ascii_whitespaces(s, len);
348
343
        return string_to_unsigned_int_internal<T>(s, len, result);
349
343
    }
350
351
    // Convert a string s representing a number in given base into a decimal number.
352
    template <typename T>
353
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
354
27.8k
                                  ParseResult* result) {
355
27.8k
        s = skip_ascii_whitespaces(s, len);
356
27.8k
        return string_to_int_internal<T>(s, len, base, result);
357
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
354
26.4k
                                  ParseResult* result) {
355
26.4k
        s = skip_ascii_whitespaces(s, len);
356
26.4k
        return string_to_int_internal<T>(s, len, base, result);
357
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
354
490
                                  ParseResult* result) {
355
490
        s = skip_ascii_whitespaces(s, len);
356
490
        return string_to_int_internal<T>(s, len, base, result);
357
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
354
441
                                  ParseResult* result) {
355
441
        s = skip_ascii_whitespaces(s, len);
356
441
        return string_to_int_internal<T>(s, len, base, result);
357
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
354
441
                                  ParseResult* result) {
355
441
        s = skip_ascii_whitespaces(s, len);
356
441
        return string_to_int_internal<T>(s, len, base, result);
357
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
354
1
                                  ParseResult* result) {
355
1
        s = skip_ascii_whitespaces(s, len);
356
1
        return string_to_int_internal<T>(s, len, base, result);
357
1
    }
358
359
    template <typename T>
360
152k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
361
152k
        s = skip_ascii_whitespaces(s, len);
362
152k
        return string_to_float_internal<T>(s, len, result);
363
152k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
360
87.6k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
361
87.6k
        s = skip_ascii_whitespaces(s, len);
362
87.6k
        return string_to_float_internal<T>(s, len, result);
363
87.6k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
360
65.1k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
361
65.1k
        s = skip_ascii_whitespaces(s, len);
362
65.1k
        return string_to_float_internal<T>(s, len, result);
363
65.1k
    }
364
365
    // Parses a string for 'true' or 'false', case insensitive.
366
11.3k
    static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
367
11.3k
        s = skip_ascii_whitespaces(s, len);
368
11.3k
        return string_to_bool_internal(s, len, result);
369
11.3k
    }
370
371
    template <PrimitiveType P>
372
    static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
373
            const char* __restrict s, size_t len, int type_precision, int type_scale,
374
            ParseResult* result);
375
376
    template <typename T>
377
    static Status split_string_to_map(const std::string& base, const T element_separator,
378
                                      const T key_value_separator,
379
                                      std::map<std::string, std::string>* result) {
380
        int key_pos = 0;
381
        int key_end;
382
        int val_pos;
383
        int val_end;
384
385
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
386
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
387
                std::string::npos) {
388
                break;
389
            }
390
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
391
                val_end = base.size();
392
            }
393
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
394
                                          base.substr(val_pos, val_end - val_pos)));
395
            key_pos = val_end;
396
            if (key_pos != std::string::npos) {
397
                ++key_pos;
398
            }
399
        }
400
401
        return Status::OK();
402
    }
403
404
    // This is considerably faster than glibc's implementation.
405
    // In the case of overflow, the max/min value for the data type will be returned.
406
    // Assumes s represents a decimal number.
407
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
408
    template <typename T, bool enable_strict_mode = false>
409
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
410
411
    // This is considerably faster than glibc's implementation.
412
    // In the case of overflow, the max/min value for the data type will be returned.
413
    // Assumes s represents a decimal number.
414
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
415
    template <typename T>
416
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
417
                                                    ParseResult* result);
418
419
    // Convert a string s representing a number in given base into a decimal number.
420
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
421
    template <typename T>
422
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
423
                                           ParseResult* result);
424
425
    // Converts an ascii string to an integer of type T assuming it cannot overflow
426
    // and the number is positive.
427
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
428
    template <typename T, bool enable_strict_mode = false>
429
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
430
                                              ParseResult* result);
431
432
    // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
433
    // char is not a digit.
434
    template <typename T>
435
    static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
436
                                                      ParseResult* result);
437
438
    // This is considerably faster than glibc's implementation (>100x why???)
439
    // No special case handling needs to be done for overflows, the floating point spec
440
    // already does it and will cap the values to -inf/inf
441
    // To avoid inaccurate conversions this function falls back to strtod for
442
    // scientific notation.
443
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
444
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
445
    template <typename T>
446
    static inline T string_to_float_internal(const char* __restrict s, int len,
447
                                             ParseResult* result);
448
449
    // parses a string for 'true' or 'false', case insensitive
450
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
451
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
452
                                               ParseResult* result);
453
454
    // Returns true if s only contains whitespace.
455
3.54k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
456
6.44k
        for (int i = 0; i < len; ++i) {
457
6.00k
            if (!LIKELY(is_whitespace_ascii(s[i]))) {
458
3.10k
                return false;
459
3.10k
            }
460
6.00k
        }
461
440
        return true;
462
3.54k
    }
463
464
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
465
3.65k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
466
3.65k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
467
3.65k
    }
468
469
2.67k
    static inline bool is_all_digit(const char* __restrict s, int len) {
470
5.57k
        for (int i = 0; i < len; ++i) {
471
3.05k
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
472
151
                return false;
473
151
            }
474
3.05k
        }
475
2.52k
        return true;
476
2.67k
    }
477
}; // end of class StringParser
478
479
template <typename T, bool enable_strict_mode>
480
408k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
408k
    if (UNLIKELY(len <= 0)) {
482
2.25k
        *result = PARSE_FAILURE;
483
2.25k
        return 0;
484
2.25k
    }
485
486
406k
    using UnsignedT = MakeUnsignedT<T>;
487
406k
    UnsignedT val = 0;
488
406k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
406k
    bool negative = false;
490
406k
    int i = 0;
491
406k
    switch (*s) {
492
102k
    case '-':
493
102k
        negative = true;
494
102k
        max_val += 1;
495
102k
        [[fallthrough]];
496
105k
    case '+':
497
105k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
105k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
406k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
406k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
240k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
240k
        return static_cast<T>(negative ? -val : val);
509
240k
    }
510
511
166k
    const T max_div_10 = max_val / 10;
512
166k
    const T max_mod_10 = max_val % 10;
513
514
166k
    int first = i;
515
1.68M
    for (; i < len; ++i) {
516
1.61M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
1.57M
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
1.57M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
48.4k
                *result = PARSE_OVERFLOW;
521
48.4k
                return negative ? -max_val : max_val;
522
48.4k
            }
523
1.52M
            val = val * 10 + digit;
524
1.52M
        } else {
525
45.9k
            if constexpr (enable_strict_mode) {
526
4.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
3.78k
                    *result = PARSE_FAILURE;
529
3.78k
                    return 0;
530
3.78k
                }
531
41.8k
            } else {
532
                // Save original position where non-digit was found
533
41.8k
                int remaining_len = len - i;
534
41.8k
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
41.8k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
41.8k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
41.8k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
28.9k
                    *result = PARSE_FAILURE;
542
28.9k
                    return 0;
543
28.9k
                }
544
41.8k
            }
545
            // Returning here is slightly faster than breaking the loop.
546
13.1k
            *result = PARSE_SUCCESS;
547
45.9k
            return static_cast<T>(negative ? -val : val);
548
45.9k
        }
549
1.61M
    }
550
71.7k
    *result = PARSE_SUCCESS;
551
71.7k
    return static_cast<T>(negative ? -val : val);
552
166k
}
_ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
46.6k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
46.6k
    if (UNLIKELY(len <= 0)) {
482
44
        *result = PARSE_FAILURE;
483
44
        return 0;
484
44
    }
485
486
46.5k
    using UnsignedT = MakeUnsignedT<T>;
487
46.5k
    UnsignedT val = 0;
488
46.5k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
46.5k
    bool negative = false;
490
46.5k
    int i = 0;
491
46.5k
    switch (*s) {
492
3.54k
    case '-':
493
3.54k
        negative = true;
494
3.54k
        max_val += 1;
495
3.54k
        [[fallthrough]];
496
3.82k
    case '+':
497
3.82k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
3.82k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
46.5k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
46.5k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
41.9k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
41.9k
        return static_cast<T>(negative ? -val : val);
509
41.9k
    }
510
511
4.65k
    const T max_div_10 = max_val / 10;
512
4.65k
    const T max_mod_10 = max_val % 10;
513
514
4.65k
    int first = i;
515
172k
    for (; i < len; ++i) {
516
169k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
168k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
168k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
512
                *result = PARSE_OVERFLOW;
521
512
                return negative ? -max_val : max_val;
522
512
            }
523
168k
            val = val * 10 + digit;
524
168k
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
536
            } else {
532
                // Save original position where non-digit was found
533
536
                int remaining_len = len - i;
534
536
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
536
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
376
                    *result = PARSE_FAILURE;
542
376
                    return 0;
543
376
                }
544
536
            }
545
            // Returning here is slightly faster than breaking the loop.
546
160
            *result = PARSE_SUCCESS;
547
536
            return static_cast<T>(negative ? -val : val);
548
536
        }
549
169k
    }
550
3.60k
    *result = PARSE_SUCCESS;
551
3.60k
    return static_cast<T>(negative ? -val : val);
552
4.65k
}
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
124k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
124k
    if (UNLIKELY(len <= 0)) {
482
218
        *result = PARSE_FAILURE;
483
218
        return 0;
484
218
    }
485
486
124k
    using UnsignedT = MakeUnsignedT<T>;
487
124k
    UnsignedT val = 0;
488
124k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
124k
    bool negative = false;
490
124k
    int i = 0;
491
124k
    switch (*s) {
492
22.4k
    case '-':
493
22.4k
        negative = true;
494
22.4k
        max_val += 1;
495
22.4k
        [[fallthrough]];
496
22.9k
    case '+':
497
22.9k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
22.9k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
124k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
124k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
60.4k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
60.4k
        return static_cast<T>(negative ? -val : val);
509
60.4k
    }
510
511
64.0k
    const T max_div_10 = max_val / 10;
512
64.0k
    const T max_mod_10 = max_val % 10;
513
514
64.0k
    int first = i;
515
154k
    for (; i < len; ++i) {
516
147k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
111k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
111k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
21.2k
                *result = PARSE_OVERFLOW;
521
21.2k
                return negative ? -max_val : max_val;
522
21.2k
            }
523
90.2k
            val = val * 10 + digit;
524
90.2k
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
35.9k
            } else {
532
                // Save original position where non-digit was found
533
35.9k
                int remaining_len = len - i;
534
35.9k
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
35.9k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
35.9k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
35.9k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
24.5k
                    *result = PARSE_FAILURE;
542
24.5k
                    return 0;
543
24.5k
                }
544
35.9k
            }
545
            // Returning here is slightly faster than breaking the loop.
546
11.4k
            *result = PARSE_SUCCESS;
547
35.9k
            return static_cast<T>(negative ? -val : val);
548
35.9k
        }
549
147k
    }
550
6.80k
    *result = PARSE_SUCCESS;
551
6.80k
    return static_cast<T>(negative ? -val : val);
552
64.0k
}
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
74.4k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
74.4k
    if (UNLIKELY(len <= 0)) {
482
8
        *result = PARSE_FAILURE;
483
8
        return 0;
484
8
    }
485
486
74.4k
    using UnsignedT = MakeUnsignedT<T>;
487
74.4k
    UnsignedT val = 0;
488
74.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
74.4k
    bool negative = false;
490
74.4k
    int i = 0;
491
74.4k
    switch (*s) {
492
12.8k
    case '-':
493
12.8k
        negative = true;
494
12.8k
        max_val += 1;
495
12.8k
        [[fallthrough]];
496
13.1k
    case '+':
497
13.1k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
13.1k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
74.4k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
74.4k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
50.8k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
50.8k
        return static_cast<T>(negative ? -val : val);
509
50.8k
    }
510
511
23.6k
    const T max_div_10 = max_val / 10;
512
23.6k
    const T max_mod_10 = max_val % 10;
513
514
23.6k
    int first = i;
515
123k
    for (; i < len; ++i) {
516
114k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
112k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
112k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
12.9k
                *result = PARSE_OVERFLOW;
521
12.9k
                return negative ? -max_val : max_val;
522
12.9k
            }
523
99.5k
            val = val * 10 + digit;
524
99.5k
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
1.90k
            } else {
532
                // Save original position where non-digit was found
533
1.90k
                int remaining_len = len - i;
534
1.90k
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
1.90k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
1.90k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
1.90k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
1.29k
                    *result = PARSE_FAILURE;
542
1.29k
                    return 0;
543
1.29k
                }
544
1.90k
            }
545
            // Returning here is slightly faster than breaking the loop.
546
610
            *result = PARSE_SUCCESS;
547
1.90k
            return static_cast<T>(negative ? -val : val);
548
1.90k
        }
549
114k
    }
550
8.80k
    *result = PARSE_SUCCESS;
551
8.80k
    return static_cast<T>(negative ? -val : val);
552
23.6k
}
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
67.7k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
67.7k
    if (UNLIKELY(len <= 0)) {
482
1.92k
        *result = PARSE_FAILURE;
483
1.92k
        return 0;
484
1.92k
    }
485
486
65.7k
    using UnsignedT = MakeUnsignedT<T>;
487
65.7k
    UnsignedT val = 0;
488
65.7k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
65.7k
    bool negative = false;
490
65.7k
    int i = 0;
491
65.7k
    switch (*s) {
492
10.1k
    case '-':
493
10.1k
        negative = true;
494
10.1k
        max_val += 1;
495
10.1k
        [[fallthrough]];
496
10.5k
    case '+':
497
10.5k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
10.5k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
65.7k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
65.7k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
52.2k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
52.2k
        return static_cast<T>(negative ? -val : val);
509
52.2k
    }
510
511
13.5k
    const T max_div_10 = max_val / 10;
512
13.5k
    const T max_mod_10 = max_val % 10;
513
514
13.5k
    int first = i;
515
128k
    for (; i < len; ++i) {
516
122k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
120k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
120k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
5.79k
                *result = PARSE_OVERFLOW;
521
5.79k
                return negative ? -max_val : max_val;
522
5.79k
            }
523
115k
            val = val * 10 + digit;
524
115k
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
1.93k
            } else {
532
                // Save original position where non-digit was found
533
1.93k
                int remaining_len = len - i;
534
1.93k
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
1.93k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
1.93k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
1.93k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
1.54k
                    *result = PARSE_FAILURE;
542
1.54k
                    return 0;
543
1.54k
                }
544
1.93k
            }
545
            // Returning here is slightly faster than breaking the loop.
546
386
            *result = PARSE_SUCCESS;
547
1.93k
            return static_cast<T>(negative ? -val : val);
548
1.93k
        }
549
122k
    }
550
5.82k
    *result = PARSE_SUCCESS;
551
5.82k
    return static_cast<T>(negative ? -val : val);
552
13.5k
}
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
85.5k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
85.5k
    if (UNLIKELY(len <= 0)) {
482
14
        *result = PARSE_FAILURE;
483
14
        return 0;
484
14
    }
485
486
85.4k
    using UnsignedT = MakeUnsignedT<T>;
487
85.4k
    UnsignedT val = 0;
488
85.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
85.4k
    bool negative = false;
490
85.4k
    int i = 0;
491
85.4k
    switch (*s) {
492
50.2k
    case '-':
493
50.2k
        negative = true;
494
50.2k
        max_val += 1;
495
50.2k
        [[fallthrough]];
496
50.5k
    case '+':
497
50.5k
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
50.5k
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
85.4k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
85.4k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
32.3k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
32.3k
        return static_cast<T>(negative ? -val : val);
509
32.3k
    }
510
511
53.1k
    const T max_div_10 = max_val / 10;
512
53.1k
    const T max_mod_10 = max_val % 10;
513
514
53.1k
    int first = i;
515
1.03M
    for (; i < len; ++i) {
516
989k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
988k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
988k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
5.22k
                *result = PARSE_OVERFLOW;
521
5.22k
                return negative ? -max_val : max_val;
522
5.22k
            }
523
983k
            val = val * 10 + digit;
524
983k
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
1.49k
            } else {
532
                // Save original position where non-digit was found
533
1.49k
                int remaining_len = len - i;
534
1.49k
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
1.49k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
1.49k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
1.49k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
1.24k
                    *result = PARSE_FAILURE;
542
1.24k
                    return 0;
543
1.24k
                }
544
1.49k
            }
545
            // Returning here is slightly faster than breaking the loop.
546
244
            *result = PARSE_SUCCESS;
547
1.49k
            return static_cast<T>(negative ? -val : val);
548
1.49k
        }
549
989k
    }
550
46.4k
    *result = PARSE_SUCCESS;
551
46.4k
    return static_cast<T>(negative ? -val : val);
552
53.1k
}
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
149
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
149
    if (UNLIKELY(len <= 0)) {
482
2
        *result = PARSE_FAILURE;
483
2
        return 0;
484
2
    }
485
486
147
    using UnsignedT = MakeUnsignedT<T>;
487
147
    UnsignedT val = 0;
488
147
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
147
    bool negative = false;
490
147
    int i = 0;
491
147
    switch (*s) {
492
0
    case '-':
493
0
        negative = true;
494
0
        max_val += 1;
495
0
        [[fallthrough]];
496
0
    case '+':
497
0
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
0
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
147
    }
504
505
    // This is the fast path where the string cannot overflow.
506
147
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
147
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
147
        return static_cast<T>(negative ? -val : val);
509
147
    }
510
511
0
    const T max_div_10 = max_val / 10;
512
0
    const T max_mod_10 = max_val % 10;
513
514
0
    int first = i;
515
0
    for (; i < len; ++i) {
516
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
0
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
0
                *result = PARSE_OVERFLOW;
521
0
                return negative ? -max_val : max_val;
522
0
            }
523
0
            val = val * 10 + digit;
524
0
        } else {
525
0
            if constexpr (enable_strict_mode) {
526
0
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
0
                    *result = PARSE_FAILURE;
529
0
                    return 0;
530
0
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
0
            *result = PARSE_SUCCESS;
547
0
            return static_cast<T>(negative ? -val : val);
548
0
        }
549
0
    }
550
0
    *result = PARSE_SUCCESS;
551
0
    return static_cast<T>(negative ? -val : val);
552
0
}
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
1.91k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
1.91k
    if (UNLIKELY(len <= 0)) {
482
8
        *result = PARSE_FAILURE;
483
8
        return 0;
484
8
    }
485
486
1.90k
    using UnsignedT = MakeUnsignedT<T>;
487
1.90k
    UnsignedT val = 0;
488
1.90k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
1.90k
    bool negative = false;
490
1.90k
    int i = 0;
491
1.90k
    switch (*s) {
492
632
    case '-':
493
632
        negative = true;
494
632
        max_val += 1;
495
632
        [[fallthrough]];
496
988
    case '+':
497
988
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
988
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
1.90k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
1.90k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
48
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
48
        return static_cast<T>(negative ? -val : val);
509
48
    }
510
511
1.85k
    const T max_div_10 = max_val / 10;
512
1.85k
    const T max_mod_10 = max_val % 10;
513
514
1.85k
    int first = i;
515
6.58k
    for (; i < len; ++i) {
516
6.51k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
5.32k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
5.32k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
600
                *result = PARSE_OVERFLOW;
521
600
                return negative ? -max_val : max_val;
522
600
            }
523
4.72k
            val = val * 10 + digit;
524
4.72k
        } else {
525
1.18k
            if constexpr (enable_strict_mode) {
526
1.18k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
1.10k
                    *result = PARSE_FAILURE;
529
1.10k
                    return 0;
530
1.10k
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
88
            *result = PARSE_SUCCESS;
547
1.18k
            return static_cast<T>(negative ? -val : val);
548
1.18k
        }
549
6.51k
    }
550
68
    *result = PARSE_SUCCESS;
551
68
    return static_cast<T>(negative ? -val : val);
552
1.85k
}
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
1.88k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
1.88k
    if (UNLIKELY(len <= 0)) {
482
8
        *result = PARSE_FAILURE;
483
8
        return 0;
484
8
    }
485
486
1.87k
    using UnsignedT = MakeUnsignedT<T>;
487
1.87k
    UnsignedT val = 0;
488
1.87k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
1.87k
    bool negative = false;
490
1.87k
    int i = 0;
491
1.87k
    switch (*s) {
492
620
    case '-':
493
620
        negative = true;
494
620
        max_val += 1;
495
620
        [[fallthrough]];
496
970
    case '+':
497
970
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
970
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
1.87k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
1.87k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
168
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
168
        return static_cast<T>(negative ? -val : val);
509
168
    }
510
511
1.70k
    const T max_div_10 = max_val / 10;
512
1.70k
    const T max_mod_10 = max_val % 10;
513
514
1.70k
    int first = i;
515
7.87k
    for (; i < len; ++i) {
516
7.83k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
6.74k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
6.74k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
576
                *result = PARSE_OVERFLOW;
521
576
                return negative ? -max_val : max_val;
522
576
            }
523
6.17k
            val = val * 10 + digit;
524
6.17k
        } else {
525
1.08k
            if constexpr (enable_strict_mode) {
526
1.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
1.00k
                    *result = PARSE_FAILURE;
529
1.00k
                    return 0;
530
1.00k
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
88
            *result = PARSE_SUCCESS;
547
1.08k
            return static_cast<T>(negative ? -val : val);
548
1.08k
        }
549
7.83k
    }
550
40
    *result = PARSE_SUCCESS;
551
40
    return static_cast<T>(negative ? -val : val);
552
1.70k
}
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
1.87k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
1.87k
    if (UNLIKELY(len <= 0)) {
482
8
        *result = PARSE_FAILURE;
483
8
        return 0;
484
8
    }
485
486
1.86k
    using UnsignedT = MakeUnsignedT<T>;
487
1.86k
    UnsignedT val = 0;
488
1.86k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
1.86k
    bool negative = false;
490
1.86k
    int i = 0;
491
1.86k
    switch (*s) {
492
608
    case '-':
493
608
        negative = true;
494
608
        max_val += 1;
495
608
        [[fallthrough]];
496
952
    case '+':
497
952
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
952
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
1.86k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
1.86k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
488
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
488
        return static_cast<T>(negative ? -val : val);
509
488
    }
510
511
1.37k
    const T max_div_10 = max_val / 10;
512
1.37k
    const T max_mod_10 = max_val % 10;
513
514
1.37k
    int first = i;
515
10.7k
    for (; i < len; ++i) {
516
10.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
9.90k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
9.90k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
552
                *result = PARSE_OVERFLOW;
521
552
                return negative ? -max_val : max_val;
522
552
            }
523
9.34k
            val = val * 10 + digit;
524
9.34k
        } else {
525
795
            if constexpr (enable_strict_mode) {
526
795
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
735
                    *result = PARSE_FAILURE;
529
735
                    return 0;
530
735
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
60
            *result = PARSE_SUCCESS;
547
795
            return static_cast<T>(negative ? -val : val);
548
795
        }
549
10.6k
    }
550
32
    *result = PARSE_SUCCESS;
551
32
    return static_cast<T>(negative ? -val : val);
552
1.37k
}
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
1.83k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
1.83k
    if (UNLIKELY(len <= 0)) {
482
10
        *result = PARSE_FAILURE;
483
10
        return 0;
484
10
    }
485
486
1.82k
    using UnsignedT = MakeUnsignedT<T>;
487
1.82k
    UnsignedT val = 0;
488
1.82k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
1.82k
    bool negative = false;
490
1.82k
    int i = 0;
491
1.82k
    switch (*s) {
492
596
    case '-':
493
596
        negative = true;
494
596
        max_val += 1;
495
596
        [[fallthrough]];
496
934
    case '+':
497
934
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
934
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
1.82k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
1.82k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
738
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
738
        return static_cast<T>(negative ? -val : val);
509
738
    }
510
511
1.08k
    const T max_div_10 = max_val / 10;
512
1.08k
    const T max_mod_10 = max_val % 10;
513
514
1.08k
    int first = i;
515
16.9k
    for (; i < len; ++i) {
516
16.8k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
16.3k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
16.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
528
                *result = PARSE_OVERFLOW;
521
528
                return negative ? -max_val : max_val;
522
528
            }
523
15.8k
            val = val * 10 + digit;
524
15.8k
        } else {
525
523
            if constexpr (enable_strict_mode) {
526
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
491
                    *result = PARSE_FAILURE;
529
491
                    return 0;
530
491
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
32
            *result = PARSE_SUCCESS;
547
523
            return static_cast<T>(negative ? -val : val);
548
523
        }
549
16.8k
    }
550
32
    *result = PARSE_SUCCESS;
551
32
    return static_cast<T>(negative ? -val : val);
552
1.08k
}
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
1.78k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
1.78k
    if (UNLIKELY(len <= 0)) {
482
8
        *result = PARSE_FAILURE;
483
8
        return 0;
484
8
    }
485
486
1.77k
    using UnsignedT = MakeUnsignedT<T>;
487
1.77k
    UnsignedT val = 0;
488
1.77k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
1.77k
    bool negative = false;
490
1.77k
    int i = 0;
491
1.77k
    switch (*s) {
492
584
    case '-':
493
584
        negative = true;
494
584
        max_val += 1;
495
584
        [[fallthrough]];
496
916
    case '+':
497
916
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
916
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
1.77k
    }
504
505
    // This is the fast path where the string cannot overflow.
506
1.77k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
752
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
752
        return static_cast<T>(negative ? -val : val);
509
752
    }
510
511
1.02k
    const T max_div_10 = max_val / 10;
512
1.02k
    const T max_mod_10 = max_val % 10;
513
514
1.02k
    int first = i;
515
31.3k
    for (; i < len; ++i) {
516
31.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
30.7k
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
30.7k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
504
                *result = PARSE_OVERFLOW;
521
504
                return negative ? -max_val : max_val;
522
504
            }
523
30.2k
            val = val * 10 + digit;
524
30.2k
        } else {
525
488
            if constexpr (enable_strict_mode) {
526
488
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
456
                    *result = PARSE_FAILURE;
529
456
                    return 0;
530
456
                }
531
            } else {
532
                // Save original position where non-digit was found
533
                int remaining_len = len - i;
534
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
                    *result = PARSE_FAILURE;
542
                    return 0;
543
                }
544
            }
545
            // Returning here is slightly faster than breaking the loop.
546
32
            *result = PARSE_SUCCESS;
547
488
            return static_cast<T>(negative ? -val : val);
548
488
        }
549
31.2k
    }
550
32
    *result = PARSE_SUCCESS;
551
32
    return static_cast<T>(negative ? -val : val);
552
1.02k
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
4
    if (UNLIKELY(len <= 0)) {
482
0
        *result = PARSE_FAILURE;
483
0
        return 0;
484
0
    }
485
486
4
    using UnsignedT = MakeUnsignedT<T>;
487
4
    UnsignedT val = 0;
488
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
4
    bool negative = false;
490
4
    int i = 0;
491
4
    switch (*s) {
492
0
    case '-':
493
0
        negative = true;
494
0
        max_val += 1;
495
0
        [[fallthrough]];
496
0
    case '+':
497
0
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
0
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
4
    }
504
505
    // This is the fast path where the string cannot overflow.
506
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
4
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
4
        return static_cast<T>(negative ? -val : val);
509
4
    }
510
511
0
    const T max_div_10 = max_val / 10;
512
0
    const T max_mod_10 = max_val % 10;
513
514
0
    int first = i;
515
0
    for (; i < len; ++i) {
516
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
0
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
0
                *result = PARSE_OVERFLOW;
521
0
                return negative ? -max_val : max_val;
522
0
            }
523
0
            val = val * 10 + digit;
524
0
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
0
            } else {
532
                // Save original position where non-digit was found
533
0
                int remaining_len = len - i;
534
0
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
0
                    *result = PARSE_FAILURE;
542
0
                    return 0;
543
0
                }
544
0
            }
545
            // Returning here is slightly faster than breaking the loop.
546
0
            *result = PARSE_SUCCESS;
547
0
            return static_cast<T>(negative ? -val : val);
548
0
        }
549
0
    }
550
0
    *result = PARSE_SUCCESS;
551
0
    return static_cast<T>(negative ? -val : val);
552
0
}
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
4
    if (UNLIKELY(len <= 0)) {
482
0
        *result = PARSE_FAILURE;
483
0
        return 0;
484
0
    }
485
486
4
    using UnsignedT = MakeUnsignedT<T>;
487
4
    UnsignedT val = 0;
488
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
4
    bool negative = false;
490
4
    int i = 0;
491
4
    switch (*s) {
492
0
    case '-':
493
0
        negative = true;
494
0
        max_val += 1;
495
0
        [[fallthrough]];
496
0
    case '+':
497
0
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
0
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
4
    }
504
505
    // This is the fast path where the string cannot overflow.
506
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
0
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
0
        return static_cast<T>(negative ? -val : val);
509
0
    }
510
511
4
    const T max_div_10 = max_val / 10;
512
4
    const T max_mod_10 = max_val % 10;
513
514
4
    int first = i;
515
84
    for (; i < len; ++i) {
516
80
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
80
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
80
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
0
                *result = PARSE_OVERFLOW;
521
0
                return negative ? -max_val : max_val;
522
0
            }
523
80
            val = val * 10 + digit;
524
80
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
0
            } else {
532
                // Save original position where non-digit was found
533
0
                int remaining_len = len - i;
534
0
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
0
                    *result = PARSE_FAILURE;
542
0
                    return 0;
543
0
                }
544
0
            }
545
            // Returning here is slightly faster than breaking the loop.
546
0
            *result = PARSE_SUCCESS;
547
0
            return static_cast<T>(negative ? -val : val);
548
0
        }
549
80
    }
550
4
    *result = PARSE_SUCCESS;
551
4
    return static_cast<T>(negative ? -val : val);
552
4
}
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
20
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
481
20
    if (UNLIKELY(len <= 0)) {
482
0
        *result = PARSE_FAILURE;
483
0
        return 0;
484
0
    }
485
486
20
    using UnsignedT = MakeUnsignedT<T>;
487
20
    UnsignedT val = 0;
488
20
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
489
20
    bool negative = false;
490
20
    int i = 0;
491
20
    switch (*s) {
492
0
    case '-':
493
0
        negative = true;
494
0
        max_val += 1;
495
0
        [[fallthrough]];
496
0
    case '+':
497
0
        ++i;
498
        // only one '+'/'-' char, so could return failure directly
499
0
        if (UNLIKELY(len == 1)) {
500
0
            *result = PARSE_FAILURE;
501
0
            return 0;
502
0
        }
503
20
    }
504
505
    // This is the fast path where the string cannot overflow.
506
20
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
507
20
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
508
20
        return static_cast<T>(negative ? -val : val);
509
20
    }
510
511
0
    const T max_div_10 = max_val / 10;
512
0
    const T max_mod_10 = max_val % 10;
513
514
0
    int first = i;
515
0
    for (; i < len; ++i) {
516
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
517
0
            T digit = s[i] - '0';
518
            // This is a tricky check to see if adding this digit will cause an overflow.
519
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
520
0
                *result = PARSE_OVERFLOW;
521
0
                return negative ? -max_val : max_val;
522
0
            }
523
0
            val = val * 10 + digit;
524
0
        } else {
525
            if constexpr (enable_strict_mode) {
526
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
527
                    // Reject the string because the remaining chars are not all whitespace
528
                    *result = PARSE_FAILURE;
529
                    return 0;
530
                }
531
0
            } else {
532
                // Save original position where non-digit was found
533
0
                int remaining_len = len - i;
534
0
                const char* remaining_s = s + i;
535
                // Skip trailing whitespaces from the remaining portion
536
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
537
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
538
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
539
                    // Reject the string because either the first char was not a digit,
540
                    // or the remaining chars are not all whitespace
541
0
                    *result = PARSE_FAILURE;
542
0
                    return 0;
543
0
                }
544
0
            }
545
            // Returning here is slightly faster than breaking the loop.
546
0
            *result = PARSE_SUCCESS;
547
0
            return static_cast<T>(negative ? -val : val);
548
0
        }
549
0
    }
550
0
    *result = PARSE_SUCCESS;
551
0
    return static_cast<T>(negative ? -val : val);
552
0
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE
553
554
template <typename T>
555
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
556
1.37k
                                                ParseResult* result) {
557
1.37k
    if (UNLIKELY(len <= 0)) {
558
0
        *result = PARSE_FAILURE;
559
0
        return 0;
560
0
    }
561
562
1.37k
    T val = 0;
563
1.37k
    T max_val = std::numeric_limits<T>::max();
564
1.37k
    int i = 0;
565
566
1.37k
    using signedT = MakeSignedT<T>;
567
    // This is the fast path where the string cannot overflow.
568
1.37k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
569
784
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
570
784
        return val;
571
784
    }
572
573
588
    const T max_div_10 = max_val / 10;
574
588
    const T max_mod_10 = max_val % 10;
575
576
588
    int first = i;
577
4.65k
    for (; i < len; ++i) {
578
4.31k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
579
4.26k
            T digit = s[i] - '0';
580
            // This is a tricky check to see if adding this digit will cause an overflow.
581
4.26k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
582
196
                *result = PARSE_OVERFLOW;
583
196
                return max_val;
584
196
            }
585
4.06k
            val = val * 10 + digit;
586
4.06k
        } else {
587
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
588
                // Reject the string because either the first char was not a digit,
589
                // or the remaining chars are not all whitespace
590
49
                *result = PARSE_FAILURE;
591
49
                return 0;
592
49
            }
593
            // Returning here is slightly faster than breaking the loop.
594
0
            *result = PARSE_SUCCESS;
595
0
            return val;
596
49
        }
597
4.31k
    }
598
343
    *result = PARSE_SUCCESS;
599
343
    return val;
600
588
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
556
343
                                                ParseResult* result) {
557
343
    if (UNLIKELY(len <= 0)) {
558
0
        *result = PARSE_FAILURE;
559
0
        return 0;
560
0
    }
561
562
343
    T val = 0;
563
343
    T max_val = std::numeric_limits<T>::max();
564
343
    int i = 0;
565
566
343
    using signedT = MakeSignedT<T>;
567
    // This is the fast path where the string cannot overflow.
568
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
569
98
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
570
98
        return val;
571
98
    }
572
573
245
    const T max_div_10 = max_val / 10;
574
245
    const T max_mod_10 = max_val % 10;
575
576
245
    int first = i;
577
784
    for (; i < len; ++i) {
578
637
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
579
588
            T digit = s[i] - '0';
580
            // This is a tricky check to see if adding this digit will cause an overflow.
581
588
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
582
49
                *result = PARSE_OVERFLOW;
583
49
                return max_val;
584
49
            }
585
539
            val = val * 10 + digit;
586
539
        } else {
587
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
588
                // Reject the string because either the first char was not a digit,
589
                // or the remaining chars are not all whitespace
590
49
                *result = PARSE_FAILURE;
591
49
                return 0;
592
49
            }
593
            // Returning here is slightly faster than breaking the loop.
594
0
            *result = PARSE_SUCCESS;
595
0
            return val;
596
49
        }
597
637
    }
598
147
    *result = PARSE_SUCCESS;
599
147
    return val;
600
245
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
556
343
                                                ParseResult* result) {
557
343
    if (UNLIKELY(len <= 0)) {
558
0
        *result = PARSE_FAILURE;
559
0
        return 0;
560
0
    }
561
562
343
    T val = 0;
563
343
    T max_val = std::numeric_limits<T>::max();
564
343
    int i = 0;
565
566
343
    using signedT = MakeSignedT<T>;
567
    // This is the fast path where the string cannot overflow.
568
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
569
196
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
570
196
        return val;
571
196
    }
572
573
147
    const T max_div_10 = max_val / 10;
574
147
    const T max_mod_10 = max_val % 10;
575
576
147
    int first = i;
577
833
    for (; i < len; ++i) {
578
735
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
579
735
            T digit = s[i] - '0';
580
            // This is a tricky check to see if adding this digit will cause an overflow.
581
735
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
582
49
                *result = PARSE_OVERFLOW;
583
49
                return max_val;
584
49
            }
585
686
            val = val * 10 + digit;
586
686
        } else {
587
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
588
                // Reject the string because either the first char was not a digit,
589
                // or the remaining chars are not all whitespace
590
0
                *result = PARSE_FAILURE;
591
0
                return 0;
592
0
            }
593
            // Returning here is slightly faster than breaking the loop.
594
0
            *result = PARSE_SUCCESS;
595
0
            return val;
596
0
        }
597
735
    }
598
98
    *result = PARSE_SUCCESS;
599
98
    return val;
600
147
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
556
343
                                                ParseResult* result) {
557
343
    if (UNLIKELY(len <= 0)) {
558
0
        *result = PARSE_FAILURE;
559
0
        return 0;
560
0
    }
561
562
343
    T val = 0;
563
343
    T max_val = std::numeric_limits<T>::max();
564
343
    int i = 0;
565
566
343
    using signedT = MakeSignedT<T>;
567
    // This is the fast path where the string cannot overflow.
568
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
569
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
570
245
        return val;
571
245
    }
572
573
98
    const T max_div_10 = max_val / 10;
574
98
    const T max_mod_10 = max_val % 10;
575
576
98
    int first = i;
577
1.02k
    for (; i < len; ++i) {
578
980
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
579
980
            T digit = s[i] - '0';
580
            // This is a tricky check to see if adding this digit will cause an overflow.
581
980
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
582
49
                *result = PARSE_OVERFLOW;
583
49
                return max_val;
584
49
            }
585
931
            val = val * 10 + digit;
586
931
        } else {
587
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
588
                // Reject the string because either the first char was not a digit,
589
                // or the remaining chars are not all whitespace
590
0
                *result = PARSE_FAILURE;
591
0
                return 0;
592
0
            }
593
            // Returning here is slightly faster than breaking the loop.
594
0
            *result = PARSE_SUCCESS;
595
0
            return val;
596
0
        }
597
980
    }
598
49
    *result = PARSE_SUCCESS;
599
49
    return val;
600
98
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
556
343
                                                ParseResult* result) {
557
343
    if (UNLIKELY(len <= 0)) {
558
0
        *result = PARSE_FAILURE;
559
0
        return 0;
560
0
    }
561
562
343
    T val = 0;
563
343
    T max_val = std::numeric_limits<T>::max();
564
343
    int i = 0;
565
566
343
    using signedT = MakeSignedT<T>;
567
    // This is the fast path where the string cannot overflow.
568
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
569
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
570
245
        return val;
571
245
    }
572
573
98
    const T max_div_10 = max_val / 10;
574
98
    const T max_mod_10 = max_val % 10;
575
576
98
    int first = i;
577
2.00k
    for (; i < len; ++i) {
578
1.96k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
579
1.96k
            T digit = s[i] - '0';
580
            // This is a tricky check to see if adding this digit will cause an overflow.
581
1.96k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
582
49
                *result = PARSE_OVERFLOW;
583
49
                return max_val;
584
49
            }
585
1.91k
            val = val * 10 + digit;
586
1.91k
        } else {
587
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
588
                // Reject the string because either the first char was not a digit,
589
                // or the remaining chars are not all whitespace
590
0
                *result = PARSE_FAILURE;
591
0
                return 0;
592
0
            }
593
            // Returning here is slightly faster than breaking the loop.
594
0
            *result = PARSE_SUCCESS;
595
0
            return val;
596
0
        }
597
1.96k
    }
598
49
    *result = PARSE_SUCCESS;
599
49
    return val;
600
98
}
601
602
template <typename T>
603
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
604
27.8k
                                       ParseResult* result) {
605
27.8k
    using UnsignedT = MakeUnsignedT<T>;
606
27.8k
    UnsignedT val = 0;
607
27.8k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
27.8k
    bool negative = false;
609
27.8k
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
27.8k
    int i = 0;
614
27.8k
    switch (*s) {
615
13.4k
    case '-':
616
13.4k
        negative = true;
617
13.4k
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
13.4k
        [[fallthrough]];
619
13.7k
    case '+':
620
13.7k
        i = 1;
621
27.8k
    }
622
623
27.8k
    const T max_div_base = max_val / base;
624
27.8k
    const T max_mod_base = max_val % base;
625
626
27.8k
    int first = i;
627
90.9k
    for (; i < len; ++i) {
628
76.6k
        T digit;
629
76.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
75.7k
            digit = s[i] - '0';
631
75.7k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
639
            digit = (s[i] - 'a' + 10);
633
639
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
98
            digit = (s[i] - 'A' + 10);
635
147
        } else {
636
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
147
                *result = PARSE_FAILURE;
640
147
                return 0;
641
147
            }
642
            // skip trailing whitespace.
643
0
            break;
644
147
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
76.4k
        if (digit >= base) {
648
392
            break;
649
392
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
76.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
12.9k
            *result = PARSE_OVERFLOW;
654
12.9k
            return static_cast<T>(negative ? -max_val : max_val);
655
12.9k
        }
656
63.1k
        val = val * base + digit;
657
63.1k
    }
658
14.7k
    *result = PARSE_SUCCESS;
659
14.7k
    return static_cast<T>(negative ? -val : val);
660
27.8k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
604
26.4k
                                       ParseResult* result) {
605
26.4k
    using UnsignedT = MakeUnsignedT<T>;
606
26.4k
    UnsignedT val = 0;
607
26.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
26.4k
    bool negative = false;
609
26.4k
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
26.4k
    int i = 0;
614
26.4k
    switch (*s) {
615
12.8k
    case '-':
616
12.8k
        negative = true;
617
12.8k
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
12.8k
        [[fallthrough]];
619
12.9k
    case '+':
620
12.9k
        i = 1;
621
26.4k
    }
622
623
26.4k
    const T max_div_base = max_val / base;
624
26.4k
    const T max_mod_base = max_val % base;
625
626
26.4k
    int first = i;
627
80.7k
    for (; i < len; ++i) {
628
67.4k
        T digit;
629
67.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
66.6k
            digit = s[i] - '0';
631
66.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
539
            digit = (s[i] - 'a' + 10);
633
539
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
98
            digit = (s[i] - 'A' + 10);
635
147
        } else {
636
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
147
                *result = PARSE_FAILURE;
640
147
                return 0;
641
147
            }
642
            // skip trailing whitespace.
643
0
            break;
644
147
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
67.3k
        if (digit >= base) {
648
392
            break;
649
392
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
66.9k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
12.6k
            *result = PARSE_OVERFLOW;
654
12.6k
            return static_cast<T>(negative ? -max_val : max_val);
655
12.6k
        }
656
54.2k
        val = val * base + digit;
657
54.2k
    }
658
13.6k
    *result = PARSE_SUCCESS;
659
13.6k
    return static_cast<T>(negative ? -val : val);
660
26.4k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
604
490
                                       ParseResult* result) {
605
490
    using UnsignedT = MakeUnsignedT<T>;
606
490
    UnsignedT val = 0;
607
490
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
490
    bool negative = false;
609
490
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
490
    int i = 0;
614
490
    switch (*s) {
615
196
    case '-':
616
196
        negative = true;
617
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
196
        [[fallthrough]];
619
245
    case '+':
620
245
        i = 1;
621
490
    }
622
623
490
    const T max_div_base = max_val / base;
624
490
    const T max_mod_base = max_val % base;
625
626
490
    int first = i;
627
2.10k
    for (; i < len; ++i) {
628
1.71k
        T digit;
629
1.71k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
1.61k
            digit = s[i] - '0';
631
1.61k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
98
            digit = (s[i] - 'a' + 10);
633
98
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
0
            digit = (s[i] - 'A' + 10);
635
0
        } else {
636
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
0
                *result = PARSE_FAILURE;
640
0
                return 0;
641
0
            }
642
            // skip trailing whitespace.
643
0
            break;
644
0
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
1.71k
        if (digit >= base) {
648
0
            break;
649
0
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
1.71k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
98
            *result = PARSE_OVERFLOW;
654
98
            return static_cast<T>(negative ? -max_val : max_val);
655
98
        }
656
1.61k
        val = val * base + digit;
657
1.61k
    }
658
392
    *result = PARSE_SUCCESS;
659
392
    return static_cast<T>(negative ? -val : val);
660
490
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
604
441
                                       ParseResult* result) {
605
441
    using UnsignedT = MakeUnsignedT<T>;
606
441
    UnsignedT val = 0;
607
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
441
    bool negative = false;
609
441
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
441
    int i = 0;
614
441
    switch (*s) {
615
147
    case '-':
616
147
        negative = true;
617
147
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
147
        [[fallthrough]];
619
245
    case '+':
620
245
        i = 1;
621
441
    }
622
623
441
    const T max_div_base = max_val / base;
624
441
    const T max_mod_base = max_val % base;
625
626
441
    int first = i;
627
3.03k
    for (; i < len; ++i) {
628
2.69k
        T digit;
629
2.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
2.69k
            digit = s[i] - '0';
631
2.69k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
0
            digit = (s[i] - 'a' + 10);
633
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
0
            digit = (s[i] - 'A' + 10);
635
0
        } else {
636
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
0
                *result = PARSE_FAILURE;
640
0
                return 0;
641
0
            }
642
            // skip trailing whitespace.
643
0
            break;
644
0
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
2.69k
        if (digit >= base) {
648
0
            break;
649
0
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
2.69k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
98
            *result = PARSE_OVERFLOW;
654
98
            return static_cast<T>(negative ? -max_val : max_val);
655
98
        }
656
2.59k
        val = val * base + digit;
657
2.59k
    }
658
343
    *result = PARSE_SUCCESS;
659
343
    return static_cast<T>(negative ? -val : val);
660
441
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
604
441
                                       ParseResult* result) {
605
441
    using UnsignedT = MakeUnsignedT<T>;
606
441
    UnsignedT val = 0;
607
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
441
    bool negative = false;
609
441
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
441
    int i = 0;
614
441
    switch (*s) {
615
196
    case '-':
616
196
        negative = true;
617
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
196
        [[fallthrough]];
619
245
    case '+':
620
245
        i = 1;
621
441
    }
622
623
441
    const T max_div_base = max_val / base;
624
441
    const T max_mod_base = max_val % base;
625
626
441
    int first = i;
627
5.09k
    for (; i < len; ++i) {
628
4.75k
        T digit;
629
4.75k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
4.75k
            digit = s[i] - '0';
631
4.75k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
0
            digit = (s[i] - 'a' + 10);
633
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
0
            digit = (s[i] - 'A' + 10);
635
0
        } else {
636
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
0
                *result = PARSE_FAILURE;
640
0
                return 0;
641
0
            }
642
            // skip trailing whitespace.
643
0
            break;
644
0
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
4.75k
        if (digit >= base) {
648
0
            break;
649
0
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
4.75k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
98
            *result = PARSE_OVERFLOW;
654
98
            return static_cast<T>(negative ? -max_val : max_val);
655
98
        }
656
4.65k
        val = val * base + digit;
657
4.65k
    }
658
343
    *result = PARSE_SUCCESS;
659
343
    return static_cast<T>(negative ? -val : val);
660
441
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
604
1
                                       ParseResult* result) {
605
1
    using UnsignedT = MakeUnsignedT<T>;
606
1
    UnsignedT val = 0;
607
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
608
1
    bool negative = false;
609
1
    if (UNLIKELY(len <= 0)) {
610
0
        *result = PARSE_FAILURE;
611
0
        return 0;
612
0
    }
613
1
    int i = 0;
614
1
    switch (*s) {
615
0
    case '-':
616
0
        negative = true;
617
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
618
0
        [[fallthrough]];
619
0
    case '+':
620
0
        i = 1;
621
1
    }
622
623
1
    const T max_div_base = max_val / base;
624
1
    const T max_mod_base = max_val % base;
625
626
1
    int first = i;
627
3
    for (; i < len; ++i) {
628
2
        T digit;
629
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
630
0
            digit = s[i] - '0';
631
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
632
2
            digit = (s[i] - 'a' + 10);
633
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
634
0
            digit = (s[i] - 'A' + 10);
635
0
        } else {
636
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
637
                // Reject the string because either the first char was not an alpha/digit,
638
                // or the remaining chars are not all whitespace
639
0
                *result = PARSE_FAILURE;
640
0
                return 0;
641
0
            }
642
            // skip trailing whitespace.
643
0
            break;
644
0
        }
645
646
        // Bail, if we encounter a digit that is not available in base.
647
2
        if (digit >= base) {
648
0
            break;
649
0
        }
650
651
        // This is a tricky check to see if adding this digit will cause an overflow.
652
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
653
0
            *result = PARSE_OVERFLOW;
654
0
            return static_cast<T>(negative ? -max_val : max_val);
655
0
        }
656
2
        val = val * base + digit;
657
2
    }
658
1
    *result = PARSE_SUCCESS;
659
1
    return static_cast<T>(negative ? -val : val);
660
1
}
661
662
template <typename T, bool enable_strict_mode>
663
240k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
240k
    T val = 0;
665
240k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
240k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
234k
        val = s[0] - '0';
672
234k
    } else {
673
6.33k
        *result = PARSE_FAILURE;
674
6.33k
        return 0;
675
6.33k
    }
676
372k
    for (int i = 1; i < len; ++i) {
677
141k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
137k
            T digit = s[i] - '0';
679
137k
            val = val * 10 + digit;
680
137k
        } else {
681
3.79k
            if constexpr (enable_strict_mode) {
682
1.31k
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
1.17k
                    *result = PARSE_FAILURE;
684
1.17k
                    return 0;
685
1.17k
                }
686
2.47k
            } else {
687
                // Save original position where non-digit was found
688
2.47k
                int remaining_len = len - i;
689
2.47k
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
2.47k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
2.47k
                if ((UNLIKELY(remaining_len != 0 &&
693
2.47k
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
477
                    *result = PARSE_FAILURE;
695
477
                    return 0;
696
477
                }
697
2.47k
            }
698
2.14k
            *result = PARSE_SUCCESS;
699
3.79k
            return val;
700
3.79k
        }
701
141k
    }
702
230k
    *result = PARSE_SUCCESS;
703
230k
    return val;
704
234k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
41.9k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
41.9k
    T val = 0;
665
41.9k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
41.9k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
41.0k
        val = s[0] - '0';
672
41.0k
    } else {
673
914
        *result = PARSE_FAILURE;
674
914
        return 0;
675
914
    }
676
57.7k
    for (int i = 1; i < len; ++i) {
677
17.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
16.6k
            T digit = s[i] - '0';
679
16.6k
            val = val * 10 + digit;
680
16.6k
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
378
            } else {
687
                // Save original position where non-digit was found
688
378
                int remaining_len = len - i;
689
378
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
378
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
378
                if ((UNLIKELY(remaining_len != 0 &&
693
378
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
98
                    *result = PARSE_FAILURE;
695
98
                    return 0;
696
98
                }
697
378
            }
698
280
            *result = PARSE_SUCCESS;
699
378
            return val;
700
378
        }
701
17.0k
    }
702
40.6k
    *result = PARSE_SUCCESS;
703
40.6k
    return val;
704
41.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
60.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
60.5k
    T val = 0;
665
60.5k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
60.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
59.9k
        val = s[0] - '0';
672
59.9k
    } else {
673
582
        *result = PARSE_FAILURE;
674
582
        return 0;
675
582
    }
676
85.3k
    for (int i = 1; i < len; ++i) {
677
25.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
25.3k
            T digit = s[i] - '0';
679
25.3k
            val = val * 10 + digit;
680
25.3k
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
2
            } else {
687
                // Save original position where non-digit was found
688
2
                int remaining_len = len - i;
689
2
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
2
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
2
                if ((UNLIKELY(remaining_len != 0 &&
693
2
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
2
                    *result = PARSE_FAILURE;
695
2
                    return 0;
696
2
                }
697
2
            }
698
0
            *result = PARSE_SUCCESS;
699
2
            return val;
700
2
        }
701
25.3k
    }
702
59.9k
    *result = PARSE_SUCCESS;
703
59.9k
    return val;
704
59.9k
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
51.0k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
51.0k
    T val = 0;
665
51.0k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
51.0k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
50.0k
        val = s[0] - '0';
672
50.0k
    } else {
673
918
        *result = PARSE_FAILURE;
674
918
        return 0;
675
918
    }
676
72.9k
    for (int i = 1; i < len; ++i) {
677
23.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
22.8k
            T digit = s[i] - '0';
679
22.8k
            val = val * 10 + digit;
680
22.8k
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
957
            } else {
687
                // Save original position where non-digit was found
688
957
                int remaining_len = len - i;
689
957
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
957
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
957
                if ((UNLIKELY(remaining_len != 0 &&
693
957
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
74
                    *result = PARSE_FAILURE;
695
74
                    return 0;
696
74
                }
697
957
            }
698
883
            *result = PARSE_SUCCESS;
699
957
            return val;
700
957
        }
701
23.7k
    }
702
49.1k
    *result = PARSE_SUCCESS;
703
49.1k
    return val;
704
50.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
52.4k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
52.4k
    T val = 0;
665
52.4k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
52.4k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
50.2k
        val = s[0] - '0';
672
50.2k
    } else {
673
2.24k
        *result = PARSE_FAILURE;
674
2.24k
        return 0;
675
2.24k
    }
676
85.4k
    for (int i = 1; i < len; ++i) {
677
35.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
35.1k
            T digit = s[i] - '0';
679
35.1k
            val = val * 10 + digit;
680
35.1k
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
527
            } else {
687
                // Save original position where non-digit was found
688
527
                int remaining_len = len - i;
689
527
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
527
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
527
                if ((UNLIKELY(remaining_len != 0 &&
693
527
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
179
                    *result = PARSE_FAILURE;
695
179
                    return 0;
696
179
                }
697
527
            }
698
348
            *result = PARSE_SUCCESS;
699
527
            return val;
700
527
        }
701
35.7k
    }
702
49.7k
    *result = PARSE_SUCCESS;
703
49.7k
    return val;
704
50.2k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
32.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
32.5k
    T val = 0;
665
32.5k
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
32.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
31.5k
        val = s[0] - '0';
672
31.5k
    } else {
673
1.07k
        *result = PARSE_FAILURE;
674
1.07k
        return 0;
675
1.07k
    }
676
66.1k
    for (int i = 1; i < len; ++i) {
677
35.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
34.6k
            T digit = s[i] - '0';
679
34.6k
            val = val * 10 + digit;
680
34.6k
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
615
            } else {
687
                // Save original position where non-digit was found
688
615
                int remaining_len = len - i;
689
615
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
615
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
615
                if ((UNLIKELY(remaining_len != 0 &&
693
615
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
124
                    *result = PARSE_FAILURE;
695
124
                    return 0;
696
124
                }
697
615
            }
698
491
            *result = PARSE_SUCCESS;
699
615
            return val;
700
615
        }
701
35.2k
    }
702
30.8k
    *result = PARSE_SUCCESS;
703
30.8k
    return val;
704
31.5k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
635
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
635
    T val = 0;
665
635
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
635
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
550
        val = s[0] - '0';
672
550
    } else {
673
85
        *result = PARSE_FAILURE;
674
85
        return 0;
675
85
    }
676
1.32k
    for (int i = 1; i < len; ++i) {
677
1.09k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
770
            T digit = s[i] - '0';
679
770
            val = val * 10 + digit;
680
770
        } else {
681
320
            if constexpr (enable_strict_mode) {
682
320
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
292
                    *result = PARSE_FAILURE;
684
292
                    return 0;
685
292
                }
686
            } else {
687
                // Save original position where non-digit was found
688
                int remaining_len = len - i;
689
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
                if ((UNLIKELY(remaining_len != 0 &&
693
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
                    *result = PARSE_FAILURE;
695
                    return 0;
696
                }
697
            }
698
28
            *result = PARSE_SUCCESS;
699
320
            return val;
700
320
        }
701
1.09k
    }
702
230
    *result = PARSE_SUCCESS;
703
230
    return val;
704
550
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
48
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
48
    T val = 0;
665
48
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
48
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
22
        val = s[0] - '0';
672
26
    } else {
673
26
        *result = PARSE_FAILURE;
674
26
        return 0;
675
26
    }
676
22
    for (int i = 1; i < len; ++i) {
677
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
0
            T digit = s[i] - '0';
679
0
            val = val * 10 + digit;
680
2
        } else {
681
2
            if constexpr (enable_strict_mode) {
682
2
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
2
                    *result = PARSE_FAILURE;
684
2
                    return 0;
685
2
                }
686
            } else {
687
                // Save original position where non-digit was found
688
                int remaining_len = len - i;
689
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
                if ((UNLIKELY(remaining_len != 0 &&
693
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
                    *result = PARSE_FAILURE;
695
                    return 0;
696
                }
697
            }
698
0
            *result = PARSE_SUCCESS;
699
2
            return val;
700
2
        }
701
2
    }
702
20
    *result = PARSE_SUCCESS;
703
20
    return val;
704
22
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
168
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
168
    T val = 0;
665
168
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
168
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
130
        val = s[0] - '0';
672
130
    } else {
673
38
        *result = PARSE_FAILURE;
674
38
        return 0;
675
38
    }
676
206
    for (int i = 1; i < len; ++i) {
677
158
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
76
            T digit = s[i] - '0';
679
76
            val = val * 10 + digit;
680
82
        } else {
681
82
            if constexpr (enable_strict_mode) {
682
82
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
82
                    *result = PARSE_FAILURE;
684
82
                    return 0;
685
82
                }
686
            } else {
687
                // Save original position where non-digit was found
688
                int remaining_len = len - i;
689
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
                if ((UNLIKELY(remaining_len != 0 &&
693
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
                    *result = PARSE_FAILURE;
695
                    return 0;
696
                }
697
            }
698
0
            *result = PARSE_SUCCESS;
699
82
            return val;
700
82
        }
701
158
    }
702
48
    *result = PARSE_SUCCESS;
703
48
    return val;
704
130
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
738
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
738
    T val = 0;
665
738
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
738
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
521
        val = s[0] - '0';
672
521
    } else {
673
217
        *result = PARSE_FAILURE;
674
217
        return 0;
675
217
    }
676
1.52k
    for (int i = 1; i < len; ++i) {
677
1.45k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
1.00k
            T digit = s[i] - '0';
679
1.00k
            val = val * 10 + digit;
680
1.00k
        } else {
681
456
            if constexpr (enable_strict_mode) {
682
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
400
                    *result = PARSE_FAILURE;
684
400
                    return 0;
685
400
                }
686
            } else {
687
                // Save original position where non-digit was found
688
                int remaining_len = len - i;
689
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
                if ((UNLIKELY(remaining_len != 0 &&
693
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
                    *result = PARSE_FAILURE;
695
                    return 0;
696
                }
697
            }
698
56
            *result = PARSE_SUCCESS;
699
456
            return val;
700
456
        }
701
1.45k
    }
702
65
    *result = PARSE_SUCCESS;
703
65
    return val;
704
521
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
752
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
752
    T val = 0;
665
752
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
752
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
512
        val = s[0] - '0';
672
512
    } else {
673
240
        *result = PARSE_FAILURE;
674
240
        return 0;
675
240
    }
676
1.49k
    for (int i = 1; i < len; ++i) {
677
1.44k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
986
            T digit = s[i] - '0';
679
986
            val = val * 10 + digit;
680
986
        } else {
681
456
            if constexpr (enable_strict_mode) {
682
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
400
                    *result = PARSE_FAILURE;
684
400
                    return 0;
685
400
                }
686
            } else {
687
                // Save original position where non-digit was found
688
                int remaining_len = len - i;
689
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
                if ((UNLIKELY(remaining_len != 0 &&
693
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
                    *result = PARSE_FAILURE;
695
                    return 0;
696
                }
697
            }
698
56
            *result = PARSE_SUCCESS;
699
456
            return val;
700
456
        }
701
1.44k
    }
702
56
    *result = PARSE_SUCCESS;
703
56
    return val;
704
512
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
663
4
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
664
4
    T val = 0;
665
4
    if (UNLIKELY(len == 0)) {
666
0
        *result = PARSE_SUCCESS;
667
0
        return val;
668
0
    }
669
    // Factor out the first char for error handling speeds up the loop.
670
4
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
671
4
        val = s[0] - '0';
672
4
    } else {
673
0
        *result = PARSE_FAILURE;
674
0
        return 0;
675
0
    }
676
4
    for (int i = 1; i < len; ++i) {
677
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
678
0
            T digit = s[i] - '0';
679
0
            val = val * 10 + digit;
680
0
        } else {
681
            if constexpr (enable_strict_mode) {
682
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
683
                    *result = PARSE_FAILURE;
684
                    return 0;
685
                }
686
0
            } else {
687
                // Save original position where non-digit was found
688
0
                int remaining_len = len - i;
689
0
                const char* remaining_s = s + i;
690
                // Skip trailing whitespaces from the remaining portion
691
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
692
0
                if ((UNLIKELY(remaining_len != 0 &&
693
0
                              !is_float_suffix(remaining_s, remaining_len)))) {
694
0
                    *result = PARSE_FAILURE;
695
0
                    return 0;
696
0
                }
697
0
            }
698
0
            *result = PARSE_SUCCESS;
699
0
            return val;
700
0
        }
701
0
    }
702
4
    *result = PARSE_SUCCESS;
703
4
    return val;
704
4
}
705
706
// at least the first char(if any) must be a digit.
707
template <typename T>
708
T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
709
136k
                                                  ParseResult* result) {
710
136k
    T val = 0;
711
136k
    if (max_len == 0) [[unlikely]] {
712
135k
        *result = PARSE_SUCCESS;
713
135k
        return val;
714
135k
    }
715
    // Factor out the first char for error handling speeds up the loop.
716
1.09k
    if (is_numeric_ascii(s[0])) [[likely]] {
717
1.09k
        val = s[0] - '0';
718
1.09k
    } else {
719
0
        *result = PARSE_FAILURE;
720
0
        return 0;
721
0
    }
722
4.87k
    for (int i = 1; i < max_len; ++i) {
723
3.77k
        if (is_numeric_ascii(s[i])) [[likely]] {
724
3.77k
            T digit = s[i] - '0';
725
3.77k
            val = val * 10 + digit;
726
3.77k
        } else {
727
            // 123abc, return 123
728
0
            *result = PARSE_SUCCESS;
729
0
            return val;
730
0
        }
731
3.77k
    }
732
1.09k
    *result = PARSE_SUCCESS;
733
1.09k
    return val;
734
1.09k
}
735
736
template <typename T>
737
152k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
738
152k
    int i = 0;
739
    // skip leading spaces
740
152k
    for (; i < len; ++i) {
741
152k
        if (!is_whitespace_ascii(s[i])) {
742
152k
            break;
743
152k
        }
744
152k
    }
745
746
    // skip back spaces
747
152k
    int j = len - 1;
748
152k
    for (; j >= i; j--) {
749
152k
        if (!is_whitespace_ascii(s[j])) {
750
152k
            break;
751
152k
        }
752
152k
    }
753
754
    // skip leading '+', from_chars can handle '-'
755
152k
    if (i < len && s[i] == '+') {
756
7.08k
        i++;
757
        // ++ or +- are not valid, but the first + is already skipped,
758
        // if don't check here, from_chars will succeed.
759
        //
760
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
761
        // which may avoid this extra check here.
762
        // e.g.:
763
        // fast_float::chars_format format =
764
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
765
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
766
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
767
20
            *result = PARSE_FAILURE;
768
20
            return 0;
769
20
        }
770
7.08k
    }
771
152k
    if (UNLIKELY(i > j)) {
772
32
        *result = PARSE_FAILURE;
773
32
        return 0;
774
32
    }
775
776
    // Use double here to not lose precision while accumulating the result
777
152k
    double val = 0;
778
152k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
779
780
152k
    if (res.ptr == s + j + 1) {
781
148k
        *result = PARSE_SUCCESS;
782
148k
        return val;
783
148k
    } else {
784
4.61k
        *result = PARSE_FAILURE;
785
4.61k
    }
786
4.61k
    return 0;
787
152k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
737
87.6k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
738
87.6k
    int i = 0;
739
    // skip leading spaces
740
87.6k
    for (; i < len; ++i) {
741
87.6k
        if (!is_whitespace_ascii(s[i])) {
742
87.6k
            break;
743
87.6k
        }
744
87.6k
    }
745
746
    // skip back spaces
747
87.6k
    int j = len - 1;
748
87.6k
    for (; j >= i; j--) {
749
87.6k
        if (!is_whitespace_ascii(s[j])) {
750
87.6k
            break;
751
87.6k
        }
752
87.6k
    }
753
754
    // skip leading '+', from_chars can handle '-'
755
87.6k
    if (i < len && s[i] == '+') {
756
3.54k
        i++;
757
        // ++ or +- are not valid, but the first + is already skipped,
758
        // if don't check here, from_chars will succeed.
759
        //
760
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
761
        // which may avoid this extra check here.
762
        // e.g.:
763
        // fast_float::chars_format format =
764
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
765
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
766
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
767
10
            *result = PARSE_FAILURE;
768
10
            return 0;
769
10
        }
770
3.54k
    }
771
87.6k
    if (UNLIKELY(i > j)) {
772
18
        *result = PARSE_FAILURE;
773
18
        return 0;
774
18
    }
775
776
    // Use double here to not lose precision while accumulating the result
777
87.6k
    double val = 0;
778
87.6k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
779
780
87.6k
    if (res.ptr == s + j + 1) {
781
85.3k
        *result = PARSE_SUCCESS;
782
85.3k
        return val;
783
85.3k
    } else {
784
2.32k
        *result = PARSE_FAILURE;
785
2.32k
    }
786
2.32k
    return 0;
787
87.6k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
737
65.1k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
738
65.1k
    int i = 0;
739
    // skip leading spaces
740
65.1k
    for (; i < len; ++i) {
741
65.1k
        if (!is_whitespace_ascii(s[i])) {
742
65.1k
            break;
743
65.1k
        }
744
65.1k
    }
745
746
    // skip back spaces
747
65.1k
    int j = len - 1;
748
65.1k
    for (; j >= i; j--) {
749
65.1k
        if (!is_whitespace_ascii(s[j])) {
750
65.1k
            break;
751
65.1k
        }
752
65.1k
    }
753
754
    // skip leading '+', from_chars can handle '-'
755
65.1k
    if (i < len && s[i] == '+') {
756
3.54k
        i++;
757
        // ++ or +- are not valid, but the first + is already skipped,
758
        // if don't check here, from_chars will succeed.
759
        //
760
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
761
        // which may avoid this extra check here.
762
        // e.g.:
763
        // fast_float::chars_format format =
764
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
765
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
766
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
767
10
            *result = PARSE_FAILURE;
768
10
            return 0;
769
10
        }
770
3.54k
    }
771
65.1k
    if (UNLIKELY(i > j)) {
772
14
        *result = PARSE_FAILURE;
773
14
        return 0;
774
14
    }
775
776
    // Use double here to not lose precision while accumulating the result
777
65.1k
    double val = 0;
778
65.1k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
779
780
65.1k
    if (res.ptr == s + j + 1) {
781
62.8k
        *result = PARSE_SUCCESS;
782
62.8k
        return val;
783
62.8k
    } else {
784
2.28k
        *result = PARSE_FAILURE;
785
2.28k
    }
786
2.28k
    return 0;
787
65.1k
}
788
789
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
790
11.3k
                                                  ParseResult* result) {
791
11.3k
    *result = PARSE_SUCCESS;
792
793
11.3k
    if (len == 1) {
794
2.66k
        if (s[0] == '1' || s[0] == 't' || s[0] == 'T') {
795
333
            return true;
796
333
        }
797
2.32k
        if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') {
798
934
            return false;
799
934
        }
800
1.39k
        *result = PARSE_FAILURE;
801
1.39k
        return false;
802
2.32k
    }
803
804
8.71k
    if (len == 2) {
805
975
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
806
10
            return true;
807
10
        }
808
965
        if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) {
809
9
            return false;
810
9
        }
811
965
    }
812
813
8.69k
    if (len == 3) {
814
42
        if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') &&
815
42
            (s[2] == 's' || s[2] == 'S')) {
816
10
            return true;
817
10
        }
818
32
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') &&
819
32
            (s[2] == 'f' || s[2] == 'F')) {
820
9
            return false;
821
9
        }
822
32
    }
823
824
8.67k
    if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
825
8.67k
        (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) {
826
3.38k
        return true;
827
3.38k
    }
828
829
5.29k
    if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
830
5.29k
        (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
831
5.29k
        (s[4] == 'e' || s[4] == 'E')) {
832
3.42k
        return false;
833
3.42k
    }
834
835
    // No valid boolean value found
836
1.87k
    *result = PARSE_FAILURE;
837
1.87k
    return false;
838
5.29k
}
839
#include "common/compile_check_avoid_end.h"
840
} // end namespace doris