Coverage Report

Created: 2026-05-20 21:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/string_parser.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
#include <sys/types.h>
27
28
#include <algorithm>
29
#include <cstdlib>
30
// IWYU pragma: no_include <bits/std_abs.h>
31
#include <cmath> // IWYU pragma: keep
32
#include <cstdint>
33
#include <limits>
34
#include <map>
35
#include <string>
36
#include <type_traits>
37
#include <utility>
38
39
#include "common/status.h"
40
#include "core/data_type/number_traits.h"
41
#include "core/data_type/primitive_type.h"
42
#include "core/extended_types.h"
43
#include "core/value/large_int_value.h"
44
#include "exec/common/int_exp.h"
45
#include "exec/common/string_utils/string_utils.h"
46
47
namespace doris {
48
#include "common/compile_check_avoid_begin.h"
49
template <DecimalNativeTypeConcept T>
50
struct Decimal;
51
52
// they rely on the template parameter `IS_STRICT`. in strict mode, it will set error code and otherwise it will not.
53
#ifndef SET_PARAMS_RET_FALSE_IFN
54
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
55
1.05M
    do {                                                              \
56
1.05M
        if (!(stmt)) [[unlikely]] {                                   \
57
36.1k
            if constexpr (IsStrict) {                                 \
58
174
                params.status = Status::InvalidArgument(__VA_ARGS__); \
59
174
            }                                                         \
60
36.1k
            return false;                                             \
61
36.1k
        }                                                             \
62
1.05M
    } while (false)
63
#endif
64
65
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
66
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
67
160
    do {                                          \
68
160
        try {                                     \
69
160
            { stmt; }                             \
70
160
        } catch (const doris::Exception& e) {     \
71
15
            if constexpr (IsStrict) {             \
72
2
                params.status = e.to_status();    \
73
2
            }                                     \
74
15
            return false;                         \
75
15
        }                                         \
76
160
    } while (false)
77
#endif
78
79
// skip leading and trailing ascii whitespaces,
80
// return the pointer to the first non-whitespace char,
81
// and update the len to the new length, which does not include
82
// leading and trailing whitespaces
83
template <typename T>
84
545k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
85
1.01M
    while (len > 0 && is_whitespace_ascii(*s)) {
86
469k
        ++s;
87
469k
        --len;
88
469k
    }
89
90
1.01M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
91
465k
        --len;
92
465k
    }
93
94
545k
    return s;
95
545k
}
_ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_
Line
Count
Source
84
516k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
85
910k
    while (len > 0 && is_whitespace_ascii(*s)) {
86
393k
        ++s;
87
393k
        --len;
88
393k
    }
89
90
906k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
91
390k
        --len;
92
390k
    }
93
94
516k
    return s;
95
516k
}
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_
Line
Count
Source
84
1.37k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
85
4.90k
    while (len > 0 && is_whitespace_ascii(*s)) {
86
3.52k
        ++s;
87
3.52k
        --len;
88
3.52k
    }
89
90
4.90k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
91
3.52k
        --len;
92
3.52k
    }
93
94
1.37k
    return s;
95
1.37k
}
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_
Line
Count
Source
84
27.8k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
85
100k
    while (len > 0 && is_whitespace_ascii(*s)) {
86
72.4k
        ++s;
87
72.4k
        --len;
88
72.4k
    }
89
90
99.8k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
91
72.0k
        --len;
92
72.0k
    }
93
94
27.8k
    return s;
95
27.8k
}
96
97
template <typename T>
98
53.9k
inline const char* skip_leading_whitespace(const char* __restrict s, T& len) {
99
157k
    while (len > 0 && is_whitespace_ascii(*s)) {
100
103k
        ++s;
101
103k
        --len;
102
103k
    }
103
104
53.9k
    return s;
105
53.9k
}
106
107
// skip trailing ascii whitespaces,
108
// return the pointer to the first char,
109
// and update the len to the new length, which does not include
110
// trailing whitespaces
111
template <typename T>
112
44.3k
inline const char* skip_trailing_whitespaces(const char* s, T& len) {
113
160k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
114
115k
        --len;
115
115k
    }
116
117
44.3k
    return s;
118
44.3k
}
119
120
template <bool (*Pred)(char)>
121
60.4k
bool range_suite(const char* s, const char* end) {
122
60.4k
    return std::ranges::all_of(s, end, Pred);
123
60.4k
}
_ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_
Line
Count
Source
121
58.1k
bool range_suite(const char* s, const char* end) {
122
58.1k
    return std::ranges::all_of(s, end, Pred);
123
58.1k
}
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_
Line
Count
Source
121
2.28k
bool range_suite(const char* s, const char* end) {
122
2.28k
    return std::ranges::all_of(s, end, Pred);
123
2.28k
}
124
125
inline auto is_digit_range = range_suite<is_numeric_ascii>;
126
inline auto is_space_range = range_suite<is_whitespace_ascii>;
127
128
// combine in_bound and range_suite is ok. won't lead to duplicated calculation.
129
56.9k
inline bool in_bound(const char* s, const char* end, size_t offset) {
130
56.9k
    if (s + offset >= end) [[unlikely]] {
131
3.12k
        return false;
132
3.12k
    }
133
53.8k
    return true;
134
56.9k
}
135
136
// LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more.
137
// if need result, use StringRef{origin_s, s} outside
138
template <int LEN, bool (*Pred)(char)>
139
498k
bool skip_qualified_char(const char*& s, const char* end) {
140
498k
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
1.12M
        while (s != end && Pred(*s)) {
143
693k
            ++s;
144
693k
        }
145
427k
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
131k
        for (int i = 0; i < LEN; ++i, ++s) {
148
71.3k
            if (s == end || !Pred(*s)) [[unlikely]] {
149
10.8k
                return false;
150
10.8k
            }
151
71.3k
        }
152
71.3k
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
54
        int count = 0;
155
360
        while (s != end && Pred(*s)) {
156
306
            ++s;
157
306
            ++count;
158
306
        }
159
54
        if (count < -LEN) [[unlikely]] {
160
0
            return false;
161
0
        }
162
54
    }
163
60.6k
    return true;
164
498k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
139
158k
bool skip_qualified_char(const char*& s, const char* end) {
140
158k
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
161k
        while (s != end && Pred(*s)) {
143
3.02k
            ++s;
144
3.02k
        }
145
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
        for (int i = 0; i < LEN; ++i, ++s) {
148
            if (s == end || !Pred(*s)) [[unlikely]] {
149
                return false;
150
            }
151
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
158k
    return true;
164
158k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_
Line
Count
Source
139
268k
bool skip_qualified_char(const char*& s, const char* end) {
140
268k
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
958k
        while (s != end && Pred(*s)) {
143
690k
            ++s;
144
690k
        }
145
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
        for (int i = 0; i < LEN; ++i, ++s) {
148
            if (s == end || !Pred(*s)) [[unlikely]] {
149
                return false;
150
            }
151
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
268k
    return true;
164
268k
}
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
139
54
bool skip_qualified_char(const char*& s, const char* end) {
140
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
        while (s != end && Pred(*s)) {
143
            ++s;
144
        }
145
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
        for (int i = 0; i < LEN; ++i, ++s) {
148
            if (s == end || !Pred(*s)) [[unlikely]] {
149
                return false;
150
            }
151
        }
152
54
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
54
        int count = 0;
155
360
        while (s != end && Pred(*s)) {
156
306
            ++s;
157
306
            ++count;
158
306
        }
159
54
        if (count < -LEN) [[unlikely]] {
160
0
            return false;
161
0
        }
162
54
    }
163
54
    return true;
164
54
}
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_
_ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_
Line
Count
Source
139
35.2k
bool skip_qualified_char(const char*& s, const char* end) {
140
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
        while (s != end && Pred(*s)) {
143
            ++s;
144
        }
145
35.2k
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
59.8k
        for (int i = 0; i < LEN; ++i, ++s) {
148
35.2k
            if (s == end || !Pred(*s)) [[unlikely]] {
149
10.6k
                return false;
150
10.6k
            }
151
35.2k
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
24.5k
    return true;
164
35.2k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_
Line
Count
Source
139
2.44k
bool skip_qualified_char(const char*& s, const char* end) {
140
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
        while (s != end && Pred(*s)) {
143
            ++s;
144
        }
145
2.44k
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
4.84k
        for (int i = 0; i < LEN; ++i, ++s) {
148
2.44k
            if (s == end || !Pred(*s)) [[unlikely]] {
149
47
                return false;
150
47
            }
151
2.44k
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
2.39k
    return true;
164
2.44k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_
Line
Count
Source
139
33.5k
bool skip_qualified_char(const char*& s, const char* end) {
140
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
        while (s != end && Pred(*s)) {
143
            ++s;
144
        }
145
33.5k
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
67.0k
        for (int i = 0; i < LEN; ++i, ++s) {
148
33.5k
            if (s == end || !Pred(*s)) [[unlikely]] {
149
42
                return false;
150
42
            }
151
33.5k
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
33.4k
    return true;
164
33.5k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_
Line
Count
Source
139
142
bool skip_qualified_char(const char*& s, const char* end) {
140
    if constexpr (LEN == 0) {
141
        // Consume any length of characters that match the predicate.
142
        while (s != end && Pred(*s)) {
143
            ++s;
144
        }
145
142
    } else if constexpr (LEN > 0) {
146
        // Consume exactly LEN characters that match the predicate.
147
260
        for (int i = 0; i < LEN; ++i, ++s) {
148
142
            if (s == end || !Pred(*s)) [[unlikely]] {
149
24
                return false;
150
24
            }
151
142
        }
152
    } else { // LEN < 0
153
        // Consume at least -LEN characters that match the predicate.
154
        int count = 0;
155
        while (s != end && Pred(*s)) {
156
            ++s;
157
            ++count;
158
        }
159
        if (count < -LEN) [[unlikely]] {
160
            return false;
161
        }
162
    }
163
118
    return true;
164
142
}
165
166
inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
167
inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
168
inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
169
inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
170
inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;
171
172
2.44k
inline bool is_delimiter(char c) {
173
2.44k
    return c == ' ' || c == 'T' || c == ':';
174
2.44k
}
175
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
176
177
55.4k
inline bool is_date_sep(char c) {
178
55.4k
    return c == '-' || c == '/';
179
55.4k
}
180
inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;
181
182
142
inline bool is_colon(char c) {
183
142
    return c == ':';
184
142
}
185
inline auto consume_one_colon = skip_qualified_char<1, is_colon>;
186
187
// only consume a string of digit, not include sign.
188
// when has MAX_LEN > 0, do greedy match but at most MAX_LEN.
189
// LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits.
190
template <typename T, int LEN = 0, int MAX_LEN = -1>
191
20
bool consume_digit(const char*& s, const char* end, T& out) {
192
20
    static_assert(LEN >= 0);
193
    if constexpr (MAX_LEN > 0) {
194
        out = 0;
195
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
196
            if (s == end || !is_numeric_ascii(*s)) {
197
                if (i < LEN) [[unlikely]] {
198
                    return false;
199
                }
200
                break; // stop consuming if we have consumed enough digits.
201
            }
202
            out = out * 10 + (*s - '0');
203
        }
204
    } else if constexpr (LEN == 0) {
205
        // Consume any length of digits.
206
        out = 0;
207
        while (s != end && is_numeric_ascii(*s)) {
208
            out = out * 10 + (*s - '0');
209
            ++s;
210
        }
211
20
    } else if constexpr (LEN > 0) {
212
        // Consume exactly LEN digits.
213
20
        out = 0;
214
85
        for (int i = 0; i < LEN; ++i, ++s) {
215
65
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
216
0
                return false;
217
0
            }
218
65
            out = out * 10 + (*s - '0');
219
65
        }
220
20
    }
221
20
    return true;
222
20
}
_ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_
Line
Count
Source
191
15
bool consume_digit(const char*& s, const char* end, T& out) {
192
15
    static_assert(LEN >= 0);
193
    if constexpr (MAX_LEN > 0) {
194
        out = 0;
195
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
196
            if (s == end || !is_numeric_ascii(*s)) {
197
                if (i < LEN) [[unlikely]] {
198
                    return false;
199
                }
200
                break; // stop consuming if we have consumed enough digits.
201
            }
202
            out = out * 10 + (*s - '0');
203
        }
204
    } else if constexpr (LEN == 0) {
205
        // Consume any length of digits.
206
        out = 0;
207
        while (s != end && is_numeric_ascii(*s)) {
208
            out = out * 10 + (*s - '0');
209
            ++s;
210
        }
211
15
    } else if constexpr (LEN > 0) {
212
        // Consume exactly LEN digits.
213
15
        out = 0;
214
75
        for (int i = 0; i < LEN; ++i, ++s) {
215
60
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
216
0
                return false;
217
0
            }
218
60
            out = out * 10 + (*s - '0');
219
60
        }
220
15
    }
221
15
    return true;
222
15
}
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_
Line
Count
Source
191
5
bool consume_digit(const char*& s, const char* end, T& out) {
192
5
    static_assert(LEN >= 0);
193
    if constexpr (MAX_LEN > 0) {
194
        out = 0;
195
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
196
            if (s == end || !is_numeric_ascii(*s)) {
197
                if (i < LEN) [[unlikely]] {
198
                    return false;
199
                }
200
                break; // stop consuming if we have consumed enough digits.
201
            }
202
            out = out * 10 + (*s - '0');
203
        }
204
    } else if constexpr (LEN == 0) {
205
        // Consume any length of digits.
206
        out = 0;
207
        while (s != end && is_numeric_ascii(*s)) {
208
            out = out * 10 + (*s - '0');
209
            ++s;
210
        }
211
5
    } else if constexpr (LEN > 0) {
212
        // Consume exactly LEN digits.
213
5
        out = 0;
214
10
        for (int i = 0; i < LEN; ++i, ++s) {
215
5
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
216
0
                return false;
217
0
            }
218
5
            out = out * 10 + (*s - '0');
219
5
        }
220
5
    }
221
5
    return true;
222
5
}
223
224
// specialized version for 2 digits, which is used very often in date/time parsing.
225
template <>
226
112k
inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) {
227
112k
    out = 0;
228
112k
    if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1)))
229
18.2k
            [[unlikely]] {
230
18.2k
        return false;
231
18.2k
    }
232
94.7k
    out = (s[0] - '0') * 10 + (s[1] - '0');
233
94.7k
    s += 2; // consume 2 digits
234
94.7k
    return true;
235
112k
}
236
237
// specialized version for 1 or 2 digits, which is used very often in date/time parsing.
238
template <>
239
60.8k
inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) {
240
60.8k
    out = 0;
241
60.8k
    if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
242
479
        return false;
243
60.3k
    } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) {
244
        // consume 2 digits
245
43.6k
        out = (*s - '0') * 10 + (*(s + 1) - '0');
246
43.6k
        s += 2;
247
43.6k
    } else {
248
        // consume 1 digit
249
16.6k
        out = *s - '0';
250
16.6k
        ++s;
251
16.6k
    }
252
60.3k
    return true;
253
60.8k
}
254
255
template <bool (*Pred)(char)>
256
148
uint32_t count_valid_length(const char* s, const char* end) {
257
148
    DCHECK(s <= end) << "s: " << s << ", end: " << end;
258
148
    uint32_t count = 0;
259
449
    while (s != end && Pred(*s)) {
260
301
        ++count;
261
301
        ++s;
262
301
    }
263
148
    return count;
264
148
}
265
266
inline auto count_digits = count_valid_length<is_numeric_ascii>;
267
268
136
inline std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
269
136
    std::string result(6, '0');
270
136
    result[0] = sign;
271
136
    result[1] = '0' + (hour_offset / 10);
272
136
    result[2] = '0' + (hour_offset % 10);
273
136
    result[3] = ':';
274
136
    result[4] = '0' + (minute_offset / 10);
275
136
    result[5] = '0' + (minute_offset % 10);
276
136
    DCHECK_EQ(result.size(), 6);
277
136
    return result;
278
136
}
279
280
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
281
// this is significantly faster than libc (atoi/strtol and atof/strtod).
282
//
283
// Strings with leading and trailing whitespaces are accepted.
284
// Branching is heavily optimized for the non-whitespace successful case.
285
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
286
// If that first attempt was unsuccessful, these functions retry the parsing after removing
287
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
288
//
289
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
290
// data type.  This is different from hive, which returns NULL for overflow slots for int types
291
// and inf/-inf for float types.
292
//
293
// Things we tried that did not work:
294
//  - lookup table for converting character to digit
295
// Improvements (TODO):
296
//  - Validate input using _simd_compare_ranges
297
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
298
class StringParser {
299
public:
300
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
301
302
    template <typename T>
303
475k
    static T numeric_limits(bool negative) {
304
475k
        if constexpr (std::is_same_v<T, __int128>) {
305
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
306
426k
        } else {
307
426k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
426k
        }
309
475k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
303
48.3k
    static T numeric_limits(bool negative) {
304
48.3k
        if constexpr (std::is_same_v<T, __int128>) {
305
48.3k
            return negative ? MIN_INT128 : MAX_INT128;
306
        } else {
307
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
        }
309
48.3k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
303
165k
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
165k
        } else {
307
165k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
165k
        }
309
165k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
303
76.9k
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
76.9k
        } else {
307
76.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
76.9k
        }
309
76.9k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
303
95.7k
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
95.7k
        } else {
307
95.7k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
95.7k
        }
309
95.7k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
303
87.9k
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
87.9k
        } else {
307
87.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
87.9k
        }
309
87.9k
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
303
148
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
148
        } else {
307
148
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
148
        }
309
148
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
303
29
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
29
        } else {
307
29
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
29
        }
309
29
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
303
4
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
4
        } else {
307
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
4
        }
309
4
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
303
4
    static T numeric_limits(bool negative) {
304
        if constexpr (std::is_same_v<T, __int128>) {
305
            return negative ? MIN_INT128 : MAX_INT128;
306
4
        } else {
307
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
308
4
        }
309
4
    }
310
311
    template <typename T>
312
936k
    static T get_scale_multiplier(int scale) {
313
936k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
314
936k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
315
936k
                      "You can only instantiate as int32_t, int64_t, __int128.");
316
936k
        if constexpr (std::is_same_v<T, int32_t>) {
317
133k
            return common::exp10_i32(scale);
318
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
319
182k
            return common::exp10_i64(scale);
320
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
321
237k
            return common::exp10_i128(scale);
322
382k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
323
382k
            return common::exp10_i256(scale);
324
382k
        }
325
936k
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
312
133k
    static T get_scale_multiplier(int scale) {
313
133k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
314
133k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
315
133k
                      "You can only instantiate as int32_t, int64_t, __int128.");
316
133k
        if constexpr (std::is_same_v<T, int32_t>) {
317
133k
            return common::exp10_i32(scale);
318
        } else if constexpr (std::is_same_v<T, int64_t>) {
319
            return common::exp10_i64(scale);
320
        } else if constexpr (std::is_same_v<T, __int128>) {
321
            return common::exp10_i128(scale);
322
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
323
            return common::exp10_i256(scale);
324
        }
325
133k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
312
182k
    static T get_scale_multiplier(int scale) {
313
182k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
314
182k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
315
182k
                      "You can only instantiate as int32_t, int64_t, __int128.");
316
        if constexpr (std::is_same_v<T, int32_t>) {
317
            return common::exp10_i32(scale);
318
182k
        } else if constexpr (std::is_same_v<T, int64_t>) {
319
182k
            return common::exp10_i64(scale);
320
        } else if constexpr (std::is_same_v<T, __int128>) {
321
            return common::exp10_i128(scale);
322
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
323
            return common::exp10_i256(scale);
324
        }
325
182k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
312
237k
    static T get_scale_multiplier(int scale) {
313
237k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
314
237k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
315
237k
                      "You can only instantiate as int32_t, int64_t, __int128.");
316
        if constexpr (std::is_same_v<T, int32_t>) {
317
            return common::exp10_i32(scale);
318
        } else if constexpr (std::is_same_v<T, int64_t>) {
319
            return common::exp10_i64(scale);
320
237k
        } else if constexpr (std::is_same_v<T, __int128>) {
321
237k
            return common::exp10_i128(scale);
322
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
323
            return common::exp10_i256(scale);
324
        }
325
237k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
312
382k
    static T get_scale_multiplier(int scale) {
313
382k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
314
382k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
315
382k
                      "You can only instantiate as int32_t, int64_t, __int128.");
316
        if constexpr (std::is_same_v<T, int32_t>) {
317
            return common::exp10_i32(scale);
318
        } else if constexpr (std::is_same_v<T, int64_t>) {
319
            return common::exp10_i64(scale);
320
        } else if constexpr (std::is_same_v<T, __int128>) {
321
            return common::exp10_i128(scale);
322
382k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
323
382k
            return common::exp10_i256(scale);
324
382k
        }
325
382k
    }
326
327
    // This is considerably faster than glibc's implementation (25x).
328
    // Assumes s represents a decimal number.
329
    template <typename T, bool enable_strict_mode = false>
330
381k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
381k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
381k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
328k
            return ans;
334
328k
        }
335
53.9k
        s = skip_leading_whitespace(s, len);
336
53.9k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
381k
    }
_ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
45.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
45.3k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
45.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
43.9k
            return ans;
334
43.9k
        }
335
1.33k
        s = skip_leading_whitespace(s, len);
336
1.33k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
45.3k
    }
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
95.4k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
95.4k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
95.4k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
66.2k
            return ans;
334
66.2k
        }
335
29.2k
        s = skip_leading_whitespace(s, len);
336
29.2k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
95.4k
    }
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
1.00k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
1.00k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
1.00k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
88
            return ans;
334
88
        }
335
912
        s = skip_leading_whitespace(s, len);
336
912
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
1.00k
    }
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
66.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
66.3k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
66.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
58.1k
            return ans;
334
58.1k
        }
335
8.12k
        s = skip_leading_whitespace(s, len);
336
8.12k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
66.3k
    }
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
984
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
984
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
984
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
88
            return ans;
334
88
        }
335
896
        s = skip_leading_whitespace(s, len);
336
896
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
984
    }
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
88.9k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
88.9k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
88.9k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
82.6k
            return ans;
334
82.6k
        }
335
6.29k
        s = skip_leading_whitespace(s, len);
336
6.29k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
88.9k
    }
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
968
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
968
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
968
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
88
            return ans;
334
88
        }
335
880
        s = skip_leading_whitespace(s, len);
336
880
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
968
    }
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
81.0k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
81.0k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
81.0k
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
76.4k
            return ans;
334
76.4k
        }
335
4.50k
        s = skip_leading_whitespace(s, len);
336
4.50k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
81.0k
    }
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
961
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
961
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
961
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
94
            return ans;
334
94
        }
335
867
        s = skip_leading_whitespace(s, len);
336
867
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
961
    }
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
936
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
936
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
936
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
88
            return ans;
334
88
        }
335
848
        s = skip_leading_whitespace(s, len);
336
848
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
936
    }
_ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
1
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
1
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
1
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
1
            return ans;
334
1
        }
335
0
        s = skip_leading_whitespace(s, len);
336
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
1
    }
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
28
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
28
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
28
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
28
            return ans;
334
28
        }
335
0
        s = skip_leading_whitespace(s, len);
336
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
28
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
4
            return ans;
334
4
        }
335
0
        s = skip_leading_whitespace(s, len);
336
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
4
    }
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
330
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
331
4
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
332
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
333
4
            return ans;
334
4
        }
335
0
        s = skip_leading_whitespace(s, len);
336
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
337
4
    }
338
339
    // This is considerably faster than glibc's implementation.
340
    // In the case of overflow, the max/min value for the data type will be returned.
341
    // Assumes s represents a decimal number.
342
    template <typename T>
343
1.37k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
344
1.37k
        s = skip_ascii_whitespaces(s, len);
345
1.37k
        return string_to_unsigned_int_internal<T>(s, len, result);
346
1.37k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
343
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
344
343
        s = skip_ascii_whitespaces(s, len);
345
343
        return string_to_unsigned_int_internal<T>(s, len, result);
346
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
343
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
344
343
        s = skip_ascii_whitespaces(s, len);
345
343
        return string_to_unsigned_int_internal<T>(s, len, result);
346
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
343
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
344
343
        s = skip_ascii_whitespaces(s, len);
345
343
        return string_to_unsigned_int_internal<T>(s, len, result);
346
343
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
343
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
344
343
        s = skip_ascii_whitespaces(s, len);
345
343
        return string_to_unsigned_int_internal<T>(s, len, result);
346
343
    }
347
348
    // Convert a string s representing a number in given base into a decimal number.
349
    template <typename T>
350
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
351
27.8k
                                  ParseResult* result) {
352
27.8k
        s = skip_ascii_whitespaces(s, len);
353
27.8k
        return string_to_int_internal<T>(s, len, base, result);
354
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
351
26.4k
                                  ParseResult* result) {
352
26.4k
        s = skip_ascii_whitespaces(s, len);
353
26.4k
        return string_to_int_internal<T>(s, len, base, result);
354
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
351
490
                                  ParseResult* result) {
352
490
        s = skip_ascii_whitespaces(s, len);
353
490
        return string_to_int_internal<T>(s, len, base, result);
354
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
351
441
                                  ParseResult* result) {
352
441
        s = skip_ascii_whitespaces(s, len);
353
441
        return string_to_int_internal<T>(s, len, base, result);
354
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
351
441
                                  ParseResult* result) {
352
441
        s = skip_ascii_whitespaces(s, len);
353
441
        return string_to_int_internal<T>(s, len, base, result);
354
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
351
1
                                  ParseResult* result) {
352
1
        s = skip_ascii_whitespaces(s, len);
353
1
        return string_to_int_internal<T>(s, len, base, result);
354
1
    }
355
356
    template <typename T>
357
153k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
358
153k
        s = skip_ascii_whitespaces(s, len);
359
153k
        return string_to_float_internal<T>(s, len, result);
360
153k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
357
87.9k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
358
87.9k
        s = skip_ascii_whitespaces(s, len);
359
87.9k
        return string_to_float_internal<T>(s, len, result);
360
87.9k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
357
65.4k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
358
65.4k
        s = skip_ascii_whitespaces(s, len);
359
65.4k
        return string_to_float_internal<T>(s, len, result);
360
65.4k
    }
361
362
    // Parses a string for 'true' or 'false', case insensitive.
363
11.3k
    static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
364
11.3k
        s = skip_ascii_whitespaces(s, len);
365
11.3k
        return string_to_bool_internal(s, len, result);
366
11.3k
    }
367
368
    template <PrimitiveType P>
369
    static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
370
            const char* __restrict s, size_t len, int type_precision, int type_scale,
371
            ParseResult* result);
372
373
    template <typename T>
374
    static Status split_string_to_map(const std::string& base, const T element_separator,
375
                                      const T key_value_separator,
376
                                      std::map<std::string, std::string>* result) {
377
        int key_pos = 0;
378
        int key_end;
379
        int val_pos;
380
        int val_end;
381
382
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
383
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
384
                std::string::npos) {
385
                break;
386
            }
387
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
388
                val_end = base.size();
389
            }
390
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
391
                                          base.substr(val_pos, val_end - val_pos)));
392
            key_pos = val_end;
393
            if (key_pos != std::string::npos) {
394
                ++key_pos;
395
            }
396
        }
397
398
        return Status::OK();
399
    }
400
401
    // This is considerably faster than glibc's implementation.
402
    // In the case of overflow, the max/min value for the data type will be returned.
403
    // Assumes s represents a decimal number.
404
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
405
    template <typename T, bool enable_strict_mode = false>
406
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
407
408
    // This is considerably faster than glibc's implementation.
409
    // In the case of overflow, the max/min value for the data type will be returned.
410
    // Assumes s represents a decimal number.
411
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
412
    template <typename T>
413
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
414
                                                    ParseResult* result);
415
416
    // Convert a string s representing a number in given base into a decimal number.
417
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
418
    template <typename T>
419
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
420
                                           ParseResult* result);
421
422
    // Converts an ascii string to an integer of type T assuming it cannot overflow
423
    // and the number is positive.
424
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
425
    template <typename T, bool enable_strict_mode = false>
426
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
427
                                              ParseResult* result);
428
429
    // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
430
    // char is not a digit.
431
    template <typename T>
432
    static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
433
                                                      ParseResult* result);
434
435
    // This is considerably faster than glibc's implementation (>100x why???)
436
    // No special case handling needs to be done for overflows, the floating point spec
437
    // already does it and will cap the values to -inf/inf
438
    // To avoid inaccurate conversions this function falls back to strtod for
439
    // scientific notation.
440
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
441
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
442
    template <typename T>
443
    static inline T string_to_float_internal(const char* __restrict s, int len,
444
                                             ParseResult* result);
445
446
    // parses a string for 'true' or 'false', case insensitive
447
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
448
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
449
                                               ParseResult* result);
450
451
    // Returns true if s only contains whitespace.
452
3.54k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
453
6.44k
        for (int i = 0; i < len; ++i) {
454
6.00k
            if (!LIKELY(is_whitespace_ascii(s[i]))) {
455
3.10k
                return false;
456
3.10k
            }
457
6.00k
        }
458
440
        return true;
459
3.54k
    }
460
461
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
462
3.67k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
463
3.67k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
464
3.67k
    }
465
466
2.67k
    static inline bool is_all_digit(const char* __restrict s, int len) {
467
5.57k
        for (int i = 0; i < len; ++i) {
468
3.05k
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
469
151
                return false;
470
151
            }
471
3.05k
        }
472
2.52k
        return true;
473
2.67k
    }
474
}; // end of class StringParser
475
476
template <typename T, bool enable_strict_mode>
477
436k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
436k
    if (UNLIKELY(len <= 0)) {
479
2.24k
        *result = PARSE_FAILURE;
480
2.24k
        return 0;
481
2.24k
    }
482
483
433k
    using UnsignedT = MakeUnsignedT<T>;
484
433k
    UnsignedT val = 0;
485
433k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
433k
    bool negative = false;
487
433k
    int i = 0;
488
433k
    switch (*s) {
489
102k
    case '-':
490
102k
        negative = true;
491
102k
        max_val += 1;
492
102k
        [[fallthrough]];
493
106k
    case '+':
494
106k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
106k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
433k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
433k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
267k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
267k
        return static_cast<T>(negative ? -val : val);
506
267k
    }
507
508
166k
    const T max_div_10 = max_val / 10;
509
166k
    const T max_mod_10 = max_val % 10;
510
511
166k
    int first = i;
512
1.68M
    for (; i < len; ++i) {
513
1.61M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
1.57M
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
1.57M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
48.4k
                *result = PARSE_OVERFLOW;
518
48.4k
                return negative ? -max_val : max_val;
519
48.4k
            }
520
1.52M
            val = val * 10 + digit;
521
1.52M
        } else {
522
45.9k
            if constexpr (enable_strict_mode) {
523
4.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
3.78k
                    *result = PARSE_FAILURE;
526
3.78k
                    return 0;
527
3.78k
                }
528
41.8k
            } else {
529
                // Save original position where non-digit was found
530
41.8k
                int remaining_len = len - i;
531
41.8k
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
41.8k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
41.8k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
41.8k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
28.9k
                    *result = PARSE_FAILURE;
539
28.9k
                    return 0;
540
28.9k
                }
541
41.8k
            }
542
            // Returning here is slightly faster than breaking the loop.
543
13.1k
            *result = PARSE_SUCCESS;
544
45.9k
            return static_cast<T>(negative ? -val : val);
545
45.9k
        }
546
1.61M
    }
547
71.7k
    *result = PARSE_SUCCESS;
548
71.7k
    return static_cast<T>(negative ? -val : val);
549
166k
}
_ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
46.6k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
46.6k
    if (UNLIKELY(len <= 0)) {
479
44
        *result = PARSE_FAILURE;
480
44
        return 0;
481
44
    }
482
483
46.6k
    using UnsignedT = MakeUnsignedT<T>;
484
46.6k
    UnsignedT val = 0;
485
46.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
46.6k
    bool negative = false;
487
46.6k
    int i = 0;
488
46.6k
    switch (*s) {
489
3.54k
    case '-':
490
3.54k
        negative = true;
491
3.54k
        max_val += 1;
492
3.54k
        [[fallthrough]];
493
3.82k
    case '+':
494
3.82k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
3.82k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
46.6k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
46.6k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
41.9k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
41.9k
        return static_cast<T>(negative ? -val : val);
506
41.9k
    }
507
508
4.65k
    const T max_div_10 = max_val / 10;
509
4.65k
    const T max_mod_10 = max_val % 10;
510
511
4.65k
    int first = i;
512
172k
    for (; i < len; ++i) {
513
169k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
168k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
168k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
512
                *result = PARSE_OVERFLOW;
518
512
                return negative ? -max_val : max_val;
519
512
            }
520
168k
            val = val * 10 + digit;
521
168k
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
536
            } else {
529
                // Save original position where non-digit was found
530
536
                int remaining_len = len - i;
531
536
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
536
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
536
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
376
                    *result = PARSE_FAILURE;
539
376
                    return 0;
540
376
                }
541
536
            }
542
            // Returning here is slightly faster than breaking the loop.
543
160
            *result = PARSE_SUCCESS;
544
536
            return static_cast<T>(negative ? -val : val);
545
536
        }
546
169k
    }
547
3.60k
    *result = PARSE_SUCCESS;
548
3.60k
    return static_cast<T>(negative ? -val : val);
549
4.65k
}
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
124k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
124k
    if (UNLIKELY(len <= 0)) {
479
218
        *result = PARSE_FAILURE;
480
218
        return 0;
481
218
    }
482
483
124k
    using UnsignedT = MakeUnsignedT<T>;
484
124k
    UnsignedT val = 0;
485
124k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
124k
    bool negative = false;
487
124k
    int i = 0;
488
124k
    switch (*s) {
489
22.4k
    case '-':
490
22.4k
        negative = true;
491
22.4k
        max_val += 1;
492
22.4k
        [[fallthrough]];
493
22.9k
    case '+':
494
22.9k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
22.9k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
124k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
124k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
60.4k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
60.4k
        return static_cast<T>(negative ? -val : val);
506
60.4k
    }
507
508
64.0k
    const T max_div_10 = max_val / 10;
509
64.0k
    const T max_mod_10 = max_val % 10;
510
511
64.0k
    int first = i;
512
154k
    for (; i < len; ++i) {
513
147k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
111k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
111k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
21.2k
                *result = PARSE_OVERFLOW;
518
21.2k
                return negative ? -max_val : max_val;
519
21.2k
            }
520
90.2k
            val = val * 10 + digit;
521
90.2k
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
35.9k
            } else {
529
                // Save original position where non-digit was found
530
35.9k
                int remaining_len = len - i;
531
35.9k
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
35.9k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
35.9k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
35.9k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
24.5k
                    *result = PARSE_FAILURE;
539
24.5k
                    return 0;
540
24.5k
                }
541
35.9k
            }
542
            // Returning here is slightly faster than breaking the loop.
543
11.4k
            *result = PARSE_SUCCESS;
544
35.9k
            return static_cast<T>(negative ? -val : val);
545
35.9k
        }
546
147k
    }
547
6.80k
    *result = PARSE_SUCCESS;
548
6.80k
    return static_cast<T>(negative ? -val : val);
549
64.0k
}
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1.91k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1.91k
    if (UNLIKELY(len <= 0)) {
479
8
        *result = PARSE_FAILURE;
480
8
        return 0;
481
8
    }
482
483
1.90k
    using UnsignedT = MakeUnsignedT<T>;
484
1.90k
    UnsignedT val = 0;
485
1.90k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1.90k
    bool negative = false;
487
1.90k
    int i = 0;
488
1.90k
    switch (*s) {
489
632
    case '-':
490
632
        negative = true;
491
632
        max_val += 1;
492
632
        [[fallthrough]];
493
988
    case '+':
494
988
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
988
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1.90k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1.90k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
48
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
48
        return static_cast<T>(negative ? -val : val);
506
48
    }
507
508
1.85k
    const T max_div_10 = max_val / 10;
509
1.85k
    const T max_mod_10 = max_val % 10;
510
511
1.85k
    int first = i;
512
6.58k
    for (; i < len; ++i) {
513
6.51k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
5.32k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
5.32k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
600
                *result = PARSE_OVERFLOW;
518
600
                return negative ? -max_val : max_val;
519
600
            }
520
4.72k
            val = val * 10 + digit;
521
4.72k
        } else {
522
1.18k
            if constexpr (enable_strict_mode) {
523
1.18k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
1.10k
                    *result = PARSE_FAILURE;
526
1.10k
                    return 0;
527
1.10k
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
88
            *result = PARSE_SUCCESS;
544
1.18k
            return static_cast<T>(negative ? -val : val);
545
1.18k
        }
546
6.51k
    }
547
68
    *result = PARSE_SUCCESS;
548
68
    return static_cast<T>(negative ? -val : val);
549
1.85k
}
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
74.4k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
74.4k
    if (UNLIKELY(len <= 0)) {
479
8
        *result = PARSE_FAILURE;
480
8
        return 0;
481
8
    }
482
483
74.4k
    using UnsignedT = MakeUnsignedT<T>;
484
74.4k
    UnsignedT val = 0;
485
74.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
74.4k
    bool negative = false;
487
74.4k
    int i = 0;
488
74.4k
    switch (*s) {
489
12.8k
    case '-':
490
12.8k
        negative = true;
491
12.8k
        max_val += 1;
492
12.8k
        [[fallthrough]];
493
13.1k
    case '+':
494
13.1k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
13.1k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
74.4k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
74.4k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
50.8k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
50.8k
        return static_cast<T>(negative ? -val : val);
506
50.8k
    }
507
508
23.6k
    const T max_div_10 = max_val / 10;
509
23.6k
    const T max_mod_10 = max_val % 10;
510
511
23.6k
    int first = i;
512
123k
    for (; i < len; ++i) {
513
114k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
112k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
112k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
12.9k
                *result = PARSE_OVERFLOW;
518
12.9k
                return negative ? -max_val : max_val;
519
12.9k
            }
520
99.5k
            val = val * 10 + digit;
521
99.5k
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
1.90k
            } else {
529
                // Save original position where non-digit was found
530
1.90k
                int remaining_len = len - i;
531
1.90k
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
1.90k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
1.90k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
1.90k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
1.29k
                    *result = PARSE_FAILURE;
539
1.29k
                    return 0;
540
1.29k
                }
541
1.90k
            }
542
            // Returning here is slightly faster than breaking the loop.
543
610
            *result = PARSE_SUCCESS;
544
1.90k
            return static_cast<T>(negative ? -val : val);
545
1.90k
        }
546
114k
    }
547
8.80k
    *result = PARSE_SUCCESS;
548
8.80k
    return static_cast<T>(negative ? -val : val);
549
23.6k
}
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1.88k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1.88k
    if (UNLIKELY(len <= 0)) {
479
8
        *result = PARSE_FAILURE;
480
8
        return 0;
481
8
    }
482
483
1.87k
    using UnsignedT = MakeUnsignedT<T>;
484
1.87k
    UnsignedT val = 0;
485
1.87k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1.87k
    bool negative = false;
487
1.87k
    int i = 0;
488
1.87k
    switch (*s) {
489
620
    case '-':
490
620
        negative = true;
491
620
        max_val += 1;
492
620
        [[fallthrough]];
493
970
    case '+':
494
970
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
970
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1.87k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1.87k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
168
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
168
        return static_cast<T>(negative ? -val : val);
506
168
    }
507
508
1.70k
    const T max_div_10 = max_val / 10;
509
1.70k
    const T max_mod_10 = max_val % 10;
510
511
1.70k
    int first = i;
512
7.87k
    for (; i < len; ++i) {
513
7.83k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
6.74k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
6.74k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
576
                *result = PARSE_OVERFLOW;
518
576
                return negative ? -max_val : max_val;
519
576
            }
520
6.17k
            val = val * 10 + digit;
521
6.17k
        } else {
522
1.08k
            if constexpr (enable_strict_mode) {
523
1.08k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
1.00k
                    *result = PARSE_FAILURE;
526
1.00k
                    return 0;
527
1.00k
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
88
            *result = PARSE_SUCCESS;
544
1.08k
            return static_cast<T>(negative ? -val : val);
545
1.08k
        }
546
7.83k
    }
547
40
    *result = PARSE_SUCCESS;
548
40
    return static_cast<T>(negative ? -val : val);
549
1.70k
}
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
95.2k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
95.2k
    if (UNLIKELY(len <= 0)) {
479
1.91k
        *result = PARSE_FAILURE;
480
1.91k
        return 0;
481
1.91k
    }
482
483
93.3k
    using UnsignedT = MakeUnsignedT<T>;
484
93.3k
    UnsignedT val = 0;
485
93.3k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
93.3k
    bool negative = false;
487
93.3k
    int i = 0;
488
93.3k
    switch (*s) {
489
10.5k
    case '-':
490
10.5k
        negative = true;
491
10.5k
        max_val += 1;
492
10.5k
        [[fallthrough]];
493
10.9k
    case '+':
494
10.9k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
10.9k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
93.3k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
93.3k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
79.8k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
79.8k
        return static_cast<T>(negative ? -val : val);
506
79.8k
    }
507
508
13.5k
    const T max_div_10 = max_val / 10;
509
13.5k
    const T max_mod_10 = max_val % 10;
510
511
13.5k
    int first = i;
512
128k
    for (; i < len; ++i) {
513
122k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
120k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
120k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
5.79k
                *result = PARSE_OVERFLOW;
518
5.79k
                return negative ? -max_val : max_val;
519
5.79k
            }
520
115k
            val = val * 10 + digit;
521
115k
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
1.91k
            } else {
529
                // Save original position where non-digit was found
530
1.91k
                int remaining_len = len - i;
531
1.91k
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
1.91k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
1.91k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
1.91k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
1.53k
                    *result = PARSE_FAILURE;
539
1.53k
                    return 0;
540
1.53k
                }
541
1.91k
            }
542
            // Returning here is slightly faster than breaking the loop.
543
386
            *result = PARSE_SUCCESS;
544
1.91k
            return static_cast<T>(negative ? -val : val);
545
1.91k
        }
546
122k
    }
547
5.82k
    *result = PARSE_SUCCESS;
548
5.82k
    return static_cast<T>(negative ? -val : val);
549
13.5k
}
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1.84k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1.84k
    if (UNLIKELY(len <= 0)) {
479
8
        *result = PARSE_FAILURE;
480
8
        return 0;
481
8
    }
482
483
1.84k
    using UnsignedT = MakeUnsignedT<T>;
484
1.84k
    UnsignedT val = 0;
485
1.84k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1.84k
    bool negative = false;
487
1.84k
    int i = 0;
488
1.84k
    switch (*s) {
489
608
    case '-':
490
608
        negative = true;
491
608
        max_val += 1;
492
608
        [[fallthrough]];
493
952
    case '+':
494
952
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
952
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1.84k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1.84k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
461
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
461
        return static_cast<T>(negative ? -val : val);
506
461
    }
507
508
1.37k
    const T max_div_10 = max_val / 10;
509
1.37k
    const T max_mod_10 = max_val % 10;
510
511
1.37k
    int first = i;
512
10.7k
    for (; i < len; ++i) {
513
10.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
9.90k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
9.90k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
552
                *result = PARSE_OVERFLOW;
518
552
                return negative ? -max_val : max_val;
519
552
            }
520
9.34k
            val = val * 10 + digit;
521
9.34k
        } else {
522
795
            if constexpr (enable_strict_mode) {
523
795
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
735
                    *result = PARSE_FAILURE;
526
735
                    return 0;
527
735
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
60
            *result = PARSE_SUCCESS;
544
795
            return static_cast<T>(negative ? -val : val);
545
795
        }
546
10.6k
    }
547
32
    *result = PARSE_SUCCESS;
548
32
    return static_cast<T>(negative ? -val : val);
549
1.37k
}
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
85.5k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
85.5k
    if (UNLIKELY(len <= 0)) {
479
14
        *result = PARSE_FAILURE;
480
14
        return 0;
481
14
    }
482
483
85.4k
    using UnsignedT = MakeUnsignedT<T>;
484
85.4k
    UnsignedT val = 0;
485
85.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
85.4k
    bool negative = false;
487
85.4k
    int i = 0;
488
85.4k
    switch (*s) {
489
50.2k
    case '-':
490
50.2k
        negative = true;
491
50.2k
        max_val += 1;
492
50.2k
        [[fallthrough]];
493
50.5k
    case '+':
494
50.5k
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
50.5k
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
85.4k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
85.4k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
32.3k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
32.3k
        return static_cast<T>(negative ? -val : val);
506
32.3k
    }
507
508
53.1k
    const T max_div_10 = max_val / 10;
509
53.1k
    const T max_mod_10 = max_val % 10;
510
511
53.1k
    int first = i;
512
1.03M
    for (; i < len; ++i) {
513
989k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
988k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
988k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
5.22k
                *result = PARSE_OVERFLOW;
518
5.22k
                return negative ? -max_val : max_val;
519
5.22k
            }
520
983k
            val = val * 10 + digit;
521
983k
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
1.49k
            } else {
529
                // Save original position where non-digit was found
530
1.49k
                int remaining_len = len - i;
531
1.49k
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
1.49k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
1.49k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
1.49k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
1.24k
                    *result = PARSE_FAILURE;
539
1.24k
                    return 0;
540
1.24k
                }
541
1.49k
            }
542
            // Returning here is slightly faster than breaking the loop.
543
244
            *result = PARSE_SUCCESS;
544
1.49k
            return static_cast<T>(negative ? -val : val);
545
1.49k
        }
546
989k
    }
547
46.4k
    *result = PARSE_SUCCESS;
548
46.4k
    return static_cast<T>(negative ? -val : val);
549
53.1k
}
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1.82k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1.82k
    if (UNLIKELY(len <= 0)) {
479
10
        *result = PARSE_FAILURE;
480
10
        return 0;
481
10
    }
482
483
1.81k
    using UnsignedT = MakeUnsignedT<T>;
484
1.81k
    UnsignedT val = 0;
485
1.81k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1.81k
    bool negative = false;
487
1.81k
    int i = 0;
488
1.81k
    switch (*s) {
489
596
    case '-':
490
596
        negative = true;
491
596
        max_val += 1;
492
596
        [[fallthrough]];
493
934
    case '+':
494
934
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
934
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1.81k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1.81k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
735
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
735
        return static_cast<T>(negative ? -val : val);
506
735
    }
507
508
1.08k
    const T max_div_10 = max_val / 10;
509
1.08k
    const T max_mod_10 = max_val % 10;
510
511
1.08k
    int first = i;
512
16.9k
    for (; i < len; ++i) {
513
16.8k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
16.3k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
16.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
528
                *result = PARSE_OVERFLOW;
518
528
                return negative ? -max_val : max_val;
519
528
            }
520
15.8k
            val = val * 10 + digit;
521
15.8k
        } else {
522
523
            if constexpr (enable_strict_mode) {
523
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
491
                    *result = PARSE_FAILURE;
526
491
                    return 0;
527
491
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
32
            *result = PARSE_SUCCESS;
544
523
            return static_cast<T>(negative ? -val : val);
545
523
        }
546
16.8k
    }
547
32
    *result = PARSE_SUCCESS;
548
32
    return static_cast<T>(negative ? -val : val);
549
1.08k
}
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1.78k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1.78k
    if (UNLIKELY(len <= 0)) {
479
8
        *result = PARSE_FAILURE;
480
8
        return 0;
481
8
    }
482
483
1.77k
    using UnsignedT = MakeUnsignedT<T>;
484
1.77k
    UnsignedT val = 0;
485
1.77k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1.77k
    bool negative = false;
487
1.77k
    int i = 0;
488
1.77k
    switch (*s) {
489
584
    case '-':
490
584
        negative = true;
491
584
        max_val += 1;
492
584
        [[fallthrough]];
493
916
    case '+':
494
916
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
916
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1.77k
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1.77k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
752
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
752
        return static_cast<T>(negative ? -val : val);
506
752
    }
507
508
1.02k
    const T max_div_10 = max_val / 10;
509
1.02k
    const T max_mod_10 = max_val % 10;
510
511
1.02k
    int first = i;
512
31.3k
    for (; i < len; ++i) {
513
31.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
30.7k
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
30.7k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
504
                *result = PARSE_OVERFLOW;
518
504
                return negative ? -max_val : max_val;
519
504
            }
520
30.2k
            val = val * 10 + digit;
521
30.2k
        } else {
522
488
            if constexpr (enable_strict_mode) {
523
488
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
456
                    *result = PARSE_FAILURE;
526
456
                    return 0;
527
456
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
32
            *result = PARSE_SUCCESS;
544
488
            return static_cast<T>(negative ? -val : val);
545
488
        }
546
31.2k
    }
547
32
    *result = PARSE_SUCCESS;
548
32
    return static_cast<T>(negative ? -val : val);
549
1.02k
}
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
149
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
149
    if (UNLIKELY(len <= 0)) {
479
2
        *result = PARSE_FAILURE;
480
2
        return 0;
481
2
    }
482
483
147
    using UnsignedT = MakeUnsignedT<T>;
484
147
    UnsignedT val = 0;
485
147
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
147
    bool negative = false;
487
147
    int i = 0;
488
147
    switch (*s) {
489
0
    case '-':
490
0
        negative = true;
491
0
        max_val += 1;
492
0
        [[fallthrough]];
493
0
    case '+':
494
0
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
0
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
147
    }
501
502
    // This is the fast path where the string cannot overflow.
503
147
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
147
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
147
        return static_cast<T>(negative ? -val : val);
506
147
    }
507
508
0
    const T max_div_10 = max_val / 10;
509
0
    const T max_mod_10 = max_val % 10;
510
511
0
    int first = i;
512
0
    for (; i < len; ++i) {
513
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
0
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
0
                *result = PARSE_OVERFLOW;
518
0
                return negative ? -max_val : max_val;
519
0
            }
520
0
            val = val * 10 + digit;
521
0
        } else {
522
0
            if constexpr (enable_strict_mode) {
523
0
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
0
                    *result = PARSE_FAILURE;
526
0
                    return 0;
527
0
                }
528
            } else {
529
                // Save original position where non-digit was found
530
                int remaining_len = len - i;
531
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
                    *result = PARSE_FAILURE;
539
                    return 0;
540
                }
541
            }
542
            // Returning here is slightly faster than breaking the loop.
543
0
            *result = PARSE_SUCCESS;
544
0
            return static_cast<T>(negative ? -val : val);
545
0
        }
546
0
    }
547
0
    *result = PARSE_SUCCESS;
548
0
    return static_cast<T>(negative ? -val : val);
549
0
}
_ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
1
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
1
    if (UNLIKELY(len <= 0)) {
479
0
        *result = PARSE_FAILURE;
480
0
        return 0;
481
0
    }
482
483
1
    using UnsignedT = MakeUnsignedT<T>;
484
1
    UnsignedT val = 0;
485
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
1
    bool negative = false;
487
1
    int i = 0;
488
1
    switch (*s) {
489
0
    case '-':
490
0
        negative = true;
491
0
        max_val += 1;
492
0
        [[fallthrough]];
493
0
    case '+':
494
0
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
0
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
1
    }
501
502
    // This is the fast path where the string cannot overflow.
503
1
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
1
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
1
        return static_cast<T>(negative ? -val : val);
506
1
    }
507
508
0
    const T max_div_10 = max_val / 10;
509
0
    const T max_mod_10 = max_val % 10;
510
511
0
    int first = i;
512
0
    for (; i < len; ++i) {
513
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
0
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
0
                *result = PARSE_OVERFLOW;
518
0
                return negative ? -max_val : max_val;
519
0
            }
520
0
            val = val * 10 + digit;
521
0
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
0
            } else {
529
                // Save original position where non-digit was found
530
0
                int remaining_len = len - i;
531
0
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
0
                    *result = PARSE_FAILURE;
539
0
                    return 0;
540
0
                }
541
0
            }
542
            // Returning here is slightly faster than breaking the loop.
543
0
            *result = PARSE_SUCCESS;
544
0
            return static_cast<T>(negative ? -val : val);
545
0
        }
546
0
    }
547
0
    *result = PARSE_SUCCESS;
548
0
    return static_cast<T>(negative ? -val : val);
549
0
}
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
28
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
28
    if (UNLIKELY(len <= 0)) {
479
0
        *result = PARSE_FAILURE;
480
0
        return 0;
481
0
    }
482
483
28
    using UnsignedT = MakeUnsignedT<T>;
484
28
    UnsignedT val = 0;
485
28
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
28
    bool negative = false;
487
28
    int i = 0;
488
28
    switch (*s) {
489
0
    case '-':
490
0
        negative = true;
491
0
        max_val += 1;
492
0
        [[fallthrough]];
493
0
    case '+':
494
0
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
0
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
28
    }
501
502
    // This is the fast path where the string cannot overflow.
503
28
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
28
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
28
        return static_cast<T>(negative ? -val : val);
506
28
    }
507
508
0
    const T max_div_10 = max_val / 10;
509
0
    const T max_mod_10 = max_val % 10;
510
511
0
    int first = i;
512
0
    for (; i < len; ++i) {
513
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
0
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
0
                *result = PARSE_OVERFLOW;
518
0
                return negative ? -max_val : max_val;
519
0
            }
520
0
            val = val * 10 + digit;
521
0
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
0
            } else {
529
                // Save original position where non-digit was found
530
0
                int remaining_len = len - i;
531
0
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
0
                    *result = PARSE_FAILURE;
539
0
                    return 0;
540
0
                }
541
0
            }
542
            // Returning here is slightly faster than breaking the loop.
543
0
            *result = PARSE_SUCCESS;
544
0
            return static_cast<T>(negative ? -val : val);
545
0
        }
546
0
    }
547
0
    *result = PARSE_SUCCESS;
548
0
    return static_cast<T>(negative ? -val : val);
549
0
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
4
    if (UNLIKELY(len <= 0)) {
479
0
        *result = PARSE_FAILURE;
480
0
        return 0;
481
0
    }
482
483
4
    using UnsignedT = MakeUnsignedT<T>;
484
4
    UnsignedT val = 0;
485
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
4
    bool negative = false;
487
4
    int i = 0;
488
4
    switch (*s) {
489
0
    case '-':
490
0
        negative = true;
491
0
        max_val += 1;
492
0
        [[fallthrough]];
493
0
    case '+':
494
0
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
0
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
4
    }
501
502
    // This is the fast path where the string cannot overflow.
503
4
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
4
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
4
        return static_cast<T>(negative ? -val : val);
506
4
    }
507
508
0
    const T max_div_10 = max_val / 10;
509
0
    const T max_mod_10 = max_val % 10;
510
511
0
    int first = i;
512
0
    for (; i < len; ++i) {
513
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
0
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
0
                *result = PARSE_OVERFLOW;
518
0
                return negative ? -max_val : max_val;
519
0
            }
520
0
            val = val * 10 + digit;
521
0
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
0
            } else {
529
                // Save original position where non-digit was found
530
0
                int remaining_len = len - i;
531
0
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
0
                    *result = PARSE_FAILURE;
539
0
                    return 0;
540
0
                }
541
0
            }
542
            // Returning here is slightly faster than breaking the loop.
543
0
            *result = PARSE_SUCCESS;
544
0
            return static_cast<T>(negative ? -val : val);
545
0
        }
546
0
    }
547
0
    *result = PARSE_SUCCESS;
548
0
    return static_cast<T>(negative ? -val : val);
549
0
}
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
477
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
478
4
    if (UNLIKELY(len <= 0)) {
479
0
        *result = PARSE_FAILURE;
480
0
        return 0;
481
0
    }
482
483
4
    using UnsignedT = MakeUnsignedT<T>;
484
4
    UnsignedT val = 0;
485
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
486
4
    bool negative = false;
487
4
    int i = 0;
488
4
    switch (*s) {
489
0
    case '-':
490
0
        negative = true;
491
0
        max_val += 1;
492
0
        [[fallthrough]];
493
0
    case '+':
494
0
        ++i;
495
        // only one '+'/'-' char, so could return failure directly
496
0
        if (UNLIKELY(len == 1)) {
497
0
            *result = PARSE_FAILURE;
498
0
            return 0;
499
0
        }
500
4
    }
501
502
    // This is the fast path where the string cannot overflow.
503
4
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
504
0
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
505
0
        return static_cast<T>(negative ? -val : val);
506
0
    }
507
508
4
    const T max_div_10 = max_val / 10;
509
4
    const T max_mod_10 = max_val % 10;
510
511
4
    int first = i;
512
84
    for (; i < len; ++i) {
513
80
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
514
80
            T digit = s[i] - '0';
515
            // This is a tricky check to see if adding this digit will cause an overflow.
516
80
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
517
0
                *result = PARSE_OVERFLOW;
518
0
                return negative ? -max_val : max_val;
519
0
            }
520
80
            val = val * 10 + digit;
521
80
        } else {
522
            if constexpr (enable_strict_mode) {
523
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
524
                    // Reject the string because the remaining chars are not all whitespace
525
                    *result = PARSE_FAILURE;
526
                    return 0;
527
                }
528
0
            } else {
529
                // Save original position where non-digit was found
530
0
                int remaining_len = len - i;
531
0
                const char* remaining_s = s + i;
532
                // Skip trailing whitespaces from the remaining portion
533
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
534
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
535
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
536
                    // Reject the string because either the first char was not a digit,
537
                    // or the remaining chars are not all whitespace
538
0
                    *result = PARSE_FAILURE;
539
0
                    return 0;
540
0
                }
541
0
            }
542
            // Returning here is slightly faster than breaking the loop.
543
0
            *result = PARSE_SUCCESS;
544
0
            return static_cast<T>(negative ? -val : val);
545
0
        }
546
80
    }
547
4
    *result = PARSE_SUCCESS;
548
4
    return static_cast<T>(negative ? -val : val);
549
4
}
550
551
template <typename T>
552
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
553
1.37k
                                                ParseResult* result) {
554
1.37k
    if (UNLIKELY(len <= 0)) {
555
0
        *result = PARSE_FAILURE;
556
0
        return 0;
557
0
    }
558
559
1.37k
    T val = 0;
560
1.37k
    T max_val = std::numeric_limits<T>::max();
561
1.37k
    int i = 0;
562
563
1.37k
    using signedT = MakeSignedT<T>;
564
    // This is the fast path where the string cannot overflow.
565
1.37k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
566
784
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
567
784
        return val;
568
784
    }
569
570
588
    const T max_div_10 = max_val / 10;
571
588
    const T max_mod_10 = max_val % 10;
572
573
588
    int first = i;
574
4.65k
    for (; i < len; ++i) {
575
4.31k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
576
4.26k
            T digit = s[i] - '0';
577
            // This is a tricky check to see if adding this digit will cause an overflow.
578
4.26k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
579
196
                *result = PARSE_OVERFLOW;
580
196
                return max_val;
581
196
            }
582
4.06k
            val = val * 10 + digit;
583
4.06k
        } else {
584
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
585
                // Reject the string because either the first char was not a digit,
586
                // or the remaining chars are not all whitespace
587
49
                *result = PARSE_FAILURE;
588
49
                return 0;
589
49
            }
590
            // Returning here is slightly faster than breaking the loop.
591
0
            *result = PARSE_SUCCESS;
592
0
            return val;
593
49
        }
594
4.31k
    }
595
343
    *result = PARSE_SUCCESS;
596
343
    return val;
597
588
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
553
343
                                                ParseResult* result) {
554
343
    if (UNLIKELY(len <= 0)) {
555
0
        *result = PARSE_FAILURE;
556
0
        return 0;
557
0
    }
558
559
343
    T val = 0;
560
343
    T max_val = std::numeric_limits<T>::max();
561
343
    int i = 0;
562
563
343
    using signedT = MakeSignedT<T>;
564
    // This is the fast path where the string cannot overflow.
565
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
566
98
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
567
98
        return val;
568
98
    }
569
570
245
    const T max_div_10 = max_val / 10;
571
245
    const T max_mod_10 = max_val % 10;
572
573
245
    int first = i;
574
784
    for (; i < len; ++i) {
575
637
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
576
588
            T digit = s[i] - '0';
577
            // This is a tricky check to see if adding this digit will cause an overflow.
578
588
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
579
49
                *result = PARSE_OVERFLOW;
580
49
                return max_val;
581
49
            }
582
539
            val = val * 10 + digit;
583
539
        } else {
584
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
585
                // Reject the string because either the first char was not a digit,
586
                // or the remaining chars are not all whitespace
587
49
                *result = PARSE_FAILURE;
588
49
                return 0;
589
49
            }
590
            // Returning here is slightly faster than breaking the loop.
591
0
            *result = PARSE_SUCCESS;
592
0
            return val;
593
49
        }
594
637
    }
595
147
    *result = PARSE_SUCCESS;
596
147
    return val;
597
245
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
553
343
                                                ParseResult* result) {
554
343
    if (UNLIKELY(len <= 0)) {
555
0
        *result = PARSE_FAILURE;
556
0
        return 0;
557
0
    }
558
559
343
    T val = 0;
560
343
    T max_val = std::numeric_limits<T>::max();
561
343
    int i = 0;
562
563
343
    using signedT = MakeSignedT<T>;
564
    // This is the fast path where the string cannot overflow.
565
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
566
196
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
567
196
        return val;
568
196
    }
569
570
147
    const T max_div_10 = max_val / 10;
571
147
    const T max_mod_10 = max_val % 10;
572
573
147
    int first = i;
574
833
    for (; i < len; ++i) {
575
735
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
576
735
            T digit = s[i] - '0';
577
            // This is a tricky check to see if adding this digit will cause an overflow.
578
735
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
579
49
                *result = PARSE_OVERFLOW;
580
49
                return max_val;
581
49
            }
582
686
            val = val * 10 + digit;
583
686
        } else {
584
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
585
                // Reject the string because either the first char was not a digit,
586
                // or the remaining chars are not all whitespace
587
0
                *result = PARSE_FAILURE;
588
0
                return 0;
589
0
            }
590
            // Returning here is slightly faster than breaking the loop.
591
0
            *result = PARSE_SUCCESS;
592
0
            return val;
593
0
        }
594
735
    }
595
98
    *result = PARSE_SUCCESS;
596
98
    return val;
597
147
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
553
343
                                                ParseResult* result) {
554
343
    if (UNLIKELY(len <= 0)) {
555
0
        *result = PARSE_FAILURE;
556
0
        return 0;
557
0
    }
558
559
343
    T val = 0;
560
343
    T max_val = std::numeric_limits<T>::max();
561
343
    int i = 0;
562
563
343
    using signedT = MakeSignedT<T>;
564
    // This is the fast path where the string cannot overflow.
565
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
566
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
567
245
        return val;
568
245
    }
569
570
98
    const T max_div_10 = max_val / 10;
571
98
    const T max_mod_10 = max_val % 10;
572
573
98
    int first = i;
574
1.02k
    for (; i < len; ++i) {
575
980
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
576
980
            T digit = s[i] - '0';
577
            // This is a tricky check to see if adding this digit will cause an overflow.
578
980
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
579
49
                *result = PARSE_OVERFLOW;
580
49
                return max_val;
581
49
            }
582
931
            val = val * 10 + digit;
583
931
        } else {
584
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
585
                // Reject the string because either the first char was not a digit,
586
                // or the remaining chars are not all whitespace
587
0
                *result = PARSE_FAILURE;
588
0
                return 0;
589
0
            }
590
            // Returning here is slightly faster than breaking the loop.
591
0
            *result = PARSE_SUCCESS;
592
0
            return val;
593
0
        }
594
980
    }
595
49
    *result = PARSE_SUCCESS;
596
49
    return val;
597
98
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
553
343
                                                ParseResult* result) {
554
343
    if (UNLIKELY(len <= 0)) {
555
0
        *result = PARSE_FAILURE;
556
0
        return 0;
557
0
    }
558
559
343
    T val = 0;
560
343
    T max_val = std::numeric_limits<T>::max();
561
343
    int i = 0;
562
563
343
    using signedT = MakeSignedT<T>;
564
    // This is the fast path where the string cannot overflow.
565
343
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
566
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
567
245
        return val;
568
245
    }
569
570
98
    const T max_div_10 = max_val / 10;
571
98
    const T max_mod_10 = max_val % 10;
572
573
98
    int first = i;
574
2.00k
    for (; i < len; ++i) {
575
1.96k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
576
1.96k
            T digit = s[i] - '0';
577
            // This is a tricky check to see if adding this digit will cause an overflow.
578
1.96k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
579
49
                *result = PARSE_OVERFLOW;
580
49
                return max_val;
581
49
            }
582
1.91k
            val = val * 10 + digit;
583
1.91k
        } else {
584
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
585
                // Reject the string because either the first char was not a digit,
586
                // or the remaining chars are not all whitespace
587
0
                *result = PARSE_FAILURE;
588
0
                return 0;
589
0
            }
590
            // Returning here is slightly faster than breaking the loop.
591
0
            *result = PARSE_SUCCESS;
592
0
            return val;
593
0
        }
594
1.96k
    }
595
49
    *result = PARSE_SUCCESS;
596
49
    return val;
597
98
}
598
599
template <typename T>
600
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
601
27.8k
                                       ParseResult* result) {
602
27.8k
    using UnsignedT = MakeUnsignedT<T>;
603
27.8k
    UnsignedT val = 0;
604
27.8k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
27.8k
    bool negative = false;
606
27.8k
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
27.8k
    int i = 0;
611
27.8k
    switch (*s) {
612
13.4k
    case '-':
613
13.4k
        negative = true;
614
13.4k
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
13.4k
        [[fallthrough]];
616
13.7k
    case '+':
617
13.7k
        i = 1;
618
27.8k
    }
619
620
27.8k
    const T max_div_base = max_val / base;
621
27.8k
    const T max_mod_base = max_val % base;
622
623
27.8k
    int first = i;
624
90.9k
    for (; i < len; ++i) {
625
76.6k
        T digit;
626
76.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
75.7k
            digit = s[i] - '0';
628
75.7k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
639
            digit = (s[i] - 'a' + 10);
630
639
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
98
            digit = (s[i] - 'A' + 10);
632
147
        } else {
633
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
147
                *result = PARSE_FAILURE;
637
147
                return 0;
638
147
            }
639
            // skip trailing whitespace.
640
0
            break;
641
147
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
76.4k
        if (digit >= base) {
645
392
            break;
646
392
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
76.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
12.9k
            *result = PARSE_OVERFLOW;
651
12.9k
            return static_cast<T>(negative ? -max_val : max_val);
652
12.9k
        }
653
63.1k
        val = val * base + digit;
654
63.1k
    }
655
14.7k
    *result = PARSE_SUCCESS;
656
14.7k
    return static_cast<T>(negative ? -val : val);
657
27.8k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
601
26.4k
                                       ParseResult* result) {
602
26.4k
    using UnsignedT = MakeUnsignedT<T>;
603
26.4k
    UnsignedT val = 0;
604
26.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
26.4k
    bool negative = false;
606
26.4k
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
26.4k
    int i = 0;
611
26.4k
    switch (*s) {
612
12.8k
    case '-':
613
12.8k
        negative = true;
614
12.8k
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
12.8k
        [[fallthrough]];
616
12.9k
    case '+':
617
12.9k
        i = 1;
618
26.4k
    }
619
620
26.4k
    const T max_div_base = max_val / base;
621
26.4k
    const T max_mod_base = max_val % base;
622
623
26.4k
    int first = i;
624
80.7k
    for (; i < len; ++i) {
625
67.4k
        T digit;
626
67.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
66.6k
            digit = s[i] - '0';
628
66.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
539
            digit = (s[i] - 'a' + 10);
630
539
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
98
            digit = (s[i] - 'A' + 10);
632
147
        } else {
633
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
147
                *result = PARSE_FAILURE;
637
147
                return 0;
638
147
            }
639
            // skip trailing whitespace.
640
0
            break;
641
147
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
67.3k
        if (digit >= base) {
645
392
            break;
646
392
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
66.9k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
12.6k
            *result = PARSE_OVERFLOW;
651
12.6k
            return static_cast<T>(negative ? -max_val : max_val);
652
12.6k
        }
653
54.2k
        val = val * base + digit;
654
54.2k
    }
655
13.6k
    *result = PARSE_SUCCESS;
656
13.6k
    return static_cast<T>(negative ? -val : val);
657
26.4k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
601
490
                                       ParseResult* result) {
602
490
    using UnsignedT = MakeUnsignedT<T>;
603
490
    UnsignedT val = 0;
604
490
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
490
    bool negative = false;
606
490
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
490
    int i = 0;
611
490
    switch (*s) {
612
196
    case '-':
613
196
        negative = true;
614
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
196
        [[fallthrough]];
616
245
    case '+':
617
245
        i = 1;
618
490
    }
619
620
490
    const T max_div_base = max_val / base;
621
490
    const T max_mod_base = max_val % base;
622
623
490
    int first = i;
624
2.10k
    for (; i < len; ++i) {
625
1.71k
        T digit;
626
1.71k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
1.61k
            digit = s[i] - '0';
628
1.61k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
98
            digit = (s[i] - 'a' + 10);
630
98
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
0
            digit = (s[i] - 'A' + 10);
632
0
        } else {
633
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
0
                *result = PARSE_FAILURE;
637
0
                return 0;
638
0
            }
639
            // skip trailing whitespace.
640
0
            break;
641
0
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
1.71k
        if (digit >= base) {
645
0
            break;
646
0
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
1.71k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
98
            *result = PARSE_OVERFLOW;
651
98
            return static_cast<T>(negative ? -max_val : max_val);
652
98
        }
653
1.61k
        val = val * base + digit;
654
1.61k
    }
655
392
    *result = PARSE_SUCCESS;
656
392
    return static_cast<T>(negative ? -val : val);
657
490
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
601
441
                                       ParseResult* result) {
602
441
    using UnsignedT = MakeUnsignedT<T>;
603
441
    UnsignedT val = 0;
604
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
441
    bool negative = false;
606
441
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
441
    int i = 0;
611
441
    switch (*s) {
612
147
    case '-':
613
147
        negative = true;
614
147
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
147
        [[fallthrough]];
616
245
    case '+':
617
245
        i = 1;
618
441
    }
619
620
441
    const T max_div_base = max_val / base;
621
441
    const T max_mod_base = max_val % base;
622
623
441
    int first = i;
624
3.03k
    for (; i < len; ++i) {
625
2.69k
        T digit;
626
2.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
2.69k
            digit = s[i] - '0';
628
2.69k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
0
            digit = (s[i] - 'a' + 10);
630
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
0
            digit = (s[i] - 'A' + 10);
632
0
        } else {
633
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
0
                *result = PARSE_FAILURE;
637
0
                return 0;
638
0
            }
639
            // skip trailing whitespace.
640
0
            break;
641
0
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
2.69k
        if (digit >= base) {
645
0
            break;
646
0
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
2.69k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
98
            *result = PARSE_OVERFLOW;
651
98
            return static_cast<T>(negative ? -max_val : max_val);
652
98
        }
653
2.59k
        val = val * base + digit;
654
2.59k
    }
655
343
    *result = PARSE_SUCCESS;
656
343
    return static_cast<T>(negative ? -val : val);
657
441
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
601
441
                                       ParseResult* result) {
602
441
    using UnsignedT = MakeUnsignedT<T>;
603
441
    UnsignedT val = 0;
604
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
441
    bool negative = false;
606
441
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
441
    int i = 0;
611
441
    switch (*s) {
612
196
    case '-':
613
196
        negative = true;
614
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
196
        [[fallthrough]];
616
245
    case '+':
617
245
        i = 1;
618
441
    }
619
620
441
    const T max_div_base = max_val / base;
621
441
    const T max_mod_base = max_val % base;
622
623
441
    int first = i;
624
5.09k
    for (; i < len; ++i) {
625
4.75k
        T digit;
626
4.75k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
4.75k
            digit = s[i] - '0';
628
4.75k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
0
            digit = (s[i] - 'a' + 10);
630
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
0
            digit = (s[i] - 'A' + 10);
632
0
        } else {
633
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
0
                *result = PARSE_FAILURE;
637
0
                return 0;
638
0
            }
639
            // skip trailing whitespace.
640
0
            break;
641
0
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
4.75k
        if (digit >= base) {
645
0
            break;
646
0
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
4.75k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
98
            *result = PARSE_OVERFLOW;
651
98
            return static_cast<T>(negative ? -max_val : max_val);
652
98
        }
653
4.65k
        val = val * base + digit;
654
4.65k
    }
655
343
    *result = PARSE_SUCCESS;
656
343
    return static_cast<T>(negative ? -val : val);
657
441
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
601
1
                                       ParseResult* result) {
602
1
    using UnsignedT = MakeUnsignedT<T>;
603
1
    UnsignedT val = 0;
604
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
605
1
    bool negative = false;
606
1
    if (UNLIKELY(len <= 0)) {
607
0
        *result = PARSE_FAILURE;
608
0
        return 0;
609
0
    }
610
1
    int i = 0;
611
1
    switch (*s) {
612
0
    case '-':
613
0
        negative = true;
614
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
615
0
        [[fallthrough]];
616
0
    case '+':
617
0
        i = 1;
618
1
    }
619
620
1
    const T max_div_base = max_val / base;
621
1
    const T max_mod_base = max_val % base;
622
623
1
    int first = i;
624
3
    for (; i < len; ++i) {
625
2
        T digit;
626
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
627
0
            digit = s[i] - '0';
628
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
629
2
            digit = (s[i] - 'a' + 10);
630
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
631
0
            digit = (s[i] - 'A' + 10);
632
0
        } else {
633
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
634
                // Reject the string because either the first char was not an alpha/digit,
635
                // or the remaining chars are not all whitespace
636
0
                *result = PARSE_FAILURE;
637
0
                return 0;
638
0
            }
639
            // skip trailing whitespace.
640
0
            break;
641
0
        }
642
643
        // Bail, if we encounter a digit that is not available in base.
644
2
        if (digit >= base) {
645
0
            break;
646
0
        }
647
648
        // This is a tricky check to see if adding this digit will cause an overflow.
649
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
650
0
            *result = PARSE_OVERFLOW;
651
0
            return static_cast<T>(negative ? -max_val : max_val);
652
0
        }
653
2
        val = val * base + digit;
654
2
    }
655
1
    *result = PARSE_SUCCESS;
656
1
    return static_cast<T>(negative ? -val : val);
657
1
}
658
659
template <typename T, bool enable_strict_mode>
660
268k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
268k
    T val = 0;
662
268k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
268k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
262k
        val = s[0] - '0';
669
262k
    } else {
670
6.25k
        *result = PARSE_FAILURE;
671
6.25k
        return 0;
672
6.25k
    }
673
505k
    for (int i = 1; i < len; ++i) {
674
247k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
243k
            T digit = s[i] - '0';
676
243k
            val = val * 10 + digit;
677
243k
        } else {
678
3.82k
            if constexpr (enable_strict_mode) {
679
1.31k
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
1.17k
                    *result = PARSE_FAILURE;
681
1.17k
                    return 0;
682
1.17k
                }
683
2.50k
            } else {
684
                // Save original position where non-digit was found
685
2.50k
                int remaining_len = len - i;
686
2.50k
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
2.50k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
2.50k
                if ((UNLIKELY(remaining_len != 0 &&
690
2.50k
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
506
                    *result = PARSE_FAILURE;
692
506
                    return 0;
693
506
                }
694
2.50k
            }
695
2.14k
            *result = PARSE_SUCCESS;
696
3.82k
            return val;
697
3.82k
        }
698
247k
    }
699
258k
    *result = PARSE_SUCCESS;
700
258k
    return val;
701
262k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
41.9k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
41.9k
    T val = 0;
662
41.9k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
41.9k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
41.0k
        val = s[0] - '0';
669
41.0k
    } else {
670
914
        *result = PARSE_FAILURE;
671
914
        return 0;
672
914
    }
673
57.7k
    for (int i = 1; i < len; ++i) {
674
17.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
16.6k
            T digit = s[i] - '0';
676
16.6k
            val = val * 10 + digit;
677
16.6k
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
378
            } else {
684
                // Save original position where non-digit was found
685
378
                int remaining_len = len - i;
686
378
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
378
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
378
                if ((UNLIKELY(remaining_len != 0 &&
690
378
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
98
                    *result = PARSE_FAILURE;
692
98
                    return 0;
693
98
                }
694
378
            }
695
280
            *result = PARSE_SUCCESS;
696
378
            return val;
697
378
        }
698
17.0k
    }
699
40.6k
    *result = PARSE_SUCCESS;
700
40.6k
    return val;
701
41.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
60.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
60.5k
    T val = 0;
662
60.5k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
60.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
59.9k
        val = s[0] - '0';
669
59.9k
    } else {
670
582
        *result = PARSE_FAILURE;
671
582
        return 0;
672
582
    }
673
85.3k
    for (int i = 1; i < len; ++i) {
674
25.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
25.3k
            T digit = s[i] - '0';
676
25.3k
            val = val * 10 + digit;
677
25.3k
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
2
            } else {
684
                // Save original position where non-digit was found
685
2
                int remaining_len = len - i;
686
2
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
2
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
2
                if ((UNLIKELY(remaining_len != 0 &&
690
2
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
2
                    *result = PARSE_FAILURE;
692
2
                    return 0;
693
2
                }
694
2
            }
695
0
            *result = PARSE_SUCCESS;
696
2
            return val;
697
2
        }
698
25.3k
    }
699
59.9k
    *result = PARSE_SUCCESS;
700
59.9k
    return val;
701
59.9k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
48
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
48
    T val = 0;
662
48
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
48
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
22
        val = s[0] - '0';
669
26
    } else {
670
26
        *result = PARSE_FAILURE;
671
26
        return 0;
672
26
    }
673
22
    for (int i = 1; i < len; ++i) {
674
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
0
            T digit = s[i] - '0';
676
0
            val = val * 10 + digit;
677
2
        } else {
678
2
            if constexpr (enable_strict_mode) {
679
2
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
2
                    *result = PARSE_FAILURE;
681
2
                    return 0;
682
2
                }
683
            } else {
684
                // Save original position where non-digit was found
685
                int remaining_len = len - i;
686
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
                if ((UNLIKELY(remaining_len != 0 &&
690
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
                    *result = PARSE_FAILURE;
692
                    return 0;
693
                }
694
            }
695
0
            *result = PARSE_SUCCESS;
696
2
            return val;
697
2
        }
698
2
    }
699
20
    *result = PARSE_SUCCESS;
700
20
    return val;
701
22
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
51.0k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
51.0k
    T val = 0;
662
51.0k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
51.0k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
50.0k
        val = s[0] - '0';
669
50.0k
    } else {
670
918
        *result = PARSE_FAILURE;
671
918
        return 0;
672
918
    }
673
72.9k
    for (int i = 1; i < len; ++i) {
674
23.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
22.8k
            T digit = s[i] - '0';
676
22.8k
            val = val * 10 + digit;
677
22.8k
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
957
            } else {
684
                // Save original position where non-digit was found
685
957
                int remaining_len = len - i;
686
957
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
957
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
957
                if ((UNLIKELY(remaining_len != 0 &&
690
957
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
74
                    *result = PARSE_FAILURE;
692
74
                    return 0;
693
74
                }
694
957
            }
695
883
            *result = PARSE_SUCCESS;
696
957
            return val;
697
957
        }
698
23.7k
    }
699
49.1k
    *result = PARSE_SUCCESS;
700
49.1k
    return val;
701
50.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
168
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
168
    T val = 0;
662
168
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
168
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
130
        val = s[0] - '0';
669
130
    } else {
670
38
        *result = PARSE_FAILURE;
671
38
        return 0;
672
38
    }
673
206
    for (int i = 1; i < len; ++i) {
674
158
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
76
            T digit = s[i] - '0';
676
76
            val = val * 10 + digit;
677
82
        } else {
678
82
            if constexpr (enable_strict_mode) {
679
82
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
82
                    *result = PARSE_FAILURE;
681
82
                    return 0;
682
82
                }
683
            } else {
684
                // Save original position where non-digit was found
685
                int remaining_len = len - i;
686
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
                if ((UNLIKELY(remaining_len != 0 &&
690
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
                    *result = PARSE_FAILURE;
692
                    return 0;
693
                }
694
            }
695
0
            *result = PARSE_SUCCESS;
696
82
            return val;
697
82
        }
698
158
    }
699
48
    *result = PARSE_SUCCESS;
700
48
    return val;
701
130
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
80.0k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
80.0k
    T val = 0;
662
80.0k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
80.0k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
77.9k
        val = s[0] - '0';
669
77.9k
    } else {
670
2.16k
        *result = PARSE_FAILURE;
671
2.16k
        return 0;
672
2.16k
    }
673
218k
    for (int i = 1; i < len; ++i) {
674
141k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
140k
            T digit = s[i] - '0';
676
140k
            val = val * 10 + digit;
677
140k
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
556
            } else {
684
                // Save original position where non-digit was found
685
556
                int remaining_len = len - i;
686
556
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
556
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
556
                if ((UNLIKELY(remaining_len != 0 &&
690
556
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
208
                    *result = PARSE_FAILURE;
692
208
                    return 0;
693
208
                }
694
556
            }
695
348
            *result = PARSE_SUCCESS;
696
556
            return val;
697
556
        }
698
141k
    }
699
77.3k
    *result = PARSE_SUCCESS;
700
77.3k
    return val;
701
77.9k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
608
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
608
    T val = 0;
662
608
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
608
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
523
        val = s[0] - '0';
669
523
    } else {
670
85
        *result = PARSE_FAILURE;
671
85
        return 0;
672
85
    }
673
1.28k
    for (int i = 1; i < len; ++i) {
674
1.08k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
764
            T digit = s[i] - '0';
676
764
            val = val * 10 + digit;
677
764
        } else {
678
320
            if constexpr (enable_strict_mode) {
679
320
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
292
                    *result = PARSE_FAILURE;
681
292
                    return 0;
682
292
                }
683
            } else {
684
                // Save original position where non-digit was found
685
                int remaining_len = len - i;
686
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
                if ((UNLIKELY(remaining_len != 0 &&
690
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
                    *result = PARSE_FAILURE;
692
                    return 0;
693
                }
694
            }
695
28
            *result = PARSE_SUCCESS;
696
320
            return val;
697
320
        }
698
1.08k
    }
699
203
    *result = PARSE_SUCCESS;
700
203
    return val;
701
523
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
32.5k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
32.5k
    T val = 0;
662
32.5k
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
32.5k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
31.5k
        val = s[0] - '0';
669
31.5k
    } else {
670
1.07k
        *result = PARSE_FAILURE;
671
1.07k
        return 0;
672
1.07k
    }
673
66.2k
    for (int i = 1; i < len; ++i) {
674
35.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
34.6k
            T digit = s[i] - '0';
676
34.6k
            val = val * 10 + digit;
677
34.6k
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
615
            } else {
684
                // Save original position where non-digit was found
685
615
                int remaining_len = len - i;
686
615
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
615
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
615
                if ((UNLIKELY(remaining_len != 0 &&
690
615
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
124
                    *result = PARSE_FAILURE;
692
124
                    return 0;
693
124
                }
694
615
            }
695
491
            *result = PARSE_SUCCESS;
696
615
            return val;
697
615
        }
698
35.3k
    }
699
30.8k
    *result = PARSE_SUCCESS;
700
30.8k
    return val;
701
31.5k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
735
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
735
    T val = 0;
662
735
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
735
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
518
        val = s[0] - '0';
669
518
    } else {
670
217
        *result = PARSE_FAILURE;
671
217
        return 0;
672
217
    }
673
1.51k
    for (int i = 1; i < len; ++i) {
674
1.45k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
999
            T digit = s[i] - '0';
676
999
            val = val * 10 + digit;
677
999
        } else {
678
456
            if constexpr (enable_strict_mode) {
679
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
400
                    *result = PARSE_FAILURE;
681
400
                    return 0;
682
400
                }
683
            } else {
684
                // Save original position where non-digit was found
685
                int remaining_len = len - i;
686
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
                if ((UNLIKELY(remaining_len != 0 &&
690
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
                    *result = PARSE_FAILURE;
692
                    return 0;
693
                }
694
            }
695
56
            *result = PARSE_SUCCESS;
696
456
            return val;
697
456
        }
698
1.45k
    }
699
62
    *result = PARSE_SUCCESS;
700
62
    return val;
701
518
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
752
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
752
    T val = 0;
662
752
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
752
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
512
        val = s[0] - '0';
669
512
    } else {
670
240
        *result = PARSE_FAILURE;
671
240
        return 0;
672
240
    }
673
1.49k
    for (int i = 1; i < len; ++i) {
674
1.44k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
986
            T digit = s[i] - '0';
676
986
            val = val * 10 + digit;
677
986
        } else {
678
456
            if constexpr (enable_strict_mode) {
679
456
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
400
                    *result = PARSE_FAILURE;
681
400
                    return 0;
682
400
                }
683
            } else {
684
                // Save original position where non-digit was found
685
                int remaining_len = len - i;
686
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
                if ((UNLIKELY(remaining_len != 0 &&
690
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
                    *result = PARSE_FAILURE;
692
                    return 0;
693
                }
694
            }
695
56
            *result = PARSE_SUCCESS;
696
456
            return val;
697
456
        }
698
1.44k
    }
699
56
    *result = PARSE_SUCCESS;
700
56
    return val;
701
512
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
660
4
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
661
4
    T val = 0;
662
4
    if (UNLIKELY(len == 0)) {
663
0
        *result = PARSE_SUCCESS;
664
0
        return val;
665
0
    }
666
    // Factor out the first char for error handling speeds up the loop.
667
4
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
668
4
        val = s[0] - '0';
669
4
    } else {
670
0
        *result = PARSE_FAILURE;
671
0
        return 0;
672
0
    }
673
4
    for (int i = 1; i < len; ++i) {
674
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
675
0
            T digit = s[i] - '0';
676
0
            val = val * 10 + digit;
677
0
        } else {
678
            if constexpr (enable_strict_mode) {
679
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
680
                    *result = PARSE_FAILURE;
681
                    return 0;
682
                }
683
0
            } else {
684
                // Save original position where non-digit was found
685
0
                int remaining_len = len - i;
686
0
                const char* remaining_s = s + i;
687
                // Skip trailing whitespaces from the remaining portion
688
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
689
0
                if ((UNLIKELY(remaining_len != 0 &&
690
0
                              !is_float_suffix(remaining_s, remaining_len)))) {
691
0
                    *result = PARSE_FAILURE;
692
0
                    return 0;
693
0
                }
694
0
            }
695
0
            *result = PARSE_SUCCESS;
696
0
            return val;
697
0
        }
698
0
    }
699
4
    *result = PARSE_SUCCESS;
700
4
    return val;
701
4
}
702
703
// at least the first char(if any) must be a digit.
704
template <typename T>
705
T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
706
136k
                                                  ParseResult* result) {
707
136k
    T val = 0;
708
136k
    if (max_len == 0) [[unlikely]] {
709
135k
        *result = PARSE_SUCCESS;
710
135k
        return val;
711
135k
    }
712
    // Factor out the first char for error handling speeds up the loop.
713
1.13k
    if (is_numeric_ascii(s[0])) [[likely]] {
714
1.13k
        val = s[0] - '0';
715
1.13k
    } else {
716
0
        *result = PARSE_FAILURE;
717
0
        return 0;
718
0
    }
719
5.11k
    for (int i = 1; i < max_len; ++i) {
720
3.97k
        if (is_numeric_ascii(s[i])) [[likely]] {
721
3.97k
            T digit = s[i] - '0';
722
3.97k
            val = val * 10 + digit;
723
3.97k
        } else {
724
            // 123abc, return 123
725
0
            *result = PARSE_SUCCESS;
726
0
            return val;
727
0
        }
728
3.97k
    }
729
1.13k
    *result = PARSE_SUCCESS;
730
1.13k
    return val;
731
1.13k
}
732
733
template <typename T>
734
153k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
735
153k
    int i = 0;
736
    // skip leading spaces
737
153k
    for (; i < len; ++i) {
738
153k
        if (!is_whitespace_ascii(s[i])) {
739
153k
            break;
740
153k
        }
741
153k
    }
742
743
    // skip back spaces
744
153k
    int j = len - 1;
745
153k
    for (; j >= i; j--) {
746
153k
        if (!is_whitespace_ascii(s[j])) {
747
153k
            break;
748
153k
        }
749
153k
    }
750
751
    // skip leading '+', from_chars can handle '-'
752
153k
    if (i < len && s[i] == '+') {
753
7.08k
        i++;
754
        // ++ or +- are not valid, but the first + is already skipped,
755
        // if don't check here, from_chars will succeed.
756
        //
757
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
758
        // which may avoid this extra check here.
759
        // e.g.:
760
        // fast_float::chars_format format =
761
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
762
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
763
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
764
20
            *result = PARSE_FAILURE;
765
20
            return 0;
766
20
        }
767
7.08k
    }
768
153k
    if (UNLIKELY(i > j)) {
769
32
        *result = PARSE_FAILURE;
770
32
        return 0;
771
32
    }
772
773
    // Use double here to not lose precision while accumulating the result
774
153k
    double val = 0;
775
153k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
776
777
153k
    if (res.ptr == s + j + 1) {
778
148k
        *result = PARSE_SUCCESS;
779
148k
        return val;
780
148k
    } else {
781
4.61k
        *result = PARSE_FAILURE;
782
4.61k
    }
783
4.61k
    return 0;
784
153k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
734
87.9k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
735
87.9k
    int i = 0;
736
    // skip leading spaces
737
87.9k
    for (; i < len; ++i) {
738
87.9k
        if (!is_whitespace_ascii(s[i])) {
739
87.9k
            break;
740
87.9k
        }
741
87.9k
    }
742
743
    // skip back spaces
744
87.9k
    int j = len - 1;
745
87.9k
    for (; j >= i; j--) {
746
87.9k
        if (!is_whitespace_ascii(s[j])) {
747
87.9k
            break;
748
87.9k
        }
749
87.9k
    }
750
751
    // skip leading '+', from_chars can handle '-'
752
87.9k
    if (i < len && s[i] == '+') {
753
3.54k
        i++;
754
        // ++ or +- are not valid, but the first + is already skipped,
755
        // if don't check here, from_chars will succeed.
756
        //
757
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
758
        // which may avoid this extra check here.
759
        // e.g.:
760
        // fast_float::chars_format format =
761
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
762
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
763
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
764
10
            *result = PARSE_FAILURE;
765
10
            return 0;
766
10
        }
767
3.54k
    }
768
87.9k
    if (UNLIKELY(i > j)) {
769
18
        *result = PARSE_FAILURE;
770
18
        return 0;
771
18
    }
772
773
    // Use double here to not lose precision while accumulating the result
774
87.9k
    double val = 0;
775
87.9k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
776
777
87.9k
    if (res.ptr == s + j + 1) {
778
85.6k
        *result = PARSE_SUCCESS;
779
85.6k
        return val;
780
85.6k
    } else {
781
2.32k
        *result = PARSE_FAILURE;
782
2.32k
    }
783
2.32k
    return 0;
784
87.9k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
734
65.4k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
735
65.4k
    int i = 0;
736
    // skip leading spaces
737
65.4k
    for (; i < len; ++i) {
738
65.3k
        if (!is_whitespace_ascii(s[i])) {
739
65.3k
            break;
740
65.3k
        }
741
65.3k
    }
742
743
    // skip back spaces
744
65.4k
    int j = len - 1;
745
65.4k
    for (; j >= i; j--) {
746
65.3k
        if (!is_whitespace_ascii(s[j])) {
747
65.3k
            break;
748
65.3k
        }
749
65.3k
    }
750
751
    // skip leading '+', from_chars can handle '-'
752
65.4k
    if (i < len && s[i] == '+') {
753
3.54k
        i++;
754
        // ++ or +- are not valid, but the first + is already skipped,
755
        // if don't check here, from_chars will succeed.
756
        //
757
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
758
        // which may avoid this extra check here.
759
        // e.g.:
760
        // fast_float::chars_format format =
761
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
762
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
763
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
764
10
            *result = PARSE_FAILURE;
765
10
            return 0;
766
10
        }
767
3.54k
    }
768
65.4k
    if (UNLIKELY(i > j)) {
769
14
        *result = PARSE_FAILURE;
770
14
        return 0;
771
14
    }
772
773
    // Use double here to not lose precision while accumulating the result
774
65.3k
    double val = 0;
775
65.3k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
776
777
65.3k
    if (res.ptr == s + j + 1) {
778
63.1k
        *result = PARSE_SUCCESS;
779
63.1k
        return val;
780
63.1k
    } else {
781
2.28k
        *result = PARSE_FAILURE;
782
2.28k
    }
783
2.28k
    return 0;
784
65.3k
}
785
786
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
787
11.3k
                                                  ParseResult* result) {
788
11.3k
    *result = PARSE_SUCCESS;
789
790
11.3k
    if (len == 1) {
791
2.66k
        if (s[0] == '1' || s[0] == 't' || s[0] == 'T') {
792
333
            return true;
793
333
        }
794
2.32k
        if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') {
795
934
            return false;
796
934
        }
797
1.39k
        *result = PARSE_FAILURE;
798
1.39k
        return false;
799
2.32k
    }
800
801
8.71k
    if (len == 2) {
802
975
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
803
10
            return true;
804
10
        }
805
965
        if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) {
806
9
            return false;
807
9
        }
808
965
    }
809
810
8.69k
    if (len == 3) {
811
42
        if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') &&
812
42
            (s[2] == 's' || s[2] == 'S')) {
813
10
            return true;
814
10
        }
815
32
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') &&
816
32
            (s[2] == 'f' || s[2] == 'F')) {
817
9
            return false;
818
9
        }
819
32
    }
820
821
8.67k
    if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
822
8.67k
        (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) {
823
3.38k
        return true;
824
3.38k
    }
825
826
5.29k
    if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
827
5.29k
        (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
828
5.29k
        (s[4] == 'e' || s[4] == 'E')) {
829
3.42k
        return false;
830
3.42k
    }
831
832
    // No valid boolean value found
833
1.87k
    *result = PARSE_FAILURE;
834
1.87k
    return false;
835
5.29k
}
836
#include "common/compile_check_avoid_end.h"
837
} // end namespace doris