Coverage Report

Created: 2026-03-24 20:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/string_parser.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
#include <sys/types.h>
27
28
#include <algorithm>
29
#include <cstdlib>
30
// IWYU pragma: no_include <bits/std_abs.h>
31
#include <cmath> // IWYU pragma: keep
32
#include <cstdint>
33
#include <limits>
34
#include <map>
35
#include <string>
36
#include <type_traits>
37
#include <utility>
38
39
#include "common/compiler_util.h" // IWYU pragma: keep
40
#include "common/status.h"
41
#include "core/data_type/number_traits.h"
42
#include "core/data_type/primitive_type.h"
43
#include "core/extended_types.h"
44
#include "core/value/large_int_value.h"
45
#include "exec/common/int_exp.h"
46
#include "exec/common/string_utils/string_utils.h"
47
48
namespace doris {
49
#include "common/compile_check_avoid_begin.h"
50
template <DecimalNativeTypeConcept T>
51
struct Decimal;
52
53
// SET_PARAMS_RET_FALSE_IFN(stmt, ...): "return false if not `stmt`".
// Evaluates `stmt`; when it is false the enclosing function returns false.
// The macro relies on two names being visible at the expansion site:
//   - `IsStrict`: a compile-time boolean. Only when it is true is an error recorded.
//   - `params`:   an object whose `status` member receives
//                 Status::InvalidArgument(<remaining macro arguments>) in strict mode.
// In non-strict mode no status is set; the function just returns false.
54
#ifndef SET_PARAMS_RET_FALSE_IFN
55
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
56
7.87M
    do {                                                              \
57
7.87M
        if (!(stmt)) [[unlikely]] {                                   \
58
72.2k
            if constexpr (IsStrict) {                                 \
59
246
                params.status = Status::InvalidArgument(__VA_ARGS__); \
60
246
            }                                                         \
61
72.2k
            return false;                                             \
62
72.2k
        }                                                             \
63
7.87M
    } while (false)
64
#endif
65
66
// SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt): runs `stmt` inside a try block.
// If it throws doris::Exception, the enclosing function returns false; when the
// compile-time flag `IsStrict` is true the exception is first converted with
// e.to_status() and stored in `params.status`. Both `IsStrict` and `params`
// must be visible at the expansion site. Other exception types are not caught here.
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
67
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
68
314
    do {                                          \
69
314
        try {                                     \
70
314
            { stmt; }                             \
71
314
        } catch (const doris::Exception& e) {     \
72
30
            if constexpr (IsStrict) {             \
73
10
                params.status = e.to_status();    \
74
10
            }                                     \
75
30
            return false;                         \
76
30
        }                                         \
77
314
    } while (false)
78
#endif
79
80
// skip leading and trailing ascii whitespaces,
81
// return the pointer to the first non-whitespace char,
82
// and update the len to the new length, which does not include
83
// leading and trailing whitespaces
84
template <typename T>
85
1.08M
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
2.02M
    while (len > 0 && is_whitespace_ascii(*s)) {
87
939k
        ++s;
88
939k
        --len;
89
939k
    }
90
91
2.02M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
931k
        --len;
93
931k
    }
94
95
1.08M
    return s;
96
1.08M
}
_ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_
Line
Count
Source
85
1.03M
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
1.81M
    while (len > 0 && is_whitespace_ascii(*s)) {
87
787k
        ++s;
88
787k
        --len;
89
787k
    }
90
91
1.81M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
780k
        --len;
93
780k
    }
94
95
1.03M
    return s;
96
1.03M
}
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_
Line
Count
Source
85
2.74k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
9.80k
    while (len > 0 && is_whitespace_ascii(*s)) {
87
7.05k
        ++s;
88
7.05k
        --len;
89
7.05k
    }
90
91
9.80k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
7.05k
        --len;
93
7.05k
    }
94
95
2.74k
    return s;
96
2.74k
}
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_
Line
Count
Source
85
55.6k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
86
200k
    while (len > 0 && is_whitespace_ascii(*s)) {
87
144k
        ++s;
88
144k
        --len;
89
144k
    }
90
91
199k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
92
144k
        --len;
93
144k
    }
94
95
55.6k
    return s;
96
55.6k
}
97
98
template <typename T>
99
107k
inline const char* skip_leading_whitespace(const char* __restrict s, T& len) {
100
315k
    while (len > 0 && is_whitespace_ascii(*s)) {
101
207k
        ++s;
102
207k
        --len;
103
207k
    }
104
105
107k
    return s;
106
107k
}
107
108
// Drops ASCII whitespace from the back of [s, s + len).
// The start pointer is returned unchanged; only `len` is reduced so that it no
// longer covers the trailing whitespace.
template <typename T>
inline const char* skip_trailing_whitespaces(const char* s, T& len) {
    for (; len > 0 && is_whitespace_ascii(s[len - 1]); --len) {
    }
    return s;
}
120
121
template <bool (*Pred)(char)>
122
874k
bool range_suite(const char* s, const char* end) {
123
874k
    return std::ranges::all_of(s, end, Pred);
124
874k
}
_ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_
Line
Count
Source
122
869k
bool range_suite(const char* s, const char* end) {
123
869k
    return std::ranges::all_of(s, end, Pred);
124
869k
}
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_
Line
Count
Source
122
4.57k
bool range_suite(const char* s, const char* end) {
123
4.57k
    return std::ranges::all_of(s, end, Pred);
124
4.57k
}
125
126
// Callable aliases for range_suite: true when every character in [s, end)
// passes is_numeric_ascii / is_whitespace_ascii respectively.
inline auto is_digit_range = range_suite<is_numeric_ascii>;
127
inline auto is_space_range = range_suite<is_whitespace_ascii>;
128
129
// Returns true when `s + offset` addresses a character strictly before `end`,
// i.e. when s[offset] may be read from the range [s, end).
//
// The check is done on lengths rather than by forming `s + offset`: computing a
// pointer further than one past the end of the underlying buffer is undefined
// behaviour, so a large `offset` must never be added to `s`.
//
// Combining in_bound and range_suite is ok; it won't lead to duplicated calculation.
inline bool in_bound(const char* s, const char* end, size_t offset) {
    // `s >= end` covers both the empty range and a start pointer already past `end`,
    // and guarantees that `end - s` below is positive before it is converted.
    if (s >= end || offset >= static_cast<size_t>(end - s)) [[unlikely]] {
        return false;
    }
    return true;
}
136
137
// LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more.
138
// if need result, use StringRef{origin_s, s} outside
139
template <int LEN, bool (*Pred)(char)>
140
3.01M
bool skip_qualified_char(const char*& s, const char* end) {
141
3.01M
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
2.39M
        while (s != end && Pred(*s)) {
144
1.38M
            ++s;
145
1.38M
        }
146
1.99M
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
3.97M
        for (int i = 0; i < LEN; ++i, ++s) {
149
1.99M
            if (s == end || !Pred(*s)) [[unlikely]] {
150
21.6k
                return false;
151
21.6k
            }
152
1.99M
        }
153
1.99M
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
108
        int count = 0;
156
720
        while (s != end && Pred(*s)) {
157
612
            ++s;
158
612
            ++count;
159
612
        }
160
108
        if (count < -LEN) [[unlikely]] {
161
0
            return false;
162
0
        }
163
108
    }
164
1.97M
    return true;
165
3.01M
}
_ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
140
397k
bool skip_qualified_char(const char*& s, const char* end) {
141
397k
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
403k
        while (s != end && Pred(*s)) {
144
6.06k
            ++s;
145
6.06k
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
397k
    return true;
165
397k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_
Line
Count
Source
140
616k
bool skip_qualified_char(const char*& s, const char* end) {
141
616k
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
1.99M
        while (s != end && Pred(*s)) {
144
1.38M
            ++s;
145
1.38M
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
616k
    return true;
165
616k
}
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_
Line
Count
Source
140
108
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
        for (int i = 0; i < LEN; ++i, ++s) {
149
            if (s == end || !Pred(*s)) [[unlikely]] {
150
                return false;
151
            }
152
        }
153
108
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
108
        int count = 0;
156
720
        while (s != end && Pred(*s)) {
157
612
            ++s;
158
612
            ++count;
159
612
        }
160
108
        if (count < -LEN) [[unlikely]] {
161
0
            return false;
162
0
        }
163
108
    }
164
108
    return true;
165
108
}
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_
_ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_
Line
Count
Source
140
70.6k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
70.6k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
119k
        for (int i = 0; i < LEN; ++i, ++s) {
149
70.6k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
21.3k
                return false;
151
21.3k
            }
152
70.6k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
49.2k
    return true;
165
70.6k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_
Line
Count
Source
140
352k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
352k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
704k
        for (int i = 0; i < LEN; ++i, ++s) {
149
352k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
96
                return false;
151
96
            }
152
352k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
352k
    return true;
165
352k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_
Line
Count
Source
140
878k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
878k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
1.75M
        for (int i = 0; i < LEN; ++i, ++s) {
149
878k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
84
                return false;
151
84
            }
152
878k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
878k
    return true;
165
878k
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_
Line
Count
Source
140
694k
bool skip_qualified_char(const char*& s, const char* end) {
141
    if constexpr (LEN == 0) {
142
        // Consume any length of characters that match the predicate.
143
        while (s != end && Pred(*s)) {
144
            ++s;
145
        }
146
694k
    } else if constexpr (LEN > 0) {
147
        // Consume exactly LEN characters that match the predicate.
148
1.38M
        for (int i = 0; i < LEN; ++i, ++s) {
149
694k
            if (s == end || !Pred(*s)) [[unlikely]] {
150
48
                return false;
151
48
            }
152
694k
        }
153
    } else { // LEN < 0
154
        // Consume at least -LEN characters that match the predicate.
155
        int count = 0;
156
        while (s != end && Pred(*s)) {
157
            ++s;
158
            ++count;
159
        }
160
        if (count < -LEN) [[unlikely]] {
161
            return false;
162
        }
163
    }
164
694k
    return true;
165
694k
}
166
167
// Ready-made skip_qualified_char instantiations. Each takes (const char*& s, const char* end),
// advances `s`, and returns whether the required number of characters matched.
// Zero or more whitespace characters (LEN = 0, so it always returns true).
inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
168
// Zero or more digit characters (LEN = 0, so it always returns true).
inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
169
// One or more non-whitespace characters (LEN = -1).
inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
170
// Exactly one character accepted by is_slash_ascii.
inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
171
// Exactly one character accepted by is_non_alnum.
inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;
172
173
352k
// True for the characters treated as a delimiter here: a space, 'T' or ':'.
inline bool is_delimiter(char c) {
    switch (c) {
    case ' ':
    case 'T':
    case ':':
        return true;
    default:
        return false;
    }
}
176
// Consumes exactly one character accepted by is_delimiter; returns false if the next character is not one.
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
177
178
1.32M
// True for the two accepted date separators: '-' and '/'.
inline bool is_date_sep(char c) {
    const bool is_dash = (c == '-');
    const bool is_slash = (c == '/');
    return is_dash || is_slash;
}
181
// Consumes exactly one character accepted by is_date_sep ('-' or '/'); returns false otherwise.
inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>;
182
183
694k
// True only for the ':' character.
inline bool is_colon(char c) {
    constexpr char kColon = ':';
    return kColon == c;
}
186
// Consumes exactly one ':' character; returns false if the next character is not a colon.
inline auto consume_one_colon = skip_qualified_char<1, is_colon>;
187
188
// only consume a string of digit, not include sign.
189
// when has MAX_LEN > 0, do greedy match but at most MAX_LEN.
190
// LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits.
191
template <typename T, int LEN = 0, int MAX_LEN = -1>
192
40
bool consume_digit(const char*& s, const char* end, T& out) {
193
40
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
40
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
40
        out = 0;
215
170
        for (int i = 0; i < LEN; ++i, ++s) {
216
130
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
130
            out = out * 10 + (*s - '0');
220
130
        }
221
40
    }
222
40
    return true;
223
40
}
_ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_
Line
Count
Source
192
30
bool consume_digit(const char*& s, const char* end, T& out) {
193
30
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
30
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
30
        out = 0;
215
150
        for (int i = 0; i < LEN; ++i, ++s) {
216
120
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
120
            out = out * 10 + (*s - '0');
220
120
        }
221
30
    }
222
30
    return true;
223
30
}
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_
Line
Count
Source
192
10
bool consume_digit(const char*& s, const char* end, T& out) {
193
10
    static_assert(LEN >= 0);
194
    if constexpr (MAX_LEN > 0) {
195
        out = 0;
196
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
197
            if (s == end || !is_numeric_ascii(*s)) {
198
                if (i < LEN) [[unlikely]] {
199
                    return false;
200
                }
201
                break; // stop consuming if we have consumed enough digits.
202
            }
203
            out = out * 10 + (*s - '0');
204
        }
205
    } else if constexpr (LEN == 0) {
206
        // Consume any length of digits.
207
        out = 0;
208
        while (s != end && is_numeric_ascii(*s)) {
209
            out = out * 10 + (*s - '0');
210
            ++s;
211
        }
212
10
    } else if constexpr (LEN > 0) {
213
        // Consume exactly LEN digits.
214
10
        out = 0;
215
20
        for (int i = 0; i < LEN; ++i, ++s) {
216
10
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
217
0
                return false;
218
0
            }
219
10
            out = out * 10 + (*s - '0');
220
10
        }
221
10
    }
222
10
    return true;
223
10
}
224
225
// specialized version for 2 digits, which is used very often in date/time parsing.
226
template <>
227
1.03M
inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) {
228
1.03M
    out = 0;
229
1.03M
    if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1)))
230
36.4k
            [[unlikely]] {
231
36.4k
        return false;
232
36.4k
    }
233
1.00M
    out = (s[0] - '0') * 10 + (s[1] - '0');
234
1.00M
    s += 2; // consume 2 digits
235
1.00M
    return true;
236
1.03M
}
237
238
// specialized version for 1 or 2 digits, which is used very often in date/time parsing.
239
template <>
240
1.97M
inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) {
241
1.97M
    out = 0;
242
1.97M
    if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
243
960
        return false;
244
1.97M
    } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) {
245
        // consume 2 digits
246
1.94M
        out = (*s - '0') * 10 + (*(s + 1) - '0');
247
1.94M
        s += 2;
248
1.94M
    } else {
249
        // consume 1 digit
250
33.3k
        out = *s - '0';
251
33.3k
        ++s;
252
33.3k
    }
253
1.97M
    return true;
254
1.97M
}
255
256
template <bool (*Pred)(char)>
257
296
uint32_t count_valid_length(const char* s, const char* end) {
258
296
    DCHECK(s <= end) << "s: " << s << ", end: " << end;
259
296
    uint32_t count = 0;
260
898
    while (s != end && Pred(*s)) {
261
602
        ++count;
262
602
        ++s;
263
602
    }
264
296
    return count;
265
296
}
266
267
// Number of consecutive characters accepted by is_numeric_ascii at the start of [s, end).
inline auto count_digits = count_valid_length<is_numeric_ascii>;
268
269
272
// Formats a timezone offset as the six-character string "<sign>HH:MM".
// Each of `hour_offset` and `minute_offset` is written as two decimal digits
// (tens, then units); the values are not range-checked here.
inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
    const auto to_digit = [](uint32_t value) { return static_cast<char>('0' + value); };
    std::string result {sign,
                        to_digit(hour_offset / 10),
                        to_digit(hour_offset % 10),
                        ':',
                        to_digit(minute_offset / 10),
                        to_digit(minute_offset % 10)};
    DCHECK_EQ(result.size(), 6);
    return result;
}
280
281
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
282
// this is significantly faster than libc (atoi/strtol and atof/strtod).
283
//
284
// Strings with leading and trailing whitespaces are accepted.
285
// Branching is heavily optimized for the non-whitespace successful case.
286
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
287
// If that first attempt was unsuccessful, these functions retry the parsing after removing
288
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
289
//
290
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
291
// data type.  This is different from hive, which returns NULL for overflow slots for int types
292
// and inf/-inf for float types.
293
//
294
// Things we tried that did not work:
295
//  - lookup table for converting character to digit
296
// Improvements (TODO):
297
//  - Validate input using _simd_compare_ranges
298
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
299
class StringParser {
300
public:
301
    // Outcome of a parse call, reported through the `result` out-parameter.
    // PARSE_SUCCESS is 0; the other enumerators take the following values in order.
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
302
303
    template <typename T>
304
969k
    static T numeric_limits(bool negative) {
305
969k
        if constexpr (std::is_same_v<T, __int128>) {
306
96.7k
            return negative ? MIN_INT128 : MAX_INT128;
307
873k
        } else {
308
873k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
873k
        }
310
969k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
304
96.7k
    static T numeric_limits(bool negative) {
305
96.7k
        if constexpr (std::is_same_v<T, __int128>) {
306
96.7k
            return negative ? MIN_INT128 : MAX_INT128;
307
        } else {
308
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
        }
310
96.7k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
304
331k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
331k
        } else {
308
331k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
331k
        }
310
331k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
304
153k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
153k
        } else {
308
153k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
153k
        }
310
153k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
304
211k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
211k
        } else {
308
211k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
211k
        }
310
211k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
304
175k
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
175k
        } else {
308
175k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
175k
        }
310
175k
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
304
294
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
294
        } else {
308
294
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
294
        }
310
294
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
304
42
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
42
        } else {
308
42
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
42
        }
310
42
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
304
8
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
8
        } else {
308
8
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
8
        }
310
8
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
304
8
    static T numeric_limits(bool negative) {
305
        if constexpr (std::is_same_v<T, __int128>) {
306
            return negative ? MIN_INT128 : MAX_INT128;
307
8
        } else {
308
8
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
309
8
        }
310
8
    }
311
312
    template <typename T>
313
1.74M
    static T get_scale_multiplier(int scale) {
314
1.74M
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
1.74M
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
1.74M
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
1.74M
        if constexpr (std::is_same_v<T, int32_t>) {
318
266k
            return common::exp10_i32(scale);
319
365k
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
365k
            return common::exp10_i64(scale);
321
475k
        } else if constexpr (std::is_same_v<T, __int128>) {
322
475k
            return common::exp10_i128(scale);
323
640k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
640k
            return common::exp10_i256(scale);
325
640k
        }
326
1.74M
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
313
266k
    static T get_scale_multiplier(int scale) {
314
266k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
266k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
266k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
266k
        if constexpr (std::is_same_v<T, int32_t>) {
318
266k
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
266k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
313
365k
    static T get_scale_multiplier(int scale) {
314
365k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
365k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
365k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
365k
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
365k
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
365k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
313
475k
    static T get_scale_multiplier(int scale) {
314
475k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
475k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
475k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
475k
        } else if constexpr (std::is_same_v<T, __int128>) {
322
475k
            return common::exp10_i128(scale);
323
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
            return common::exp10_i256(scale);
325
        }
326
475k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
313
640k
    static T get_scale_multiplier(int scale) {
314
640k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
315
640k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
316
640k
                      "You can only instantiate as int32_t, int64_t, __int128.");
317
        if constexpr (std::is_same_v<T, int32_t>) {
318
            return common::exp10_i32(scale);
319
        } else if constexpr (std::is_same_v<T, int64_t>) {
320
            return common::exp10_i64(scale);
321
        } else if constexpr (std::is_same_v<T, __int128>) {
322
            return common::exp10_i128(scale);
323
640k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
324
640k
            return common::exp10_i256(scale);
325
640k
        }
326
640k
    }
327
328
    // This is considerably faster than glibc's implementation (25x).
329
    // Assumes s represents a decimal number.
330
    template <typename T, bool enable_strict_mode = false>
331
783k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
783k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
783k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
675k
            return ans;
335
675k
        }
336
107k
        s = skip_leading_whitespace(s, len);
337
107k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
783k
    }
_ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
90.5k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
90.5k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
90.5k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
87.9k
            return ans;
335
87.9k
        }
336
2.67k
        s = skip_leading_whitespace(s, len);
337
2.67k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
90.5k
    }
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
190k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
190k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
190k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
132k
            return ans;
335
132k
        }
336
58.4k
        s = skip_leading_whitespace(s, len);
337
58.4k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
190k
    }
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
2.00k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
2.00k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
2.00k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
176
            return ans;
335
176
        }
336
1.82k
        s = skip_leading_whitespace(s, len);
337
1.82k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
2.00k
    }
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
132k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
132k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
132k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
116k
            return ans;
335
116k
        }
336
16.2k
        s = skip_leading_whitespace(s, len);
337
16.2k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
132k
    }
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
1.96k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
1.96k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
1.96k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
176
            return ans;
335
176
        }
336
1.79k
        s = skip_leading_whitespace(s, len);
337
1.79k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
1.96k
    }
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
197k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
197k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
197k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
185k
            return ans;
335
185k
        }
336
12.6k
        s = skip_leading_whitespace(s, len);
337
12.6k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
197k
    }
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
1.93k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
1.93k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
1.93k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
176
            return ans;
335
176
        }
336
1.76k
        s = skip_leading_whitespace(s, len);
337
1.76k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
1.93k
    }
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
162k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
162k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
162k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
152k
            return ans;
335
152k
        }
336
9.00k
        s = skip_leading_whitespace(s, len);
337
9.00k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
162k
    }
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
1.92k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
1.92k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
1.92k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
188
            return ans;
335
188
        }
336
1.73k
        s = skip_leading_whitespace(s, len);
337
1.73k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
1.92k
    }
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
1.87k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
1.87k
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
1.87k
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
176
            return ans;
335
176
        }
336
1.69k
        s = skip_leading_whitespace(s, len);
337
1.69k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
1.87k
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
40
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
40
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
40
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
40
            return ans;
335
40
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
40
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
8
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
8
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
8
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
8
            return ans;
335
8
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
8
    }
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
331
8
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
332
8
        T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result);
333
8
        if (LIKELY(*result == PARSE_SUCCESS)) {
334
8
            return ans;
335
8
        }
336
0
        s = skip_leading_whitespace(s, len);
337
0
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
338
8
    }
339
340
    // This is considerably faster than glibc's implementation.
341
    // In the case of overflow, the max/min value for the data type will be returned.
342
    // Assumes s represents a decimal number.
343
    template <typename T>
344
2.74k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
2.74k
        s = skip_ascii_whitespaces(s, len);
346
2.74k
        return string_to_unsigned_int_internal<T>(s, len, result);
347
2.74k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
686
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
686
        s = skip_ascii_whitespaces(s, len);
346
686
        return string_to_unsigned_int_internal<T>(s, len, result);
347
686
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
686
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
686
        s = skip_ascii_whitespaces(s, len);
346
686
        return string_to_unsigned_int_internal<T>(s, len, result);
347
686
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
686
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
686
        s = skip_ascii_whitespaces(s, len);
346
686
        return string_to_unsigned_int_internal<T>(s, len, result);
347
686
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
344
686
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
345
686
        s = skip_ascii_whitespaces(s, len);
346
686
        return string_to_unsigned_int_internal<T>(s, len, result);
347
686
    }
348
349
    // Convert a string s representing a number in given base into a decimal number.
350
    template <typename T>
351
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
352
55.6k
                                  ParseResult* result) {
353
55.6k
        s = skip_ascii_whitespaces(s, len);
354
55.6k
        return string_to_int_internal<T>(s, len, base, result);
355
55.6k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
52.9k
                                  ParseResult* result) {
353
52.9k
        s = skip_ascii_whitespaces(s, len);
354
52.9k
        return string_to_int_internal<T>(s, len, base, result);
355
52.9k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
980
                                  ParseResult* result) {
353
980
        s = skip_ascii_whitespaces(s, len);
354
980
        return string_to_int_internal<T>(s, len, base, result);
355
980
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
882
                                  ParseResult* result) {
353
882
        s = skip_ascii_whitespaces(s, len);
354
882
        return string_to_int_internal<T>(s, len, base, result);
355
882
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
882
                                  ParseResult* result) {
353
882
        s = skip_ascii_whitespaces(s, len);
354
882
        return string_to_int_internal<T>(s, len, base, result);
355
882
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
352
2
                                  ParseResult* result) {
353
2
        s = skip_ascii_whitespaces(s, len);
354
2
        return string_to_int_internal<T>(s, len, base, result);
355
2
    }
356
357
    template <typename T>
358
305k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
305k
        s = skip_ascii_whitespaces(s, len);
360
305k
        return string_to_float_internal<T>(s, len, result);
361
305k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
358
175k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
175k
        s = skip_ascii_whitespaces(s, len);
360
175k
        return string_to_float_internal<T>(s, len, result);
361
175k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
358
130k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
359
130k
        s = skip_ascii_whitespaces(s, len);
360
130k
        return string_to_float_internal<T>(s, len, result);
361
130k
    }
362
363
    // Parses a string for 'true' or 'false', case insensitive.
364
22.7k
    static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
365
22.7k
        s = skip_ascii_whitespaces(s, len);
366
22.7k
        return string_to_bool_internal(s, len, result);
367
22.7k
    }
368
369
    template <PrimitiveType P>
370
    static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
371
            const char* __restrict s, size_t len, int type_precision, int type_scale,
372
            ParseResult* result);
373
374
    template <typename T>
375
    static Status split_string_to_map(const std::string& base, const T element_separator,
376
                                      const T key_value_separator,
377
                                      std::map<std::string, std::string>* result) {
378
        int key_pos = 0;
379
        int key_end;
380
        int val_pos;
381
        int val_end;
382
383
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
384
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
385
                std::string::npos) {
386
                break;
387
            }
388
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
389
                val_end = base.size();
390
            }
391
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
392
                                          base.substr(val_pos, val_end - val_pos)));
393
            key_pos = val_end;
394
            if (key_pos != std::string::npos) {
395
                ++key_pos;
396
            }
397
        }
398
399
        return Status::OK();
400
    }
401
402
    // This is considerably faster than glibc's implementation.
403
    // In the case of overflow, the max/min value for the data type will be returned.
404
    // Assumes s represents a decimal number.
405
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
406
    template <typename T, bool enable_strict_mode = false>
407
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
408
409
    // This is considerably faster than glibc's implementation.
410
    // In the case of overflow, the max/min value for the data type will be returned.
411
    // Assumes s represents a decimal number.
412
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
413
    template <typename T>
414
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
415
                                                    ParseResult* result);
416
417
    // Convert a string s representing a number in given base into a decimal number.
418
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
419
    template <typename T>
420
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
421
                                           ParseResult* result);
422
423
    // Converts an ascii string to an integer of type T assuming it cannot overflow
424
    // and the number is positive.
425
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
426
    template <typename T, bool enable_strict_mode = false>
427
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
428
                                              ParseResult* result);
429
430
    // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
431
    // char is not a digit.
432
    template <typename T>
433
    static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
434
                                                      ParseResult* result);
435
436
    // This is considerably faster than glibc's implementation (>100x why???)
437
    // No special case handling needs to be done for overflows, the floating point spec
438
    // already does it and will cap the values to -inf/inf
439
    // To avoid inaccurate conversions this function falls back to strtod for
440
    // scientific notation.
441
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
442
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
443
    template <typename T>
444
    static inline T string_to_float_internal(const char* __restrict s, int len,
445
                                             ParseResult* result);
446
447
    // parses a string for 'true' or 'false', case insensitive
448
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
449
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
450
                                               ParseResult* result);
451
452
    // Returns true if s only contains whitespace.
453
7.09k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
454
12.8k
        for (int i = 0; i < len; ++i) {
455
12.0k
            if (!LIKELY(is_whitespace_ascii(s[i]))) {
456
6.21k
                return false;
457
6.21k
            }
458
12.0k
        }
459
880
        return true;
460
7.09k
    }
461
462
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
463
7.31k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
464
7.31k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
465
7.31k
    }
466
467
5.34k
    static inline bool is_all_digit(const char* __restrict s, int len) {
468
11.1k
        for (int i = 0; i < len; ++i) {
469
6.10k
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
470
302
                return false;
471
302
            }
472
6.10k
        }
473
5.04k
        return true;
474
5.34k
    }
475
}; // end of class StringParser
476
477
template <typename T, bool enable_strict_mode>
478
892k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
892k
    if (UNLIKELY(len <= 0)) {
480
4.61k
        *result = PARSE_FAILURE;
481
4.61k
        return 0;
482
4.61k
    }
483
484
887k
    using UnsignedT = MakeUnsignedT<T>;
485
887k
    UnsignedT val = 0;
486
887k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
887k
    bool negative = false;
488
887k
    int i = 0;
489
887k
    switch (*s) {
490
205k
    case '-':
491
205k
        negative = true;
492
205k
        max_val += 1;
493
205k
        [[fallthrough]];
494
212k
    case '+':
495
212k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
212k
        if (UNLIKELY(len == 1)) {
498
18
            *result = PARSE_FAILURE;
499
18
            return 0;
500
18
        }
501
887k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
887k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
555k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
555k
        return static_cast<T>(negative ? -val : val);
507
555k
    }
508
509
332k
    const T max_div_10 = max_val / 10;
510
332k
    const T max_mod_10 = max_val % 10;
511
512
332k
    int first = i;
513
3.37M
    for (; i < len; ++i) {
514
3.23M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
3.14M
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
3.14M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
96.8k
                *result = PARSE_OVERFLOW;
519
96.8k
                return negative ? -max_val : max_val;
520
96.8k
            }
521
3.04M
            val = val * 10 + digit;
522
3.04M
        } else {
523
91.7k
            if constexpr (enable_strict_mode) {
524
8.16k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
7.56k
                    *result = PARSE_FAILURE;
527
7.56k
                    return 0;
528
7.56k
                }
529
83.6k
            } else {
530
                // Save original position where non-digit was found
531
83.6k
                int remaining_len = len - i;
532
83.6k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
83.6k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
83.6k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
83.6k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
57.8k
                    *result = PARSE_FAILURE;
540
57.8k
                    return 0;
541
57.8k
                }
542
83.6k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
26.3k
            *result = PARSE_SUCCESS;
545
91.7k
            return static_cast<T>(negative ? -val : val);
546
91.7k
        }
547
3.23M
    }
548
143k
    *result = PARSE_SUCCESS;
549
143k
    return static_cast<T>(negative ? -val : val);
550
332k
}
_ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
93.2k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
93.2k
    if (UNLIKELY(len <= 0)) {
480
88
        *result = PARSE_FAILURE;
481
88
        return 0;
482
88
    }
483
484
93.1k
    using UnsignedT = MakeUnsignedT<T>;
485
93.1k
    UnsignedT val = 0;
486
93.1k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
93.1k
    bool negative = false;
488
93.1k
    int i = 0;
489
93.1k
    switch (*s) {
490
7.09k
    case '-':
491
7.09k
        negative = true;
492
7.09k
        max_val += 1;
493
7.09k
        [[fallthrough]];
494
7.65k
    case '+':
495
7.65k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
7.65k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
93.1k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
93.1k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
83.8k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
83.8k
        return static_cast<T>(negative ? -val : val);
507
83.8k
    }
508
509
9.30k
    const T max_div_10 = max_val / 10;
510
9.30k
    const T max_mod_10 = max_val % 10;
511
512
9.30k
    int first = i;
513
345k
    for (; i < len; ++i) {
514
338k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
337k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
337k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.02k
                *result = PARSE_OVERFLOW;
519
1.02k
                return negative ? -max_val : max_val;
520
1.02k
            }
521
336k
            val = val * 10 + digit;
522
336k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
1.07k
            } else {
530
                // Save original position where non-digit was found
531
1.07k
                int remaining_len = len - i;
532
1.07k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
1.07k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
1.07k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
1.07k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
752
                    *result = PARSE_FAILURE;
540
752
                    return 0;
541
752
                }
542
1.07k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
320
            *result = PARSE_SUCCESS;
545
1.07k
            return static_cast<T>(negative ? -val : val);
546
1.07k
        }
547
338k
    }
548
7.21k
    *result = PARSE_SUCCESS;
549
7.21k
    return static_cast<T>(negative ? -val : val);
550
9.30k
}
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
249k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
249k
    if (UNLIKELY(len <= 0)) {
480
436
        *result = PARSE_FAILURE;
481
436
        return 0;
482
436
    }
483
484
248k
    using UnsignedT = MakeUnsignedT<T>;
485
248k
    UnsignedT val = 0;
486
248k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
248k
    bool negative = false;
488
248k
    int i = 0;
489
248k
    switch (*s) {
490
44.9k
    case '-':
491
44.9k
        negative = true;
492
44.9k
        max_val += 1;
493
44.9k
        [[fallthrough]];
494
45.8k
    case '+':
495
45.8k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
45.8k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
248k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
248k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
120k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
120k
        return static_cast<T>(negative ? -val : val);
507
120k
    }
508
509
128k
    const T max_div_10 = max_val / 10;
510
128k
    const T max_mod_10 = max_val % 10;
511
512
128k
    int first = i;
513
308k
    for (; i < len; ++i) {
514
295k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
223k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
223k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
42.4k
                *result = PARSE_OVERFLOW;
519
42.4k
                return negative ? -max_val : max_val;
520
42.4k
            }
521
180k
            val = val * 10 + digit;
522
180k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
71.9k
            } else {
530
                // Save original position where non-digit was found
531
71.9k
                int remaining_len = len - i;
532
71.9k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
71.9k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
71.9k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
71.9k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
49.0k
                    *result = PARSE_FAILURE;
540
49.0k
                    return 0;
541
49.0k
                }
542
71.9k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
22.9k
            *result = PARSE_SUCCESS;
545
71.9k
            return static_cast<T>(negative ? -val : val);
546
71.9k
        }
547
295k
    }
548
13.6k
    *result = PARSE_SUCCESS;
549
13.6k
    return static_cast<T>(negative ? -val : val);
550
128k
}
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
3.82k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
3.82k
    if (UNLIKELY(len <= 0)) {
480
16
        *result = PARSE_FAILURE;
481
16
        return 0;
482
16
    }
483
484
3.80k
    using UnsignedT = MakeUnsignedT<T>;
485
3.80k
    UnsignedT val = 0;
486
3.80k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
3.80k
    bool negative = false;
488
3.80k
    int i = 0;
489
3.80k
    switch (*s) {
490
1.26k
    case '-':
491
1.26k
        negative = true;
492
1.26k
        max_val += 1;
493
1.26k
        [[fallthrough]];
494
1.97k
    case '+':
495
1.97k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
1.97k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
3.80k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
3.80k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
96
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
96
        return static_cast<T>(negative ? -val : val);
507
96
    }
508
509
3.71k
    const T max_div_10 = max_val / 10;
510
3.71k
    const T max_mod_10 = max_val % 10;
511
512
3.71k
    int first = i;
513
13.1k
    for (; i < len; ++i) {
514
13.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
10.6k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
10.6k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.20k
                *result = PARSE_OVERFLOW;
519
1.20k
                return negative ? -max_val : max_val;
520
1.20k
            }
521
9.45k
            val = val * 10 + digit;
522
9.45k
        } else {
523
2.37k
            if constexpr (enable_strict_mode) {
524
2.37k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
2.20k
                    *result = PARSE_FAILURE;
527
2.20k
                    return 0;
528
2.20k
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
176
            *result = PARSE_SUCCESS;
545
2.37k
            return static_cast<T>(negative ? -val : val);
546
2.37k
        }
547
13.0k
    }
548
136
    *result = PARSE_SUCCESS;
549
136
    return static_cast<T>(negative ? -val : val);
550
3.71k
}
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
148k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
148k
    if (UNLIKELY(len <= 0)) {
480
16
        *result = PARSE_FAILURE;
481
16
        return 0;
482
16
    }
483
484
148k
    using UnsignedT = MakeUnsignedT<T>;
485
148k
    UnsignedT val = 0;
486
148k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
148k
    bool negative = false;
488
148k
    int i = 0;
489
148k
    switch (*s) {
490
25.6k
    case '-':
491
25.6k
        negative = true;
492
25.6k
        max_val += 1;
493
25.6k
        [[fallthrough]];
494
26.3k
    case '+':
495
26.3k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
26.3k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
148k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
148k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
101k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
101k
        return static_cast<T>(negative ? -val : val);
507
101k
    }
508
509
47.2k
    const T max_div_10 = max_val / 10;
510
47.2k
    const T max_mod_10 = max_val % 10;
511
512
47.2k
    int first = i;
513
246k
    for (; i < len; ++i) {
514
228k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
224k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
224k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
25.8k
                *result = PARSE_OVERFLOW;
519
25.8k
                return negative ? -max_val : max_val;
520
25.8k
            }
521
199k
            val = val * 10 + digit;
522
199k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
3.81k
            } else {
530
                // Save original position where non-digit was found
531
3.81k
                int remaining_len = len - i;
532
3.81k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
3.81k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
3.81k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
3.81k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
2.59k
                    *result = PARSE_FAILURE;
540
2.59k
                    return 0;
541
2.59k
                }
542
3.81k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
1.22k
            *result = PARSE_SUCCESS;
545
3.81k
            return static_cast<T>(negative ? -val : val);
546
3.81k
        }
547
228k
    }
548
17.6k
    *result = PARSE_SUCCESS;
549
17.6k
    return static_cast<T>(negative ? -val : val);
550
47.2k
}
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
3.76k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
3.76k
    if (UNLIKELY(len <= 0)) {
480
16
        *result = PARSE_FAILURE;
481
16
        return 0;
482
16
    }
483
484
3.74k
    using UnsignedT = MakeUnsignedT<T>;
485
3.74k
    UnsignedT val = 0;
486
3.74k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
3.74k
    bool negative = false;
488
3.74k
    int i = 0;
489
3.74k
    switch (*s) {
490
1.24k
    case '-':
491
1.24k
        negative = true;
492
1.24k
        max_val += 1;
493
1.24k
        [[fallthrough]];
494
1.94k
    case '+':
495
1.94k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
1.94k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
3.74k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
3.74k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
336
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
336
        return static_cast<T>(negative ? -val : val);
507
336
    }
508
509
3.40k
    const T max_div_10 = max_val / 10;
510
3.40k
    const T max_mod_10 = max_val % 10;
511
512
3.40k
    int first = i;
513
15.7k
    for (; i < len; ++i) {
514
15.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
13.4k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
13.4k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.15k
                *result = PARSE_OVERFLOW;
519
1.15k
                return negative ? -max_val : max_val;
520
1.15k
            }
521
12.3k
            val = val * 10 + digit;
522
12.3k
        } else {
523
2.17k
            if constexpr (enable_strict_mode) {
524
2.17k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
2.00k
                    *result = PARSE_FAILURE;
527
2.00k
                    return 0;
528
2.00k
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
176
            *result = PARSE_SUCCESS;
545
2.17k
            return static_cast<T>(negative ? -val : val);
546
2.17k
        }
547
15.6k
    }
548
80
    *result = PARSE_SUCCESS;
549
80
    return static_cast<T>(negative ? -val : val);
550
3.40k
}
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
210k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
210k
    if (UNLIKELY(len <= 0)) {
480
3.95k
        *result = PARSE_FAILURE;
481
3.95k
        return 0;
482
3.95k
    }
483
484
206k
    using UnsignedT = MakeUnsignedT<T>;
485
206k
    UnsignedT val = 0;
486
206k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
206k
    bool negative = false;
488
206k
    int i = 0;
489
206k
    switch (*s) {
490
21.3k
    case '-':
491
21.3k
        negative = true;
492
21.3k
        max_val += 1;
493
21.3k
        [[fallthrough]];
494
22.1k
    case '+':
495
22.1k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
22.1k
        if (UNLIKELY(len == 1)) {
498
18
            *result = PARSE_FAILURE;
499
18
            return 0;
500
18
        }
501
206k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
206k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
179k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
179k
        return static_cast<T>(negative ? -val : val);
507
179k
    }
508
509
27.0k
    const T max_div_10 = max_val / 10;
510
27.0k
    const T max_mod_10 = max_val % 10;
511
512
27.0k
    int first = i;
513
257k
    for (; i < len; ++i) {
514
245k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
241k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
241k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
11.5k
                *result = PARSE_OVERFLOW;
519
11.5k
                return negative ? -max_val : max_val;
520
11.5k
            }
521
230k
            val = val * 10 + digit;
522
230k
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
3.76k
            } else {
530
                // Save original position where non-digit was found
531
3.76k
                int remaining_len = len - i;
532
3.76k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
3.76k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
3.76k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
3.76k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
2.99k
                    *result = PARSE_FAILURE;
540
2.99k
                    return 0;
541
2.99k
                }
542
3.76k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
772
            *result = PARSE_SUCCESS;
545
3.76k
            return static_cast<T>(negative ? -val : val);
546
3.76k
        }
547
245k
    }
548
11.6k
    *result = PARSE_SUCCESS;
549
11.6k
    return static_cast<T>(negative ? -val : val);
550
27.0k
}
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
3.69k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
3.69k
    if (UNLIKELY(len <= 0)) {
480
16
        *result = PARSE_FAILURE;
481
16
        return 0;
482
16
    }
483
484
3.68k
    using UnsignedT = MakeUnsignedT<T>;
485
3.68k
    UnsignedT val = 0;
486
3.68k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
3.68k
    bool negative = false;
488
3.68k
    int i = 0;
489
3.68k
    switch (*s) {
490
1.21k
    case '-':
491
1.21k
        negative = true;
492
1.21k
        max_val += 1;
493
1.21k
        [[fallthrough]];
494
1.90k
    case '+':
495
1.90k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
1.90k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
3.68k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
3.68k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
922
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
922
        return static_cast<T>(negative ? -val : val);
507
922
    }
508
509
2.75k
    const T max_div_10 = max_val / 10;
510
2.75k
    const T max_mod_10 = max_val % 10;
511
512
2.75k
    int first = i;
513
21.4k
    for (; i < len; ++i) {
514
21.3k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
19.8k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
19.8k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.10k
                *result = PARSE_OVERFLOW;
519
1.10k
                return negative ? -max_val : max_val;
520
1.10k
            }
521
18.6k
            val = val * 10 + digit;
522
18.6k
        } else {
523
1.59k
            if constexpr (enable_strict_mode) {
524
1.59k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
1.47k
                    *result = PARSE_FAILURE;
527
1.47k
                    return 0;
528
1.47k
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
120
            *result = PARSE_SUCCESS;
545
1.59k
            return static_cast<T>(negative ? -val : val);
546
1.59k
        }
547
21.3k
    }
548
64
    *result = PARSE_SUCCESS;
549
64
    return static_cast<T>(negative ? -val : val);
550
2.75k
}
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
171k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
171k
    if (UNLIKELY(len <= 0)) {
480
28
        *result = PARSE_FAILURE;
481
28
        return 0;
482
28
    }
483
484
170k
    using UnsignedT = MakeUnsignedT<T>;
485
170k
    UnsignedT val = 0;
486
170k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
170k
    bool negative = false;
488
170k
    int i = 0;
489
170k
    switch (*s) {
490
100k
    case '-':
491
100k
        negative = true;
492
100k
        max_val += 1;
493
100k
        [[fallthrough]];
494
101k
    case '+':
495
101k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
101k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
170k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
170k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
64.6k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
64.6k
        return static_cast<T>(negative ? -val : val);
507
64.6k
    }
508
509
106k
    const T max_div_10 = max_val / 10;
510
106k
    const T max_mod_10 = max_val % 10;
511
512
106k
    int first = i;
513
2.07M
    for (; i < len; ++i) {
514
1.97M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
1.97M
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
1.97M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
10.4k
                *result = PARSE_OVERFLOW;
519
10.4k
                return negative ? -max_val : max_val;
520
10.4k
            }
521
1.96M
            val = val * 10 + digit;
522
1.96M
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
2.98k
            } else {
530
                // Save original position where non-digit was found
531
2.98k
                int remaining_len = len - i;
532
2.98k
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
2.98k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
2.98k
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
2.98k
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
2.49k
                    *result = PARSE_FAILURE;
540
2.49k
                    return 0;
541
2.49k
                }
542
2.98k
            }
543
            // Returning here is slightly faster than breaking the loop.
544
488
            *result = PARSE_SUCCESS;
545
2.98k
            return static_cast<T>(negative ? -val : val);
546
2.98k
        }
547
1.97M
    }
548
92.9k
    *result = PARSE_SUCCESS;
549
92.9k
    return static_cast<T>(negative ? -val : val);
550
106k
}
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
3.65k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
3.65k
    if (UNLIKELY(len <= 0)) {
480
20
        *result = PARSE_FAILURE;
481
20
        return 0;
482
20
    }
483
484
3.63k
    using UnsignedT = MakeUnsignedT<T>;
485
3.63k
    UnsignedT val = 0;
486
3.63k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
3.63k
    bool negative = false;
488
3.63k
    int i = 0;
489
3.63k
    switch (*s) {
490
1.19k
    case '-':
491
1.19k
        negative = true;
492
1.19k
        max_val += 1;
493
1.19k
        [[fallthrough]];
494
1.86k
    case '+':
495
1.86k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
1.86k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
3.63k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
3.63k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
1.47k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
1.47k
        return static_cast<T>(negative ? -val : val);
507
1.47k
    }
508
509
2.16k
    const T max_div_10 = max_val / 10;
510
2.16k
    const T max_mod_10 = max_val % 10;
511
512
2.16k
    int first = i;
513
33.8k
    for (; i < len; ++i) {
514
33.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
32.7k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
32.7k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.05k
                *result = PARSE_OVERFLOW;
519
1.05k
                return negative ? -max_val : max_val;
520
1.05k
            }
521
31.6k
            val = val * 10 + digit;
522
31.6k
        } else {
523
1.04k
            if constexpr (enable_strict_mode) {
524
1.04k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
982
                    *result = PARSE_FAILURE;
527
982
                    return 0;
528
982
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
64
            *result = PARSE_SUCCESS;
545
1.04k
            return static_cast<T>(negative ? -val : val);
546
1.04k
        }
547
33.7k
    }
548
64
    *result = PARSE_SUCCESS;
549
64
    return static_cast<T>(negative ? -val : val);
550
2.16k
}
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
3.56k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
3.56k
    if (UNLIKELY(len <= 0)) {
480
16
        *result = PARSE_FAILURE;
481
16
        return 0;
482
16
    }
483
484
3.55k
    using UnsignedT = MakeUnsignedT<T>;
485
3.55k
    UnsignedT val = 0;
486
3.55k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
3.55k
    bool negative = false;
488
3.55k
    int i = 0;
489
3.55k
    switch (*s) {
490
1.16k
    case '-':
491
1.16k
        negative = true;
492
1.16k
        max_val += 1;
493
1.16k
        [[fallthrough]];
494
1.83k
    case '+':
495
1.83k
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
1.83k
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
3.55k
    }
502
503
    // This is the fast path where the string cannot overflow.
504
3.55k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
1.50k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
1.50k
        return static_cast<T>(negative ? -val : val);
507
1.50k
    }
508
509
2.04k
    const T max_div_10 = max_val / 10;
510
2.04k
    const T max_mod_10 = max_val % 10;
511
512
2.04k
    int first = i;
513
62.6k
    for (; i < len; ++i) {
514
62.5k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
61.5k
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
61.5k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
1.00k
                *result = PARSE_OVERFLOW;
519
1.00k
                return negative ? -max_val : max_val;
520
1.00k
            }
521
60.5k
            val = val * 10 + digit;
522
60.5k
        } else {
523
976
            if constexpr (enable_strict_mode) {
524
976
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
912
                    *result = PARSE_FAILURE;
527
912
                    return 0;
528
912
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
64
            *result = PARSE_SUCCESS;
545
976
            return static_cast<T>(negative ? -val : val);
546
976
        }
547
62.5k
    }
548
64
    *result = PARSE_SUCCESS;
549
64
    return static_cast<T>(negative ? -val : val);
550
2.04k
}
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
298
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
298
    if (UNLIKELY(len <= 0)) {
480
4
        *result = PARSE_FAILURE;
481
4
        return 0;
482
4
    }
483
484
294
    using UnsignedT = MakeUnsignedT<T>;
485
294
    UnsignedT val = 0;
486
294
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
294
    bool negative = false;
488
294
    int i = 0;
489
294
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
294
    }
502
503
    // This is the fast path where the string cannot overflow.
504
294
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
294
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
294
        return static_cast<T>(negative ? -val : val);
507
294
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
0
            if constexpr (enable_strict_mode) {
524
0
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
0
                    *result = PARSE_FAILURE;
527
0
                    return 0;
528
0
                }
529
            } else {
530
                // Save original position where non-digit was found
531
                int remaining_len = len - i;
532
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
                    *result = PARSE_FAILURE;
540
                    return 0;
541
                }
542
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
40
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
40
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
40
    using UnsignedT = MakeUnsignedT<T>;
485
40
    UnsignedT val = 0;
486
40
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
40
    bool negative = false;
488
40
    int i = 0;
489
40
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
40
    }
502
503
    // This is the fast path where the string cannot overflow.
504
40
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
40
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
40
        return static_cast<T>(negative ? -val : val);
507
40
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
8
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
8
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
8
    using UnsignedT = MakeUnsignedT<T>;
485
8
    UnsignedT val = 0;
486
8
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
8
    bool negative = false;
488
8
    int i = 0;
489
8
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
8
    }
502
503
    // This is the fast path where the string cannot overflow.
504
8
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
8
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
8
        return static_cast<T>(negative ? -val : val);
507
8
    }
508
509
0
    const T max_div_10 = max_val / 10;
510
0
    const T max_mod_10 = max_val % 10;
511
512
0
    int first = i;
513
0
    for (; i < len; ++i) {
514
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
0
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
0
            val = val * 10 + digit;
522
0
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
0
    }
548
0
    *result = PARSE_SUCCESS;
549
0
    return static_cast<T>(negative ? -val : val);
550
0
}
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
478
8
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
479
8
    if (UNLIKELY(len <= 0)) {
480
0
        *result = PARSE_FAILURE;
481
0
        return 0;
482
0
    }
483
484
8
    using UnsignedT = MakeUnsignedT<T>;
485
8
    UnsignedT val = 0;
486
8
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
487
8
    bool negative = false;
488
8
    int i = 0;
489
8
    switch (*s) {
490
0
    case '-':
491
0
        negative = true;
492
0
        max_val += 1;
493
0
        [[fallthrough]];
494
0
    case '+':
495
0
        ++i;
496
        // only one '+'/'-' char, so could return failure directly
497
0
        if (UNLIKELY(len == 1)) {
498
0
            *result = PARSE_FAILURE;
499
0
            return 0;
500
0
        }
501
8
    }
502
503
    // This is the fast path where the string cannot overflow.
504
8
    if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) {
505
0
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
506
0
        return static_cast<T>(negative ? -val : val);
507
0
    }
508
509
8
    const T max_div_10 = max_val / 10;
510
8
    const T max_mod_10 = max_val % 10;
511
512
8
    int first = i;
513
168
    for (; i < len; ++i) {
514
160
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
515
160
            T digit = s[i] - '0';
516
            // This is a tricky check to see if adding this digit will cause an overflow.
517
160
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
518
0
                *result = PARSE_OVERFLOW;
519
0
                return negative ? -max_val : max_val;
520
0
            }
521
160
            val = val * 10 + digit;
522
160
        } else {
523
            if constexpr (enable_strict_mode) {
524
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
525
                    // Reject the string because the remaining chars are not all whitespace
526
                    *result = PARSE_FAILURE;
527
                    return 0;
528
                }
529
0
            } else {
530
                // Save original position where non-digit was found
531
0
                int remaining_len = len - i;
532
0
                const char* remaining_s = s + i;
533
                // Skip trailing whitespaces from the remaining portion
534
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
535
0
                if ((UNLIKELY(i == first || (remaining_len != 0 &&
536
0
                                             !is_float_suffix(remaining_s, remaining_len))))) {
537
                    // Reject the string because either the first char was not a digit,
538
                    // or the remaining chars are not all whitespace
539
0
                    *result = PARSE_FAILURE;
540
0
                    return 0;
541
0
                }
542
0
            }
543
            // Returning here is slightly faster than breaking the loop.
544
0
            *result = PARSE_SUCCESS;
545
0
            return static_cast<T>(negative ? -val : val);
546
0
        }
547
160
    }
548
8
    *result = PARSE_SUCCESS;
549
8
    return static_cast<T>(negative ? -val : val);
550
8
}
551
552
template <typename T>
553
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
554
2.74k
                                                ParseResult* result) {
555
2.74k
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
2.74k
    T val = 0;
561
2.74k
    T max_val = std::numeric_limits<T>::max();
562
2.74k
    int i = 0;
563
564
2.74k
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
2.74k
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
1.56k
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
1.56k
        return val;
569
1.56k
    }
570
571
1.17k
    const T max_div_10 = max_val / 10;
572
1.17k
    const T max_mod_10 = max_val % 10;
573
574
1.17k
    int first = i;
575
9.31k
    for (; i < len; ++i) {
576
8.62k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
8.52k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
8.52k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
392
                *result = PARSE_OVERFLOW;
581
392
                return max_val;
582
392
            }
583
8.13k
            val = val * 10 + digit;
584
8.13k
        } else {
585
98
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
98
                *result = PARSE_FAILURE;
589
98
                return 0;
590
98
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
98
        }
595
8.62k
    }
596
686
    *result = PARSE_SUCCESS;
597
686
    return val;
598
1.17k
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
686
                                                ParseResult* result) {
555
686
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
686
    T val = 0;
561
686
    T max_val = std::numeric_limits<T>::max();
562
686
    int i = 0;
563
564
686
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
686
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
196
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
196
        return val;
569
196
    }
570
571
490
    const T max_div_10 = max_val / 10;
572
490
    const T max_mod_10 = max_val % 10;
573
574
490
    int first = i;
575
1.56k
    for (; i < len; ++i) {
576
1.27k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
1.17k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
1.17k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
98
                *result = PARSE_OVERFLOW;
581
98
                return max_val;
582
98
            }
583
1.07k
            val = val * 10 + digit;
584
1.07k
        } else {
585
98
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
98
                *result = PARSE_FAILURE;
589
98
                return 0;
590
98
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
98
        }
595
1.27k
    }
596
294
    *result = PARSE_SUCCESS;
597
294
    return val;
598
490
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
686
                                                ParseResult* result) {
555
686
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
686
    T val = 0;
561
686
    T max_val = std::numeric_limits<T>::max();
562
686
    int i = 0;
563
564
686
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
686
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
392
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
392
        return val;
569
392
    }
570
571
294
    const T max_div_10 = max_val / 10;
572
294
    const T max_mod_10 = max_val % 10;
573
574
294
    int first = i;
575
1.66k
    for (; i < len; ++i) {
576
1.47k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
1.47k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
1.47k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
98
                *result = PARSE_OVERFLOW;
581
98
                return max_val;
582
98
            }
583
1.37k
            val = val * 10 + digit;
584
1.37k
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
1.47k
    }
596
196
    *result = PARSE_SUCCESS;
597
196
    return val;
598
294
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
686
                                                ParseResult* result) {
555
686
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
686
    T val = 0;
561
686
    T max_val = std::numeric_limits<T>::max();
562
686
    int i = 0;
563
564
686
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
686
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
490
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
490
        return val;
569
490
    }
570
571
196
    const T max_div_10 = max_val / 10;
572
196
    const T max_mod_10 = max_val % 10;
573
574
196
    int first = i;
575
2.05k
    for (; i < len; ++i) {
576
1.96k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
1.96k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
1.96k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
98
                *result = PARSE_OVERFLOW;
581
98
                return max_val;
582
98
            }
583
1.86k
            val = val * 10 + digit;
584
1.86k
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
1.96k
    }
596
98
    *result = PARSE_SUCCESS;
597
98
    return val;
598
196
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
554
686
                                                ParseResult* result) {
555
686
    if (UNLIKELY(len <= 0)) {
556
0
        *result = PARSE_FAILURE;
557
0
        return 0;
558
0
    }
559
560
686
    T val = 0;
561
686
    T max_val = std::numeric_limits<T>::max();
562
686
    int i = 0;
563
564
686
    using signedT = MakeSignedT<T>;
565
    // This is the fast path where the string cannot overflow.
566
686
    if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) {
567
490
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
568
490
        return val;
569
490
    }
570
571
196
    const T max_div_10 = max_val / 10;
572
196
    const T max_mod_10 = max_val % 10;
573
574
196
    int first = i;
575
4.01k
    for (; i < len; ++i) {
576
3.92k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
577
3.92k
            T digit = s[i] - '0';
578
            // This is a tricky check to see if adding this digit will cause an overflow.
579
3.92k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
580
98
                *result = PARSE_OVERFLOW;
581
98
                return max_val;
582
98
            }
583
3.82k
            val = val * 10 + digit;
584
3.82k
        } else {
585
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
586
                // Reject the string because either the first char was not a digit,
587
                // or the remaining chars are not all whitespace
588
0
                *result = PARSE_FAILURE;
589
0
                return 0;
590
0
            }
591
            // Returning here is slightly faster than breaking the loop.
592
0
            *result = PARSE_SUCCESS;
593
0
            return val;
594
0
        }
595
3.92k
    }
596
98
    *result = PARSE_SUCCESS;
597
98
    return val;
598
196
}
599
600
template <typename T>
601
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
602
55.6k
                                       ParseResult* result) {
603
55.6k
    using UnsignedT = MakeUnsignedT<T>;
604
55.6k
    UnsignedT val = 0;
605
55.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
55.6k
    bool negative = false;
607
55.6k
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
55.6k
    int i = 0;
612
55.6k
    switch (*s) {
613
26.8k
    case '-':
614
26.8k
        negative = true;
615
26.8k
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
26.8k
        [[fallthrough]];
617
27.4k
    case '+':
618
27.4k
        i = 1;
619
55.6k
    }
620
621
55.6k
    const T max_div_base = max_val / base;
622
55.6k
    const T max_mod_base = max_val % base;
623
624
55.6k
    int first = i;
625
181k
    for (; i < len; ++i) {
626
153k
        T digit;
627
153k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
151k
            digit = s[i] - '0';
629
151k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
1.27k
            digit = (s[i] - 'a' + 10);
631
1.27k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
196
            digit = (s[i] - 'A' + 10);
633
294
        } else {
634
294
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
294
                *result = PARSE_FAILURE;
638
294
                return 0;
639
294
            }
640
            // skip trailing whitespace.
641
0
            break;
642
294
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
152k
        if (digit >= base) {
646
784
            break;
647
784
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
152k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
25.9k
            *result = PARSE_OVERFLOW;
652
25.9k
            return static_cast<T>(negative ? -max_val : max_val);
653
25.9k
        }
654
126k
        val = val * base + digit;
655
126k
    }
656
29.4k
    *result = PARSE_SUCCESS;
657
29.4k
    return static_cast<T>(negative ? -val : val);
658
55.6k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
52.9k
                                       ParseResult* result) {
603
52.9k
    using UnsignedT = MakeUnsignedT<T>;
604
52.9k
    UnsignedT val = 0;
605
52.9k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
52.9k
    bool negative = false;
607
52.9k
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
52.9k
    int i = 0;
612
52.9k
    switch (*s) {
613
25.7k
    case '-':
614
25.7k
        negative = true;
615
25.7k
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
25.7k
        [[fallthrough]];
617
25.9k
    case '+':
618
25.9k
        i = 1;
619
52.9k
    }
620
621
52.9k
    const T max_div_base = max_val / base;
622
52.9k
    const T max_mod_base = max_val % base;
623
624
52.9k
    int first = i;
625
161k
    for (; i < len; ++i) {
626
134k
        T digit;
627
134k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
133k
            digit = s[i] - '0';
629
133k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
1.07k
            digit = (s[i] - 'a' + 10);
631
1.07k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
196
            digit = (s[i] - 'A' + 10);
633
294
        } else {
634
294
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
294
                *result = PARSE_FAILURE;
638
294
                return 0;
639
294
            }
640
            // skip trailing whitespace.
641
0
            break;
642
294
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
134k
        if (digit >= base) {
646
784
            break;
647
784
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
133k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
25.3k
            *result = PARSE_OVERFLOW;
652
25.3k
            return static_cast<T>(negative ? -max_val : max_val);
653
25.3k
        }
654
108k
        val = val * base + digit;
655
108k
    }
656
27.2k
    *result = PARSE_SUCCESS;
657
27.2k
    return static_cast<T>(negative ? -val : val);
658
52.9k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
980
                                       ParseResult* result) {
603
980
    using UnsignedT = MakeUnsignedT<T>;
604
980
    UnsignedT val = 0;
605
980
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
980
    bool negative = false;
607
980
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
980
    int i = 0;
612
980
    switch (*s) {
613
392
    case '-':
614
392
        negative = true;
615
392
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
392
        [[fallthrough]];
617
490
    case '+':
618
490
        i = 1;
619
980
    }
620
621
980
    const T max_div_base = max_val / base;
622
980
    const T max_mod_base = max_val % base;
623
624
980
    int first = i;
625
4.21k
    for (; i < len; ++i) {
626
3.43k
        T digit;
627
3.43k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
3.23k
            digit = s[i] - '0';
629
3.23k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
196
            digit = (s[i] - 'a' + 10);
631
196
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
3.43k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
3.43k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
196
            *result = PARSE_OVERFLOW;
652
196
            return static_cast<T>(negative ? -max_val : max_val);
653
196
        }
654
3.23k
        val = val * base + digit;
655
3.23k
    }
656
784
    *result = PARSE_SUCCESS;
657
784
    return static_cast<T>(negative ? -val : val);
658
980
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
882
                                       ParseResult* result) {
603
882
    using UnsignedT = MakeUnsignedT<T>;
604
882
    UnsignedT val = 0;
605
882
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
882
    bool negative = false;
607
882
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
882
    int i = 0;
612
882
    switch (*s) {
613
294
    case '-':
614
294
        negative = true;
615
294
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
294
        [[fallthrough]];
617
490
    case '+':
618
490
        i = 1;
619
882
    }
620
621
882
    const T max_div_base = max_val / base;
622
882
    const T max_mod_base = max_val % base;
623
624
882
    int first = i;
625
6.07k
    for (; i < len; ++i) {
626
5.39k
        T digit;
627
5.39k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
5.39k
            digit = s[i] - '0';
629
5.39k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
0
            digit = (s[i] - 'a' + 10);
631
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
5.39k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
5.39k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
196
            *result = PARSE_OVERFLOW;
652
196
            return static_cast<T>(negative ? -max_val : max_val);
653
196
        }
654
5.19k
        val = val * base + digit;
655
5.19k
    }
656
686
    *result = PARSE_SUCCESS;
657
686
    return static_cast<T>(negative ? -val : val);
658
882
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
882
                                       ParseResult* result) {
603
882
    using UnsignedT = MakeUnsignedT<T>;
604
882
    UnsignedT val = 0;
605
882
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
882
    bool negative = false;
607
882
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
882
    int i = 0;
612
882
    switch (*s) {
613
392
    case '-':
614
392
        negative = true;
615
392
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
392
        [[fallthrough]];
617
490
    case '+':
618
490
        i = 1;
619
882
    }
620
621
882
    const T max_div_base = max_val / base;
622
882
    const T max_mod_base = max_val % base;
623
624
882
    int first = i;
625
10.1k
    for (; i < len; ++i) {
626
9.50k
        T digit;
627
9.50k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
9.50k
            digit = s[i] - '0';
629
9.50k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
0
            digit = (s[i] - 'a' + 10);
631
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
9.50k
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
9.50k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
196
            *result = PARSE_OVERFLOW;
652
196
            return static_cast<T>(negative ? -max_val : max_val);
653
196
        }
654
9.31k
        val = val * base + digit;
655
9.31k
    }
656
686
    *result = PARSE_SUCCESS;
657
686
    return static_cast<T>(negative ? -val : val);
658
882
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
602
2
                                       ParseResult* result) {
603
2
    using UnsignedT = MakeUnsignedT<T>;
604
2
    UnsignedT val = 0;
605
2
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
606
2
    bool negative = false;
607
2
    if (UNLIKELY(len <= 0)) {
608
0
        *result = PARSE_FAILURE;
609
0
        return 0;
610
0
    }
611
2
    int i = 0;
612
2
    switch (*s) {
613
0
    case '-':
614
0
        negative = true;
615
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
616
0
        [[fallthrough]];
617
0
    case '+':
618
0
        i = 1;
619
2
    }
620
621
2
    const T max_div_base = max_val / base;
622
2
    const T max_mod_base = max_val % base;
623
624
2
    int first = i;
625
6
    for (; i < len; ++i) {
626
4
        T digit;
627
4
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
628
0
            digit = s[i] - '0';
629
4
        } else if (s[i] >= 'a' && s[i] <= 'z') {
630
4
            digit = (s[i] - 'a' + 10);
631
4
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
632
0
            digit = (s[i] - 'A' + 10);
633
0
        } else {
634
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
635
                // Reject the string because either the first char was not an alpha/digit,
636
                // or the remaining chars are not all whitespace
637
0
                *result = PARSE_FAILURE;
638
0
                return 0;
639
0
            }
640
            // skip trailing whitespace.
641
0
            break;
642
0
        }
643
644
        // Bail, if we encounter a digit that is not available in base.
645
4
        if (digit >= base) {
646
0
            break;
647
0
        }
648
649
        // This is a tricky check to see if adding this digit will cause an overflow.
650
4
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
651
0
            *result = PARSE_OVERFLOW;
652
0
            return static_cast<T>(negative ? -max_val : max_val);
653
0
        }
654
4
        val = val * base + digit;
655
4
    }
656
2
    *result = PARSE_SUCCESS;
657
2
    return static_cast<T>(negative ? -val : val);
658
2
}
659
660
template <typename T, bool enable_strict_mode>
661
556k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
556k
    T val = 0;
663
556k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
556k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
544k
        val = s[0] - '0';
670
544k
    } else {
671
12.6k
        *result = PARSE_FAILURE;
672
12.6k
        return 0;
673
12.6k
    }
674
1.10M
    for (int i = 1; i < len; ++i) {
675
567k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
560k
            T digit = s[i] - '0';
677
560k
            val = val * 10 + digit;
678
560k
        } else {
679
7.64k
            if constexpr (enable_strict_mode) {
680
2.63k
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
2.35k
                    *result = PARSE_FAILURE;
682
2.35k
                    return 0;
683
2.35k
                }
684
5.01k
            } else {
685
                // Save original position where non-digit was found
686
5.01k
                int remaining_len = len - i;
687
5.01k
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
5.01k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
5.01k
                if ((UNLIKELY(remaining_len != 0 &&
691
5.01k
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
1.00k
                    *result = PARSE_FAILURE;
693
1.00k
                    return 0;
694
1.00k
                }
695
5.01k
            }
696
4.28k
            *result = PARSE_SUCCESS;
697
7.64k
            return val;
698
7.64k
        }
699
567k
    }
700
536k
    *result = PARSE_SUCCESS;
701
536k
    return val;
702
544k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
83.8k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
83.8k
    T val = 0;
663
83.8k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
83.8k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
82.0k
        val = s[0] - '0';
670
82.0k
    } else {
671
1.82k
        *result = PARSE_FAILURE;
672
1.82k
        return 0;
673
1.82k
    }
674
115k
    for (int i = 1; i < len; ++i) {
675
34.1k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
33.3k
            T digit = s[i] - '0';
677
33.3k
            val = val * 10 + digit;
678
33.3k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
756
            } else {
685
                // Save original position where non-digit was found
686
756
                int remaining_len = len - i;
687
756
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
756
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
756
                if ((UNLIKELY(remaining_len != 0 &&
691
756
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
196
                    *result = PARSE_FAILURE;
693
196
                    return 0;
694
196
                }
695
756
            }
696
560
            *result = PARSE_SUCCESS;
697
756
            return val;
698
756
        }
699
34.1k
    }
700
81.2k
    *result = PARSE_SUCCESS;
701
81.2k
    return val;
702
82.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
121k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
121k
    T val = 0;
663
121k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
121k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
119k
        val = s[0] - '0';
670
119k
    } else {
671
1.16k
        *result = PARSE_FAILURE;
672
1.16k
        return 0;
673
1.16k
    }
674
170k
    for (int i = 1; i < len; ++i) {
675
50.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
50.7k
            T digit = s[i] - '0';
677
50.7k
            val = val * 10 + digit;
678
50.7k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
4
            } else {
685
                // Save original position where non-digit was found
686
4
                int remaining_len = len - i;
687
4
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
4
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
4
                if ((UNLIKELY(remaining_len != 0 &&
691
4
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
4
                    *result = PARSE_FAILURE;
693
4
                    return 0;
694
4
                }
695
4
            }
696
0
            *result = PARSE_SUCCESS;
697
4
            return val;
698
4
        }
699
50.7k
    }
700
119k
    *result = PARSE_SUCCESS;
701
119k
    return val;
702
119k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
96
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
96
    T val = 0;
663
96
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
96
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
44
        val = s[0] - '0';
670
52
    } else {
671
52
        *result = PARSE_FAILURE;
672
52
        return 0;
673
52
    }
674
44
    for (int i = 1; i < len; ++i) {
675
4
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
0
            T digit = s[i] - '0';
677
0
            val = val * 10 + digit;
678
4
        } else {
679
4
            if constexpr (enable_strict_mode) {
680
4
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
4
                    *result = PARSE_FAILURE;
682
4
                    return 0;
683
4
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
0
            *result = PARSE_SUCCESS;
697
4
            return val;
698
4
        }
699
4
    }
700
40
    *result = PARSE_SUCCESS;
701
40
    return val;
702
44
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
102k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
102k
    T val = 0;
663
102k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
102k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
100k
        val = s[0] - '0';
670
100k
    } else {
671
1.83k
        *result = PARSE_FAILURE;
672
1.83k
        return 0;
673
1.83k
    }
674
145k
    for (int i = 1; i < len; ++i) {
675
47.5k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
45.6k
            T digit = s[i] - '0';
677
45.6k
            val = val * 10 + digit;
678
45.6k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
1.91k
            } else {
685
                // Save original position where non-digit was found
686
1.91k
                int remaining_len = len - i;
687
1.91k
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
1.91k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
1.91k
                if ((UNLIKELY(remaining_len != 0 &&
691
1.91k
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
148
                    *result = PARSE_FAILURE;
693
148
                    return 0;
694
148
                }
695
1.91k
            }
696
1.76k
            *result = PARSE_SUCCESS;
697
1.91k
            return val;
698
1.91k
        }
699
47.5k
    }
700
98.2k
    *result = PARSE_SUCCESS;
701
98.2k
    return val;
702
100k
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
336
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
336
    T val = 0;
663
336
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
336
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
260
        val = s[0] - '0';
670
260
    } else {
671
76
        *result = PARSE_FAILURE;
672
76
        return 0;
673
76
    }
674
412
    for (int i = 1; i < len; ++i) {
675
316
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
152
            T digit = s[i] - '0';
677
152
            val = val * 10 + digit;
678
164
        } else {
679
164
            if constexpr (enable_strict_mode) {
680
164
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
164
                    *result = PARSE_FAILURE;
682
164
                    return 0;
683
164
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
0
            *result = PARSE_SUCCESS;
697
164
            return val;
698
164
        }
699
316
    }
700
96
    *result = PARSE_SUCCESS;
701
96
    return val;
702
260
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
180k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
180k
    T val = 0;
663
180k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
180k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
175k
        val = s[0] - '0';
670
175k
    } else {
671
4.43k
        *result = PARSE_FAILURE;
672
4.43k
        return 0;
673
4.43k
    }
674
531k
    for (int i = 1; i < len; ++i) {
675
356k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
355k
            T digit = s[i] - '0';
677
355k
            val = val * 10 + digit;
678
355k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
1.10k
            } else {
685
                // Save original position where non-digit was found
686
1.10k
                int remaining_len = len - i;
687
1.10k
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
1.10k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
1.10k
                if ((UNLIKELY(remaining_len != 0 &&
691
1.10k
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
410
                    *result = PARSE_FAILURE;
693
410
                    return 0;
694
410
                }
695
1.10k
            }
696
696
            *result = PARSE_SUCCESS;
697
1.10k
            return val;
698
1.10k
        }
699
356k
    }
700
174k
    *result = PARSE_SUCCESS;
701
174k
    return val;
702
175k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
1.21k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
1.21k
    T val = 0;
663
1.21k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
1.21k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
1.04k
        val = s[0] - '0';
670
1.04k
    } else {
671
170
        *result = PARSE_FAILURE;
672
170
        return 0;
673
170
    }
674
2.57k
    for (int i = 1; i < len; ++i) {
675
2.16k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
1.52k
            T digit = s[i] - '0';
677
1.52k
            val = val * 10 + digit;
678
1.52k
        } else {
679
640
            if constexpr (enable_strict_mode) {
680
640
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
584
                    *result = PARSE_FAILURE;
682
584
                    return 0;
683
584
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
56
            *result = PARSE_SUCCESS;
697
640
            return val;
698
640
        }
699
2.16k
    }
700
406
    *result = PARSE_SUCCESS;
701
406
    return val;
702
1.04k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
65.1k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
65.1k
    T val = 0;
663
65.1k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
65.1k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
63.0k
        val = s[0] - '0';
670
63.0k
    } else {
671
2.15k
        *result = PARSE_FAILURE;
672
2.15k
        return 0;
673
2.15k
    }
674
132k
    for (int i = 1; i < len; ++i) {
675
70.5k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
69.3k
            T digit = s[i] - '0';
677
69.3k
            val = val * 10 + digit;
678
69.3k
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
1.23k
            } else {
685
                // Save original position where non-digit was found
686
1.23k
                int remaining_len = len - i;
687
1.23k
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
1.23k
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
1.23k
                if ((UNLIKELY(remaining_len != 0 &&
691
1.23k
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
248
                    *result = PARSE_FAILURE;
693
248
                    return 0;
694
248
                }
695
1.23k
            }
696
982
            *result = PARSE_SUCCESS;
697
1.23k
            return val;
698
1.23k
        }
699
70.5k
    }
700
61.7k
    *result = PARSE_SUCCESS;
701
61.7k
    return val;
702
63.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
1.47k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
1.47k
    T val = 0;
663
1.47k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
1.47k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
1.03k
        val = s[0] - '0';
670
1.03k
    } else {
671
434
        *result = PARSE_FAILURE;
672
434
        return 0;
673
434
    }
674
3.03k
    for (int i = 1; i < len; ++i) {
675
2.91k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
1.99k
            T digit = s[i] - '0';
677
1.99k
            val = val * 10 + digit;
678
1.99k
        } else {
679
912
            if constexpr (enable_strict_mode) {
680
912
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
800
                    *result = PARSE_FAILURE;
682
800
                    return 0;
683
800
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
112
            *result = PARSE_SUCCESS;
697
912
            return val;
698
912
        }
699
2.91k
    }
700
124
    *result = PARSE_SUCCESS;
701
124
    return val;
702
1.03k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
1.50k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
1.50k
    T val = 0;
663
1.50k
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
1.50k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
1.02k
        val = s[0] - '0';
670
1.02k
    } else {
671
480
        *result = PARSE_FAILURE;
672
480
        return 0;
673
480
    }
674
2.99k
    for (int i = 1; i < len; ++i) {
675
2.88k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
1.97k
            T digit = s[i] - '0';
677
1.97k
            val = val * 10 + digit;
678
1.97k
        } else {
679
912
            if constexpr (enable_strict_mode) {
680
912
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
800
                    *result = PARSE_FAILURE;
682
800
                    return 0;
683
800
                }
684
            } else {
685
                // Save original position where non-digit was found
686
                int remaining_len = len - i;
687
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
                if ((UNLIKELY(remaining_len != 0 &&
691
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
                    *result = PARSE_FAILURE;
693
                    return 0;
694
                }
695
            }
696
112
            *result = PARSE_SUCCESS;
697
912
            return val;
698
912
        }
699
2.88k
    }
700
112
    *result = PARSE_SUCCESS;
701
112
    return val;
702
1.02k
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
661
8
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
662
8
    T val = 0;
663
8
    if (UNLIKELY(len == 0)) {
664
0
        *result = PARSE_SUCCESS;
665
0
        return val;
666
0
    }
667
    // Factor out the first char for error handling speeds up the loop.
668
8
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
669
8
        val = s[0] - '0';
670
8
    } else {
671
0
        *result = PARSE_FAILURE;
672
0
        return 0;
673
0
    }
674
8
    for (int i = 1; i < len; ++i) {
675
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
676
0
            T digit = s[i] - '0';
677
0
            val = val * 10 + digit;
678
0
        } else {
679
            if constexpr (enable_strict_mode) {
680
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
681
                    *result = PARSE_FAILURE;
682
                    return 0;
683
                }
684
0
            } else {
685
                // Save original position where non-digit was found
686
0
                int remaining_len = len - i;
687
0
                const char* remaining_s = s + i;
688
                // Skip trailing whitespaces from the remaining portion
689
0
                remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len);
690
0
                if ((UNLIKELY(remaining_len != 0 &&
691
0
                              !is_float_suffix(remaining_s, remaining_len)))) {
692
0
                    *result = PARSE_FAILURE;
693
0
                    return 0;
694
0
                }
695
0
            }
696
0
            *result = PARSE_SUCCESS;
697
0
            return val;
698
0
        }
699
0
    }
700
8
    *result = PARSE_SUCCESS;
701
8
    return val;
702
8
}
703
704
// at least the first char(if any) must be a digit.
705
template <typename T>
706
T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
707
272k
                                                  ParseResult* result) {
708
272k
    T val = 0;
709
272k
    if (max_len == 0) [[unlikely]] {
710
270k
        *result = PARSE_SUCCESS;
711
270k
        return val;
712
270k
    }
713
    // Factor out the first char for error handling speeds up the loop.
714
2.28k
    if (is_numeric_ascii(s[0])) [[likely]] {
715
2.28k
        val = s[0] - '0';
716
2.28k
    } else {
717
0
        *result = PARSE_FAILURE;
718
0
        return 0;
719
0
    }
720
10.2k
    for (int i = 1; i < max_len; ++i) {
721
7.99k
        if (is_numeric_ascii(s[i])) [[likely]] {
722
7.99k
            T digit = s[i] - '0';
723
7.99k
            val = val * 10 + digit;
724
7.99k
        } else {
725
            // 123abc, return 123
726
0
            *result = PARSE_SUCCESS;
727
0
            return val;
728
0
        }
729
7.99k
    }
730
2.28k
    *result = PARSE_SUCCESS;
731
2.28k
    return val;
732
2.28k
}
733
734
template <typename T>
735
305k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
305k
    int i = 0;
737
    // skip leading spaces
738
305k
    for (; i < len; ++i) {
739
305k
        if (!is_whitespace_ascii(s[i])) {
740
305k
            break;
741
305k
        }
742
305k
    }
743
744
    // skip back spaces
745
305k
    int j = len - 1;
746
305k
    for (; j >= i; j--) {
747
305k
        if (!is_whitespace_ascii(s[j])) {
748
305k
            break;
749
305k
        }
750
305k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
305k
    if (i < len && s[i] == '+') {
754
14.1k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
14.1k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
40
            *result = PARSE_FAILURE;
766
40
            return 0;
767
40
        }
768
14.1k
    }
769
305k
    if (UNLIKELY(i > j)) {
770
64
        *result = PARSE_FAILURE;
771
64
        return 0;
772
64
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
305k
    double val = 0;
776
305k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
305k
    if (res.ptr == s + j + 1) {
779
296k
        *result = PARSE_SUCCESS;
780
296k
        return val;
781
296k
    } else {
782
9.22k
        *result = PARSE_FAILURE;
783
9.22k
    }
784
9.22k
    return 0;
785
305k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
735
175k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
175k
    int i = 0;
737
    // skip leading spaces
738
175k
    for (; i < len; ++i) {
739
175k
        if (!is_whitespace_ascii(s[i])) {
740
175k
            break;
741
175k
        }
742
175k
    }
743
744
    // skip back spaces
745
175k
    int j = len - 1;
746
175k
    for (; j >= i; j--) {
747
175k
        if (!is_whitespace_ascii(s[j])) {
748
175k
            break;
749
175k
        }
750
175k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
175k
    if (i < len && s[i] == '+') {
754
7.08k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
20
            *result = PARSE_FAILURE;
766
20
            return 0;
767
20
        }
768
7.08k
    }
769
175k
    if (UNLIKELY(i > j)) {
770
36
        *result = PARSE_FAILURE;
771
36
        return 0;
772
36
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
175k
    double val = 0;
776
175k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
175k
    if (res.ptr == s + j + 1) {
779
170k
        *result = PARSE_SUCCESS;
780
170k
        return val;
781
170k
    } else {
782
4.65k
        *result = PARSE_FAILURE;
783
4.65k
    }
784
4.65k
    return 0;
785
175k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
735
130k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
736
130k
    int i = 0;
737
    // skip leading spaces
738
130k
    for (; i < len; ++i) {
739
130k
        if (!is_whitespace_ascii(s[i])) {
740
130k
            break;
741
130k
        }
742
130k
    }
743
744
    // skip back spaces
745
130k
    int j = len - 1;
746
130k
    for (; j >= i; j--) {
747
130k
        if (!is_whitespace_ascii(s[j])) {
748
130k
            break;
749
130k
        }
750
130k
    }
751
752
    // skip leading '+', from_chars can handle '-'
753
130k
    if (i < len && s[i] == '+') {
754
7.08k
        i++;
755
        // ++ or +- are not valid, but the first + is already skipped,
756
        // if don't check here, from_chars will succeed.
757
        //
758
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
759
        // which may avoid this extra check here.
760
        // e.g.:
761
        // fast_float::chars_format format =
762
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
763
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
764
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
765
20
            *result = PARSE_FAILURE;
766
20
            return 0;
767
20
        }
768
7.08k
    }
769
130k
    if (UNLIKELY(i > j)) {
770
28
        *result = PARSE_FAILURE;
771
28
        return 0;
772
28
    }
773
774
    // Use double here to not lose precision while accumulating the result
775
130k
    double val = 0;
776
130k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
777
778
130k
    if (res.ptr == s + j + 1) {
779
125k
        *result = PARSE_SUCCESS;
780
125k
        return val;
781
125k
    } else {
782
4.57k
        *result = PARSE_FAILURE;
783
4.57k
    }
784
4.57k
    return 0;
785
130k
}
786
787
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
788
22.7k
                                                  ParseResult* result) {
789
22.7k
    *result = PARSE_SUCCESS;
790
791
22.7k
    if (len == 1) {
792
5.32k
        if (s[0] == '1' || s[0] == 't' || s[0] == 'T') {
793
666
            return true;
794
666
        }
795
4.65k
        if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') {
796
1.86k
            return false;
797
1.86k
        }
798
2.78k
        *result = PARSE_FAILURE;
799
2.78k
        return false;
800
4.65k
    }
801
802
17.4k
    if (len == 2) {
803
1.95k
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
804
20
            return true;
805
20
        }
806
1.93k
        if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) {
807
18
            return false;
808
18
        }
809
1.93k
    }
810
811
17.3k
    if (len == 3) {
812
84
        if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') &&
813
84
            (s[2] == 's' || s[2] == 'S')) {
814
20
            return true;
815
20
        }
816
64
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') &&
817
64
            (s[2] == 'f' || s[2] == 'F')) {
818
18
            return false;
819
18
        }
820
64
    }
821
822
17.3k
    if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
823
17.3k
        (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) {
824
6.76k
        return true;
825
6.76k
    }
826
827
10.5k
    if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
828
10.5k
        (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
829
10.5k
        (s[4] == 'e' || s[4] == 'E')) {
830
6.85k
        return false;
831
6.85k
    }
832
833
    // No valid boolean value found
834
3.74k
    *result = PARSE_FAILURE;
835
3.74k
    return false;
836
10.5k
}
837
#include "common/compile_check_avoid_end.h"
838
} // end namespace doris