Coverage Report

Created: 2025-07-24 14:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/string_parser.hpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
27
#include <algorithm>
28
#include <cstdlib>
29
// IWYU pragma: no_include <bits/std_abs.h>
30
#include <cmath> // IWYU pragma: keep
31
#include <cstdint>
32
#include <limits>
33
#include <map>
34
#include <string>
35
#include <type_traits>
36
#include <utility>
37
38
#include "common/compiler_util.h" // IWYU pragma: keep
39
#include "common/status.h"
40
#include "runtime/large_int_value.h"
41
#include "runtime/primitive_type.h"
42
#include "vec/common/int_exp.h"
43
#include "vec/common/string_utils/string_utils.h"
44
#include "vec/core/extended_types.h"
45
#include "vec/data_types/number_traits.h"
46
47
namespace doris {
48
namespace vectorized {
49
template <DecimalNativeTypeConcept T>
50
struct Decimal;
51
} // namespace vectorized
52
53
#define RETURN_INVALID_ARG_IF_NOT(stmt, ...)             \
54
1.57k
    do {                                                 \
55
1.57k
        if (UNLIKELY(!(stmt))) {                         \
56
55
            return Status::InvalidArgument(__VA_ARGS__); \
57
55
        }                                                \
58
1.57k
    } while (false)
59
60
// skip leading and trailing ascii whitespaces,
61
// return the pointer to the first non-whitespace char,
62
// and update the len to the new length, which does not include
63
// leading and trailing whitespaces
64
template <typename T>
65
901k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
66
1.47M
    while (len > 0 && is_whitespace_ascii(*s)) {
67
573k
        ++s;
68
573k
        --len;
69
573k
    }
70
71
1.46M
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
72
563k
        --len;
73
563k
    }
74
75
901k
    return s;
76
901k
}
_ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_
Line
Count
Source
65
520k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
66
688k
    while (len > 0 && is_whitespace_ascii(*s)) {
67
168k
        ++s;
68
168k
        --len;
69
168k
    }
70
71
681k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
72
161k
        --len;
73
161k
    }
74
75
520k
    return s;
76
520k
}
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_
Line
Count
Source
65
353k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
66
686k
    while (len > 0 && is_whitespace_ascii(*s)) {
67
333k
        ++s;
68
333k
        --len;
69
333k
    }
70
71
684k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
72
330k
        --len;
73
330k
    }
74
75
353k
    return s;
76
353k
}
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_
Line
Count
Source
65
27.8k
inline const char* skip_ascii_whitespaces(const char* s, T& len) {
66
100k
    while (len > 0 && is_whitespace_ascii(*s)) {
67
72.4k
        ++s;
68
72.4k
        --len;
69
72.4k
    }
70
71
99.8k
    while (len > 0 && is_whitespace_ascii(s[len - 1])) {
72
72.0k
        --len;
73
72.0k
    }
74
75
27.8k
    return s;
76
27.8k
}
77
78
template <bool (*Pred)(char)>
79
259
bool range_suite(const char* s, const char* end) {
80
259
    return std::ranges::all_of(s, end, Pred);
81
259
}
82
83
inline auto is_digit_range = range_suite<is_numeric_ascii>;
84
85
552
inline Status assert_within_bound(const char* s, const char* end, size_t offset) {
86
552
    if (s + offset >= end) [[unlikely]] {
87
44
        return Status::InvalidArgument(
88
44
                "StringParser: failed because we need at least {} but only got '{}'", offset,
89
44
                std::string {s, end});
90
44
    }
91
508
    return Status::OK();
92
552
}
93
94
// LEN = 0 means any length (including zero). LEN = 1 means exactly one character, and so on. LEN = -x means x or more.
95
// If the consumed range is needed, use StringRef{origin_s, s} in the caller.
96
template <int LEN, bool (*Pred)(char)>
97
1.33k
Status skip_qualified_char(const char*& s, const char* end) {
98
1.33k
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
1.75k
        while (s != end && Pred(*s)) {
101
950
            ++s;
102
950
        }
103
807
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
928
        for (int i = 0; i < LEN; ++i, ++s) {
106
497
            if (s == end || !Pred(*s)) [[unlikely]] {
107
66
                return Status::InvalidArgument(
108
66
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
66
                        std::string {s, end});
110
66
            }
111
497
        }
112
497
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
28
        int count = 0;
115
170
        while (s != end && Pred(*s)) {
116
142
            ++s;
117
142
            ++count;
118
142
        }
119
28
        if (count < -LEN) [[unlikely]] {
120
0
            return Status::InvalidArgument(
121
0
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
0
                    -LEN - count, std::string {s, end});
123
0
        }
124
28
    }
125
459
    return Status::OK();
126
1.33k
}
_ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEENS_6StatusERPKcS3_
Line
Count
Source
97
406
Status skip_qualified_char(const char*& s, const char* end) {
98
406
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
421
        while (s != end && Pred(*s)) {
101
15
            ++s;
102
15
        }
103
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
        for (int i = 0; i < LEN; ++i, ++s) {
106
            if (s == end || !Pred(*s)) [[unlikely]] {
107
                return Status::InvalidArgument(
108
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
                        std::string {s, end});
110
            }
111
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
406
    return Status::OK();
126
406
}
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEENS_6StatusERPKcS3_
Line
Count
Source
97
401
Status skip_qualified_char(const char*& s, const char* end) {
98
401
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
1.33k
        while (s != end && Pred(*s)) {
101
935
            ++s;
102
935
        }
103
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
        for (int i = 0; i < LEN; ++i, ++s) {
106
            if (s == end || !Pred(*s)) [[unlikely]] {
107
                return Status::InvalidArgument(
108
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
                        std::string {s, end});
110
            }
111
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
401
    return Status::OK();
126
401
}
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEENS_6StatusERPKcS3_
Line
Count
Source
97
28
Status skip_qualified_char(const char*& s, const char* end) {
98
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
        while (s != end && Pred(*s)) {
101
            ++s;
102
        }
103
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
        for (int i = 0; i < LEN; ++i, ++s) {
106
            if (s == end || !Pred(*s)) [[unlikely]] {
107
                return Status::InvalidArgument(
108
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
                        std::string {s, end});
110
            }
111
        }
112
28
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
28
        int count = 0;
115
170
        while (s != end && Pred(*s)) {
116
142
            ++s;
117
142
            ++count;
118
142
        }
119
28
        if (count < -LEN) [[unlikely]] {
120
0
            return Status::InvalidArgument(
121
0
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
0
                    -LEN - count, std::string {s, end});
123
0
        }
124
28
    }
125
28
    return Status::OK();
126
28
}
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEENS_6StatusERPKcS3_
_ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEENS_6StatusERPKcS3_
Line
Count
Source
97
176
Status skip_qualified_char(const char*& s, const char* end) {
98
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
        while (s != end && Pred(*s)) {
101
            ++s;
102
        }
103
176
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
324
        for (int i = 0; i < LEN; ++i, ++s) {
106
176
            if (s == end || !Pred(*s)) [[unlikely]] {
107
28
                return Status::InvalidArgument(
108
28
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
28
                        std::string {s, end});
110
28
            }
111
176
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
148
    return Status::OK();
126
176
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEENS_6StatusERPKcS3_
Line
Count
Source
97
111
Status skip_qualified_char(const char*& s, const char* end) {
98
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
        while (s != end && Pred(*s)) {
101
            ++s;
102
        }
103
111
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
212
        for (int i = 0; i < LEN; ++i, ++s) {
106
111
            if (s == end || !Pred(*s)) [[unlikely]] {
107
10
                return Status::InvalidArgument(
108
10
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
10
                        std::string {s, end});
110
10
            }
111
111
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
101
    return Status::OK();
126
111
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_6is_barEcEEEENS_6StatusERPKcS3_
Line
Count
Source
97
136
Status skip_qualified_char(const char*& s, const char* end) {
98
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
        while (s != end && Pred(*s)) {
101
            ++s;
102
        }
103
136
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
256
        for (int i = 0; i < LEN; ++i, ++s) {
106
136
            if (s == end || !Pred(*s)) [[unlikely]] {
107
16
                return Status::InvalidArgument(
108
16
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
16
                        std::string {s, end});
110
16
            }
111
136
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
120
    return Status::OK();
126
136
}
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEENS_6StatusERPKcS3_
Line
Count
Source
97
74
Status skip_qualified_char(const char*& s, const char* end) {
98
    if constexpr (LEN == 0) {
99
        // Consume any length of characters that match the predicate.
100
        while (s != end && Pred(*s)) {
101
            ++s;
102
        }
103
74
    } else if constexpr (LEN > 0) {
104
        // Consume exactly LEN characters that match the predicate.
105
136
        for (int i = 0; i < LEN; ++i, ++s) {
106
74
            if (s == end || !Pred(*s)) [[unlikely]] {
107
12
                return Status::InvalidArgument(
108
12
                        "StringParser: failed to consume {} characters, got '{}'", LEN - i,
109
12
                        std::string {s, end});
110
12
            }
111
74
        }
112
    } else { // LEN < 0
113
        // Consume at least -LEN characters that match the predicate.
114
        int count = 0;
115
        while (s != end && Pred(*s)) {
116
            ++s;
117
            ++count;
118
        }
119
        if (count < -LEN) [[unlikely]] {
120
            return Status::InvalidArgument(
121
                    "StringParser: failed to consume at least {} characters, got '{}'",
122
                    -LEN - count, std::string {s, end});
123
        }
124
    }
125
62
    return Status::OK();
126
74
}
127
128
inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>;
129
inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>;
130
inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>;
131
inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>;
132
inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>;
133
134
111
inline bool is_delimiter(char c) {
135
111
    return c == ' ' || c == 'T';
136
111
}
137
inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>;
138
139
136
inline bool is_bar(char c) {
140
136
    return c == '-';
141
136
}
142
inline auto consume_one_bar = skip_qualified_char<1, is_bar>;
143
144
74
inline bool is_colon(char c) {
145
74
    return c == ':';
146
74
}
147
inline auto consume_one_colon = skip_qualified_char<1, is_colon>;
148
149
// Only consumes a run of digits; a sign is not accepted.
150
// When MAX_LEN > 0, match greedily but consume at most MAX_LEN digits (and at least LEN).
151
// Otherwise, LEN = 0 means any length and LEN > 0 means exactly LEN digits.
152
template <typename T, int LEN = 0, int MAX_LEN = -1>
153
1.12k
Status consume_digit(const char*& s, const char* end, T& out) {
154
1.12k
    static_assert(LEN >= 0);
155
1.12k
    if constexpr (MAX_LEN > 0) {
156
653
        out = 0;
157
1.89k
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
158
1.30k
            if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] {
159
69
                if (i < LEN) [[unlikely]] {
160
0
                    return Status::InvalidArgument(
161
0
                            "StringParser: got \"{}\" before get at least {} digit",
162
0
                            std::string {s, end}, LEN - i);
163
0
                }
164
69
                break; // stop consuming if we have consumed enough digits.
165
69
            }
166
1.23k
            out = out * 10 + (*s - '0');
167
1.23k
        }
168
    } else if constexpr (LEN == 0) {
169
        // Consume any length of digits.
170
        out = 0;
171
        while (s != end && is_numeric_ascii(*s)) {
172
            out = out * 10 + (*s - '0');
173
            ++s;
174
        }
175
475
    } else if constexpr (LEN > 0) {
176
        // Consume exactly LEN digits.
177
475
        out = 0;
178
1.37k
        for (int i = 0; i < LEN; ++i, ++s) {
179
927
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
180
29
                return Status::InvalidArgument(
181
29
                        "StringParser: failed to consume {} digits, got '{}'", LEN - i,
182
29
                        std::string {s, end});
183
29
            }
184
898
            out = out * 10 + (*s - '0');
185
898
        }
186
475
    }
187
1.09k
    return Status::OK();
188
1.12k
}
_ZN5doris13consume_digitIjLi2ELin1EEENS_6StatusERPKcS3_RT_
Line
Count
Source
153
469
Status consume_digit(const char*& s, const char* end, T& out) {
154
469
    static_assert(LEN >= 0);
155
    if constexpr (MAX_LEN > 0) {
156
        out = 0;
157
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
158
            if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] {
159
                if (i < LEN) [[unlikely]] {
160
                    return Status::InvalidArgument(
161
                            "StringParser: got \"{}\" before get at least {} digit",
162
                            std::string {s, end}, LEN - i);
163
                }
164
                break; // stop consuming if we have consumed enough digits.
165
            }
166
            out = out * 10 + (*s - '0');
167
        }
168
    } else if constexpr (LEN == 0) {
169
        // Consume any length of digits.
170
        out = 0;
171
        while (s != end && is_numeric_ascii(*s)) {
172
            out = out * 10 + (*s - '0');
173
            ++s;
174
        }
175
469
    } else if constexpr (LEN > 0) {
176
        // Consume exactly LEN digits.
177
469
        out = 0;
178
1.35k
        for (int i = 0; i < LEN; ++i, ++s) {
179
915
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
180
29
                return Status::InvalidArgument(
181
29
                        "StringParser: failed to consume {} digits, got '{}'", LEN - i,
182
29
                        std::string {s, end});
183
29
            }
184
886
            out = out * 10 + (*s - '0');
185
886
        }
186
469
    }
187
440
    return Status::OK();
188
469
}
_ZN5doris13consume_digitIjLi1ELi2EEENS_6StatusERPKcS3_RT_
Line
Count
Source
153
653
Status consume_digit(const char*& s, const char* end, T& out) {
154
653
    static_assert(LEN >= 0);
155
653
    if constexpr (MAX_LEN > 0) {
156
653
        out = 0;
157
1.89k
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
158
1.30k
            if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] {
159
69
                if (i < LEN) [[unlikely]] {
160
0
                    return Status::InvalidArgument(
161
0
                            "StringParser: got \"{}\" before get at least {} digit",
162
0
                            std::string {s, end}, LEN - i);
163
0
                }
164
69
                break; // stop consuming if we have consumed enough digits.
165
69
            }
166
1.23k
            out = out * 10 + (*s - '0');
167
1.23k
        }
168
    } else if constexpr (LEN == 0) {
169
        // Consume any length of digits.
170
        out = 0;
171
        while (s != end && is_numeric_ascii(*s)) {
172
            out = out * 10 + (*s - '0');
173
            ++s;
174
        }
175
    } else if constexpr (LEN > 0) {
176
        // Consume exactly LEN digits.
177
        out = 0;
178
        for (int i = 0; i < LEN; ++i, ++s) {
179
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
180
                return Status::InvalidArgument(
181
                        "StringParser: failed to consume {} digits, got '{}'", LEN - i,
182
                        std::string {s, end});
183
            }
184
            out = out * 10 + (*s - '0');
185
        }
186
    }
187
653
    return Status::OK();
188
653
}
_ZN5doris13consume_digitIjLi1ELin1EEENS_6StatusERPKcS3_RT_
Line
Count
Source
153
4
Status consume_digit(const char*& s, const char* end, T& out) {
154
4
    static_assert(LEN >= 0);
155
    if constexpr (MAX_LEN > 0) {
156
        out = 0;
157
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
158
            if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] {
159
                if (i < LEN) [[unlikely]] {
160
                    return Status::InvalidArgument(
161
                            "StringParser: got \"{}\" before get at least {} digit",
162
                            std::string {s, end}, LEN - i);
163
                }
164
                break; // stop consuming if we have consumed enough digits.
165
            }
166
            out = out * 10 + (*s - '0');
167
        }
168
    } else if constexpr (LEN == 0) {
169
        // Consume any length of digits.
170
        out = 0;
171
        while (s != end && is_numeric_ascii(*s)) {
172
            out = out * 10 + (*s - '0');
173
            ++s;
174
        }
175
4
    } else if constexpr (LEN > 0) {
176
        // Consume exactly LEN digits.
177
4
        out = 0;
178
8
        for (int i = 0; i < LEN; ++i, ++s) {
179
4
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
180
0
                return Status::InvalidArgument(
181
0
                        "StringParser: failed to consume {} digits, got '{}'", LEN - i,
182
0
                        std::string {s, end});
183
0
            }
184
4
            out = out * 10 + (*s - '0');
185
4
        }
186
4
    }
187
4
    return Status::OK();
188
4
}
_ZN5doris13consume_digitIjLi4ELin1EEENS_6StatusERPKcS3_RT_
Line
Count
Source
153
2
Status consume_digit(const char*& s, const char* end, T& out) {
154
2
    static_assert(LEN >= 0);
155
    if constexpr (MAX_LEN > 0) {
156
        out = 0;
157
        for (int i = 0; i < MAX_LEN; ++i, ++s) {
158
            if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] {
159
                if (i < LEN) [[unlikely]] {
160
                    return Status::InvalidArgument(
161
                            "StringParser: got \"{}\" before get at least {} digit",
162
                            std::string {s, end}, LEN - i);
163
                }
164
                break; // stop consuming if we have consumed enough digits.
165
            }
166
            out = out * 10 + (*s - '0');
167
        }
168
    } else if constexpr (LEN == 0) {
169
        // Consume any length of digits.
170
        out = 0;
171
        while (s != end && is_numeric_ascii(*s)) {
172
            out = out * 10 + (*s - '0');
173
            ++s;
174
        }
175
2
    } else if constexpr (LEN > 0) {
176
        // Consume exactly LEN digits.
177
2
        out = 0;
178
10
        for (int i = 0; i < LEN; ++i, ++s) {
179
8
            if (s == end || !is_numeric_ascii(*s)) [[unlikely]] {
180
0
                return Status::InvalidArgument(
181
0
                        "StringParser: failed to consume {} digits, got '{}'", LEN - i,
182
0
                        std::string {s, end});
183
0
            }
184
8
            out = out * 10 + (*s - '0');
185
8
        }
186
2
    }
187
2
    return Status::OK();
188
2
}
189
190
template <bool (*Pred)(char)>
191
28
uint32_t count_valid_length(const char* s, const char* end) {
192
28
    DCHECK(s <= end) << "s: " << s << ", end: " << end;
193
28
    uint32_t count = 0;
194
86
    while (s != end && Pred(*s)) {
195
58
        ++count;
196
58
        ++s;
197
58
    }
198
28
    return count;
199
28
}
200
201
inline auto count_digits = count_valid_length<is_numeric_ascii>;
202
203
20
inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) {
204
20
    std::string result(6, '0');
205
20
    result[0] = sign;
206
20
    result[1] = '0' + (hour_offset / 10);
207
20
    result[2] = '0' + (hour_offset % 10);
208
20
    result[3] = ':';
209
20
    result[4] = '0' + (minute_offset / 10);
210
20
    result[5] = '0' + (minute_offset % 10);
211
20
    DCHECK_EQ(result.size(), 6);
212
20
    return result;
213
20
}
214
215
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
216
// this is significantly faster than libc (atoi/strtol and atof/strtod).
217
//
218
// Strings with leading and trailing whitespaces are accepted.
219
// Branching is heavily optimized for the non-whitespace successful case.
220
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
221
// If that first attempt was unsuccessful, these functions retry the parsing after removing
222
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
223
//
224
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
225
// data type.  This is different from hive, which returns NULL for overflow slots for int types
226
// and inf/-inf for float types.
227
//
228
// Things we tried that did not work:
229
//  - lookup table for converting character to digit
230
// Improvements (TODO):
231
//  - Validate input using _simd_compare_ranges
232
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
233
class StringParser {
234
public:
235
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
236
237
    template <typename T>
238
395k
    static T numeric_limits(bool negative) {
239
395k
        if constexpr (std::is_same_v<T, __int128>) {
240
46.3k
            return negative ? MIN_INT128 : MAX_INT128;
241
349k
        } else {
242
349k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
349k
        }
244
395k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
238
46.3k
    static T numeric_limits(bool negative) {
239
46.3k
        if constexpr (std::is_same_v<T, __int128>) {
240
46.3k
            return negative ? MIN_INT128 : MAX_INT128;
241
        } else {
242
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
        }
244
46.3k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
238
135k
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
135k
        } else {
242
135k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
135k
        }
244
135k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
238
68.3k
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
68.3k
        } else {
242
68.3k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
68.3k
        }
244
68.3k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
238
62.1k
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
62.1k
        } else {
242
62.1k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
62.1k
        }
244
62.1k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
238
82.8k
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
82.8k
        } else {
242
82.8k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
82.8k
        }
244
82.8k
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
238
4
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
4
        } else {
242
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
4
        }
244
4
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
238
4
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
4
        } else {
242
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
4
        }
244
4
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
238
21
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
21
        } else {
242
21
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
21
        }
244
21
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
238
132
    static T numeric_limits(bool negative) {
239
        if constexpr (std::is_same_v<T, __int128>) {
240
            return negative ? MIN_INT128 : MAX_INT128;
241
132
        } else {
242
132
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
243
132
        }
244
132
    }
245
246
    template <typename T>
247
943k
    static T get_scale_multiplier(int scale) {
248
943k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
249
943k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
250
943k
                      "You can only instantiate as int32_t, int64_t, __int128.");
251
943k
        if constexpr (std::is_same_v<T, int32_t>) {
252
133k
            return common::exp10_i32(scale);
253
183k
        } else if constexpr (std::is_same_v<T, int64_t>) {
254
183k
            return common::exp10_i64(scale);
255
239k
        } else if constexpr (std::is_same_v<T, __int128>) {
256
239k
            return common::exp10_i128(scale);
257
387k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
258
387k
            return common::exp10_i256(scale);
259
387k
        }
260
943k
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
247
133k
    static T get_scale_multiplier(int scale) {
248
133k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
249
133k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
250
133k
                      "You can only instantiate as int32_t, int64_t, __int128.");
251
133k
        if constexpr (std::is_same_v<T, int32_t>) {
252
133k
            return common::exp10_i32(scale);
253
        } else if constexpr (std::is_same_v<T, int64_t>) {
254
            return common::exp10_i64(scale);
255
        } else if constexpr (std::is_same_v<T, __int128>) {
256
            return common::exp10_i128(scale);
257
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
258
            return common::exp10_i256(scale);
259
        }
260
133k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
247
183k
    static T get_scale_multiplier(int scale) {
248
183k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
249
183k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
250
183k
                      "You can only instantiate as int32_t, int64_t, __int128.");
251
        if constexpr (std::is_same_v<T, int32_t>) {
252
            return common::exp10_i32(scale);
253
183k
        } else if constexpr (std::is_same_v<T, int64_t>) {
254
183k
            return common::exp10_i64(scale);
255
        } else if constexpr (std::is_same_v<T, __int128>) {
256
            return common::exp10_i128(scale);
257
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
258
            return common::exp10_i256(scale);
259
        }
260
183k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
247
239k
    static T get_scale_multiplier(int scale) {
248
239k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
249
239k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
250
239k
                      "You can only instantiate as int32_t, int64_t, __int128.");
251
        if constexpr (std::is_same_v<T, int32_t>) {
252
            return common::exp10_i32(scale);
253
        } else if constexpr (std::is_same_v<T, int64_t>) {
254
            return common::exp10_i64(scale);
255
239k
        } else if constexpr (std::is_same_v<T, __int128>) {
256
239k
            return common::exp10_i128(scale);
257
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
258
            return common::exp10_i256(scale);
259
        }
260
239k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
247
387k
    static T get_scale_multiplier(int scale) {
248
387k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
249
387k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
250
387k
                      "You can only instantiate as int32_t, int64_t, __int128.");
251
        if constexpr (std::is_same_v<T, int32_t>) {
252
            return common::exp10_i32(scale);
253
        } else if constexpr (std::is_same_v<T, int64_t>) {
254
            return common::exp10_i64(scale);
255
        } else if constexpr (std::is_same_v<T, __int128>) {
256
            return common::exp10_i128(scale);
257
387k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
258
387k
            return common::exp10_i256(scale);
259
387k
        }
260
387k
    }
261
262
    // This is considerably faster than glibc's implementation (25x).
263
    // Assumes s represents a decimal number.
264
    template <typename T, bool enable_strict_mode = false>
265
355k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
355k
        s = skip_ascii_whitespaces(s, len);
267
355k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
355k
    }
_ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
45.4k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
45.4k
        s = skip_ascii_whitespaces(s, len);
267
45.4k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
45.4k
    }
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
95.7k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
95.7k
        s = skip_ascii_whitespaces(s, len);
267
95.7k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
95.7k
    }
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
66.6k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
66.6k
        s = skip_ascii_whitespaces(s, len);
267
66.6k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
66.6k
    }
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
61.6k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
61.6k
        s = skip_ascii_whitespaces(s, len);
267
61.6k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
61.6k
    }
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
81.2k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
81.2k
        s = skip_ascii_whitespaces(s, len);
267
81.2k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
81.2k
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
4
        s = skip_ascii_whitespaces(s, len);
267
4
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
4
    }
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
4
        s = skip_ascii_whitespaces(s, len);
267
4
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
4
    }
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
20
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
20
        s = skip_ascii_whitespaces(s, len);
267
20
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
20
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
1.00k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
1.00k
        s = skip_ascii_whitespaces(s, len);
267
1.00k
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
1.00k
    }
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
984
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
984
        s = skip_ascii_whitespaces(s, len);
267
984
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
984
    }
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
968
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
968
        s = skip_ascii_whitespaces(s, len);
267
968
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
968
    }
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
952
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
952
        s = skip_ascii_whitespaces(s, len);
267
952
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
952
    }
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE
Line
Count
Source
265
936
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
266
936
        s = skip_ascii_whitespaces(s, len);
267
936
        return string_to_int_internal<T, enable_strict_mode>(s, len, result);
268
936
    }
269
270
    // This is considerably faster than glibc's implementation.
271
    // In the case of overflow, the max/min value for the data type will be returned.
272
    // Assumes s represents a decimal number.
273
    template <typename T>
274
1.37k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
275
1.37k
        s = skip_ascii_whitespaces(s, len);
276
1.37k
        return string_to_unsigned_int_internal<T>(s, len, result);
277
1.37k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
274
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
275
343
        s = skip_ascii_whitespaces(s, len);
276
343
        return string_to_unsigned_int_internal<T>(s, len, result);
277
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
274
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
275
343
        s = skip_ascii_whitespaces(s, len);
276
343
        return string_to_unsigned_int_internal<T>(s, len, result);
277
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
274
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
275
343
        s = skip_ascii_whitespaces(s, len);
276
343
        return string_to_unsigned_int_internal<T>(s, len, result);
277
343
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
274
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
275
343
        s = skip_ascii_whitespaces(s, len);
276
343
        return string_to_unsigned_int_internal<T>(s, len, result);
277
343
    }
278
279
    // Convert a string s representing a number in given base into a decimal number.
280
    template <typename T>
281
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
282
27.8k
                                  ParseResult* result) {
283
27.8k
        s = skip_ascii_whitespaces(s, len);
284
27.8k
        return string_to_int_internal<T>(s, len, base, result);
285
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
282
26.4k
                                  ParseResult* result) {
283
26.4k
        s = skip_ascii_whitespaces(s, len);
284
26.4k
        return string_to_int_internal<T>(s, len, base, result);
285
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
282
490
                                  ParseResult* result) {
283
490
        s = skip_ascii_whitespaces(s, len);
284
490
        return string_to_int_internal<T>(s, len, base, result);
285
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
282
441
                                  ParseResult* result) {
283
441
        s = skip_ascii_whitespaces(s, len);
284
441
        return string_to_int_internal<T>(s, len, base, result);
285
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
282
441
                                  ParseResult* result) {
283
441
        s = skip_ascii_whitespaces(s, len);
284
441
        return string_to_int_internal<T>(s, len, base, result);
285
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
282
1
                                  ParseResult* result) {
283
1
        s = skip_ascii_whitespaces(s, len);
284
1
        return string_to_int_internal<T>(s, len, base, result);
285
1
    }
286
287
    template <typename T>
288
153k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
289
153k
        s = skip_ascii_whitespaces(s, len);
290
153k
        return string_to_float_internal<T>(s, len, result);
291
153k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
288
87.7k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
289
87.7k
        s = skip_ascii_whitespaces(s, len);
290
87.7k
        return string_to_float_internal<T>(s, len, result);
291
87.7k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
288
65.3k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
289
65.3k
        s = skip_ascii_whitespaces(s, len);
290
65.3k
        return string_to_float_internal<T>(s, len, result);
291
65.3k
    }
292
293
    // Parses a string for 'true' or 'false', case insensitive.
294
11.7k
    static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) {
295
11.7k
        s = skip_ascii_whitespaces(s, len);
296
11.7k
        return string_to_bool_internal(s, len, result);
297
11.7k
    }
298
299
    template <PrimitiveType P>
300
    static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal(
301
            const char* __restrict s, int len, int type_precision, int type_scale,
302
            ParseResult* result);
303
304
    template <typename T>
305
    static Status split_string_to_map(const std::string& base, const T element_separator,
306
                                      const T key_value_separator,
307
                                      std::map<std::string, std::string>* result) {
308
        int key_pos = 0;
309
        int key_end;
310
        int val_pos;
311
        int val_end;
312
313
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
314
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
315
                std::string::npos) {
316
                break;
317
            }
318
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
319
                val_end = base.size();
320
            }
321
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
322
                                          base.substr(val_pos, val_end - val_pos)));
323
            key_pos = val_end;
324
            if (key_pos != std::string::npos) {
325
                ++key_pos;
326
            }
327
        }
328
329
        return Status::OK();
330
    }
331
332
    // This is considerably faster than glibc's implementation.
333
    // In the case of overflow, the max/min value for the data type will be returned.
334
    // Assumes s represents a decimal number.
335
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
336
    template <typename T, bool enable_strict_mode = false>
337
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
338
339
    // This is considerably faster than glibc's implementation.
340
    // In the case of overflow, the max/min value for the data type will be returned.
341
    // Assumes s represents a decimal number.
342
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
343
    template <typename T>
344
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
345
                                                    ParseResult* result);
346
347
    // Convert a string s representing a number in given base into a decimal number.
348
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
349
    template <typename T>
350
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
351
                                           ParseResult* result);
352
353
    // Converts an ascii string to an integer of type T assuming it cannot overflow
354
    // and the number is positive.
355
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
356
    template <typename T, bool enable_strict_mode = false>
357
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
358
                                              ParseResult* result);
359
360
    // zero length, or at least one legal digit. at most consume MAX_LEN digits and stop. or stop when next
361
    // char is not a digit.
362
    template <typename T>
363
    static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
364
                                                      ParseResult* result);
365
366
    // This is considerably faster than glibc's implementation (>100x why???)
367
    // No special case handling needs to be done for overflows, the floating point spec
368
    // already does it and will cap the values to -inf/inf
369
    // To avoid inaccurate conversions this function falls back to strtod for
370
    // scientific notation.
371
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
372
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
373
    template <typename T>
374
    static inline T string_to_float_internal(const char* __restrict s, int len,
375
                                             ParseResult* result);
376
377
    // parses a string for 'true' or 'false', case insensitive
378
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
379
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
380
                                               ParseResult* result);
381
382
    // Returns true if s only contains whitespace.
383
5.46k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
384
5.91k
        for (int i = 0; i < len; ++i) {
385
5.91k
            if (!LIKELY(is_whitespace_ascii(s[i]))) {
386
5.46k
                return false;
387
5.46k
            }
388
5.91k
        }
389
0
        return true;
390
5.46k
    }
391
392
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
393
3.39k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
394
3.39k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
395
3.39k
    }
396
397
2.62k
    static inline bool is_all_digit(const char* __restrict s, int len) {
398
5.36k
        for (int i = 0; i < len; ++i) {
399
2.81k
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
400
81
                return false;
401
81
            }
402
2.81k
        }
403
2.54k
        return true;
404
2.62k
    }
405
}; // end of class StringParser
406
407
template <typename T, bool enable_strict_mode>
408
355k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
355k
    if (UNLIKELY(len <= 0)) {
410
1.33k
        *result = PARSE_FAILURE;
411
1.33k
        return 0;
412
1.33k
    }
413
414
354k
    using UnsignedT = MakeUnsignedT<T>;
415
354k
    UnsignedT val = 0;
416
354k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
354k
    bool negative = false;
418
354k
    int i = 0;
419
354k
    switch (*s) {
420
93.4k
    case '-':
421
93.4k
        negative = true;
422
93.4k
        max_val += 1;
423
93.4k
        [[fallthrough]];
424
96.1k
    case '+':
425
96.1k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
96.1k
        if (UNLIKELY(len == 1)) {
428
1
            *result = PARSE_FAILURE;
429
1
            return 0;
430
1
        }
431
354k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
354k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
244k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
244k
        return static_cast<T>(negative ? -val : val);
437
244k
    }
438
439
109k
    const T max_div_10 = max_val / 10;
440
109k
    const T max_mod_10 = max_val % 10;
441
442
109k
    int first = i;
443
1.48M
    for (; i < len; ++i) {
444
1.41M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
1.40M
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
1.40M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
31.1k
                *result = PARSE_OVERFLOW;
449
31.1k
                return negative ? -max_val : max_val;
450
31.1k
            }
451
1.37M
            val = val * 10 + digit;
452
1.37M
        } else {
453
3.60k
            if constexpr (enable_strict_mode) {
454
1.10k
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
1.10k
                    *result = PARSE_FAILURE;
457
1.10k
                    return 0;
458
1.10k
                }
459
2.49k
            } else {
460
2.49k
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
2.49k
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
1.48k
                    *result = PARSE_FAILURE;
465
1.48k
                    return 0;
466
1.48k
                }
467
2.49k
            }
468
            // Returning here is slightly faster than breaking the loop.
469
1.00k
            *result = PARSE_SUCCESS;
470
3.60k
            return static_cast<T>(negative ? -val : val);
471
3.60k
        }
472
1.41M
    }
473
75.1k
    *result = PARSE_SUCCESS;
474
75.1k
    return static_cast<T>(negative ? -val : val);
475
109k
}
_ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
45.4k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
45.4k
    if (UNLIKELY(len <= 0)) {
410
25
        *result = PARSE_FAILURE;
411
25
        return 0;
412
25
    }
413
414
45.3k
    using UnsignedT = MakeUnsignedT<T>;
415
45.3k
    UnsignedT val = 0;
416
45.3k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
45.3k
    bool negative = false;
418
45.3k
    int i = 0;
419
45.3k
    switch (*s) {
420
3.47k
    case '-':
421
3.47k
        negative = true;
422
3.47k
        max_val += 1;
423
3.47k
        [[fallthrough]];
424
3.70k
    case '+':
425
3.70k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
3.70k
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
45.3k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
45.3k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
41.2k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
41.2k
        return static_cast<T>(negative ? -val : val);
437
41.2k
    }
438
439
4.18k
    const T max_div_10 = max_val / 10;
440
4.18k
    const T max_mod_10 = max_val % 10;
441
442
4.18k
    int first = i;
443
166k
    for (; i < len; ++i) {
444
162k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
162k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
162k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
339
                *result = PARSE_OVERFLOW;
449
339
                return negative ? -max_val : max_val;
450
339
            }
451
162k
            val = val * 10 + digit;
452
162k
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
185
            } else {
460
185
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
185
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
57
                    *result = PARSE_FAILURE;
465
57
                    return 0;
466
57
                }
467
185
            }
468
            // Returning here is slightly faster than breaking the loop.
469
128
            *result = PARSE_SUCCESS;
470
185
            return static_cast<T>(negative ? -val : val);
471
185
        }
472
162k
    }
473
3.65k
    *result = PARSE_SUCCESS;
474
3.65k
    return static_cast<T>(negative ? -val : val);
475
4.18k
}
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
95.7k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
95.7k
    if (UNLIKELY(len <= 0)) {
410
207
        *result = PARSE_FAILURE;
411
207
        return 0;
412
207
    }
413
414
95.5k
    using UnsignedT = MakeUnsignedT<T>;
415
95.5k
    UnsignedT val = 0;
416
95.5k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
95.5k
    bool negative = false;
418
95.5k
    int i = 0;
419
95.5k
    switch (*s) {
420
20.0k
    case '-':
421
20.0k
        negative = true;
422
20.0k
        max_val += 1;
423
20.0k
        [[fallthrough]];
424
20.3k
    case '+':
425
20.3k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
20.3k
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
95.5k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
95.5k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
68.6k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
68.6k
        return static_cast<T>(negative ? -val : val);
437
68.6k
    }
438
439
26.8k
    const T max_div_10 = max_val / 10;
440
26.8k
    const T max_mod_10 = max_val % 10;
441
442
26.8k
    int first = i;
443
90.7k
    for (; i < len; ++i) {
444
81.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
80.1k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
80.1k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
16.2k
                *result = PARSE_OVERFLOW;
449
16.2k
                return negative ? -max_val : max_val;
450
16.2k
            }
451
63.8k
            val = val * 10 + digit;
452
63.8k
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
1.13k
            } else {
460
1.13k
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
1.13k
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
783
                    *result = PARSE_FAILURE;
465
783
                    return 0;
466
783
                }
467
1.13k
            }
468
            // Returning here is slightly faster than breaking the loop.
469
352
            *result = PARSE_SUCCESS;
470
1.13k
            return static_cast<T>(negative ? -val : val);
471
1.13k
        }
472
81.2k
    }
473
9.45k
    *result = PARSE_SUCCESS;
474
9.45k
    return static_cast<T>(negative ? -val : val);
475
26.8k
}
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
66.6k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
66.6k
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
66.6k
    using UnsignedT = MakeUnsignedT<T>;
415
66.6k
    UnsignedT val = 0;
416
66.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
66.6k
    bool negative = false;
418
66.6k
    int i = 0;
419
66.6k
    switch (*s) {
420
10.5k
    case '-':
421
10.5k
        negative = true;
422
10.5k
        max_val += 1;
423
10.5k
        [[fallthrough]];
424
10.8k
    case '+':
425
10.8k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
10.8k
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
66.6k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
66.6k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
50.4k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
50.4k
        return static_cast<T>(negative ? -val : val);
437
50.4k
    }
438
439
16.2k
    const T max_div_10 = max_val / 10;
440
16.2k
    const T max_mod_10 = max_val % 10;
441
442
16.2k
    int first = i;
443
90.7k
    for (; i < len; ++i) {
444
81.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
81.3k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
81.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
6.80k
                *result = PARSE_OVERFLOW;
449
6.80k
                return negative ? -max_val : max_val;
450
6.80k
            }
451
74.5k
            val = val * 10 + digit;
452
74.5k
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
405
            } else {
460
405
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
405
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
133
                    *result = PARSE_FAILURE;
465
133
                    return 0;
466
133
                }
467
405
            }
468
            // Returning here is slightly faster than breaking the loop.
469
272
            *result = PARSE_SUCCESS;
470
405
            return static_cast<T>(negative ? -val : val);
471
405
        }
472
81.7k
    }
473
8.99k
    *result = PARSE_SUCCESS;
474
8.99k
    return static_cast<T>(negative ? -val : val);
475
16.2k
}
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
61.6k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
61.6k
    if (UNLIKELY(len <= 0)) {
410
1.05k
        *result = PARSE_FAILURE;
411
1.05k
        return 0;
412
1.05k
    }
413
414
60.6k
    using UnsignedT = MakeUnsignedT<T>;
415
60.6k
    UnsignedT val = 0;
416
60.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
60.6k
    bool negative = false;
418
60.6k
    int i = 0;
419
60.6k
    switch (*s) {
420
8.49k
    case '-':
421
8.49k
        negative = true;
422
8.49k
        max_val += 1;
423
8.49k
        [[fallthrough]];
424
8.84k
    case '+':
425
8.84k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
8.84k
        if (UNLIKELY(len == 1)) {
428
1
            *result = PARSE_FAILURE;
429
1
            return 0;
430
1
        }
431
60.6k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
60.6k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
50.9k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
50.9k
        return static_cast<T>(negative ? -val : val);
437
50.9k
    }
438
439
9.65k
    const T max_div_10 = max_val / 10;
440
9.65k
    const T max_mod_10 = max_val % 10;
441
442
9.65k
    int first = i;
443
99.6k
    for (; i < len; ++i) {
444
93.7k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
93.1k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
93.1k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
3.11k
                *result = PARSE_OVERFLOW;
449
3.11k
                return negative ? -max_val : max_val;
450
3.11k
            }
451
90.0k
            val = val * 10 + digit;
452
90.0k
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
573
            } else {
460
573
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
573
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
445
                    *result = PARSE_FAILURE;
465
445
                    return 0;
466
445
                }
467
573
            }
468
            // Returning here is slightly faster than breaking the loop.
469
128
            *result = PARSE_SUCCESS;
470
573
            return static_cast<T>(negative ? -val : val);
471
573
        }
472
93.7k
    }
473
5.96k
    *result = PARSE_SUCCESS;
474
5.96k
    return static_cast<T>(negative ? -val : val);
475
9.65k
}
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
81.2k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
81.2k
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
81.2k
    using UnsignedT = MakeUnsignedT<T>;
415
81.2k
    UnsignedT val = 0;
416
81.2k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
81.2k
    bool negative = false;
418
81.2k
    int i = 0;
419
81.2k
    switch (*s) {
420
48.7k
    case '-':
421
48.7k
        negative = true;
422
48.7k
        max_val += 1;
423
48.7k
        [[fallthrough]];
424
49.0k
    case '+':
425
49.0k
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
49.0k
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
81.2k
    }
432
433
    // This is the fast path where the string cannot overflow.
434
81.2k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
31.5k
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
31.5k
        return static_cast<T>(negative ? -val : val);
437
31.5k
    }
438
439
49.7k
    const T max_div_10 = max_val / 10;
440
49.7k
    const T max_mod_10 = max_val % 10;
441
442
49.7k
    int first = i;
443
991k
    for (; i < len; ++i) {
444
944k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
944k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
944k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
2.81k
                *result = PARSE_OVERFLOW;
449
2.81k
                return negative ? -max_val : max_val;
450
2.81k
            }
451
941k
            val = val * 10 + digit;
452
941k
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
199
            } else {
460
199
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
199
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
71
                    *result = PARSE_FAILURE;
465
71
                    return 0;
466
71
                }
467
199
            }
468
            // Returning here is slightly faster than breaking the loop.
469
128
            *result = PARSE_SUCCESS;
470
199
            return static_cast<T>(negative ? -val : val);
471
199
        }
472
944k
    }
473
46.7k
    *result = PARSE_SUCCESS;
474
46.7k
    return static_cast<T>(negative ? -val : val);
475
49.7k
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
4
    if (UNLIKELY(len <= 0)) {
410
0
        *result = PARSE_FAILURE;
411
0
        return 0;
412
0
    }
413
414
4
    using UnsignedT = MakeUnsignedT<T>;
415
4
    UnsignedT val = 0;
416
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
4
    bool negative = false;
418
4
    int i = 0;
419
4
    switch (*s) {
420
0
    case '-':
421
0
        negative = true;
422
0
        max_val += 1;
423
0
        [[fallthrough]];
424
0
    case '+':
425
0
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
0
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
4
    }
432
433
    // This is the fast path where the string cannot overflow.
434
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
4
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
4
        return static_cast<T>(negative ? -val : val);
437
4
    }
438
439
0
    const T max_div_10 = max_val / 10;
440
0
    const T max_mod_10 = max_val % 10;
441
442
0
    int first = i;
443
0
    for (; i < len; ++i) {
444
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
0
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
0
                *result = PARSE_OVERFLOW;
449
0
                return negative ? -max_val : max_val;
450
0
            }
451
0
            val = val * 10 + digit;
452
0
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
0
            } else {
460
0
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
0
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
0
                    *result = PARSE_FAILURE;
465
0
                    return 0;
466
0
                }
467
0
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
0
            return static_cast<T>(negative ? -val : val);
471
0
        }
472
0
    }
473
0
    *result = PARSE_SUCCESS;
474
0
    return static_cast<T>(negative ? -val : val);
475
0
}
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
4
    if (UNLIKELY(len <= 0)) {
410
0
        *result = PARSE_FAILURE;
411
0
        return 0;
412
0
    }
413
414
4
    using UnsignedT = MakeUnsignedT<T>;
415
4
    UnsignedT val = 0;
416
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
4
    bool negative = false;
418
4
    int i = 0;
419
4
    switch (*s) {
420
0
    case '-':
421
0
        negative = true;
422
0
        max_val += 1;
423
0
        [[fallthrough]];
424
0
    case '+':
425
0
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
0
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
4
    }
432
433
    // This is the fast path where the string cannot overflow.
434
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
0
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
0
        return static_cast<T>(negative ? -val : val);
437
0
    }
438
439
4
    const T max_div_10 = max_val / 10;
440
4
    const T max_mod_10 = max_val % 10;
441
442
4
    int first = i;
443
84
    for (; i < len; ++i) {
444
80
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
80
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
80
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
0
                *result = PARSE_OVERFLOW;
449
0
                return negative ? -max_val : max_val;
450
0
            }
451
80
            val = val * 10 + digit;
452
80
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
0
            } else {
460
0
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
0
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
0
                    *result = PARSE_FAILURE;
465
0
                    return 0;
466
0
                }
467
0
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
0
            return static_cast<T>(negative ? -val : val);
471
0
        }
472
80
    }
473
4
    *result = PARSE_SUCCESS;
474
4
    return static_cast<T>(negative ? -val : val);
475
4
}
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
20
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
20
    if (UNLIKELY(len <= 0)) {
410
0
        *result = PARSE_FAILURE;
411
0
        return 0;
412
0
    }
413
414
20
    using UnsignedT = MakeUnsignedT<T>;
415
20
    UnsignedT val = 0;
416
20
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
20
    bool negative = false;
418
20
    int i = 0;
419
20
    switch (*s) {
420
0
    case '-':
421
0
        negative = true;
422
0
        max_val += 1;
423
0
        [[fallthrough]];
424
0
    case '+':
425
0
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
0
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
20
    }
432
433
    // This is the fast path where the string cannot overflow.
434
20
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
20
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
20
        return static_cast<T>(negative ? -val : val);
437
20
    }
438
439
0
    const T max_div_10 = max_val / 10;
440
0
    const T max_mod_10 = max_val % 10;
441
442
0
    int first = i;
443
0
    for (; i < len; ++i) {
444
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
0
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
0
                *result = PARSE_OVERFLOW;
449
0
                return negative ? -max_val : max_val;
450
0
            }
451
0
            val = val * 10 + digit;
452
0
        } else {
453
            if constexpr (enable_strict_mode) {
454
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
                    *result = PARSE_FAILURE;
457
                    return 0;
458
                }
459
0
            } else {
460
0
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
0
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
0
                    *result = PARSE_FAILURE;
465
0
                    return 0;
466
0
                }
467
0
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
0
            return static_cast<T>(negative ? -val : val);
471
0
        }
472
0
    }
473
0
    *result = PARSE_SUCCESS;
474
0
    return static_cast<T>(negative ? -val : val);
475
0
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
1.00k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
1.00k
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
993
    using UnsignedT = MakeUnsignedT<T>;
415
993
    UnsignedT val = 0;
416
993
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
993
    bool negative = false;
418
993
    int i = 0;
419
993
    switch (*s) {
420
446
    case '-':
421
446
        negative = true;
422
446
        max_val += 1;
423
446
        [[fallthrough]];
424
697
    case '+':
425
697
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
697
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
993
    }
432
433
    // This is the fast path where the string cannot overflow.
434
993
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
51
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
51
        return static_cast<T>(negative ? -val : val);
437
51
    }
438
439
942
    const T max_div_10 = max_val / 10;
440
942
    const T max_mod_10 = max_val % 10;
441
442
942
    int first = i;
443
4.25k
    for (; i < len; ++i) {
444
4.12k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
3.71k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
3.71k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
400
                *result = PARSE_OVERFLOW;
449
400
                return negative ? -max_val : max_val;
450
400
            }
451
3.31k
            val = val * 10 + digit;
452
3.31k
        } else {
453
406
            if constexpr (enable_strict_mode) {
454
406
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
406
                    *result = PARSE_FAILURE;
457
406
                    return 0;
458
406
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
406
            return static_cast<T>(negative ? -val : val);
471
406
        }
472
4.12k
    }
473
136
    *result = PARSE_SUCCESS;
474
136
    return static_cast<T>(negative ? -val : val);
475
942
}
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
984
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
984
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
977
    using UnsignedT = MakeUnsignedT<T>;
415
977
    UnsignedT val = 0;
416
977
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
977
    bool negative = false;
418
977
    int i = 0;
419
977
    switch (*s) {
420
438
    case '-':
421
438
        negative = true;
422
438
        max_val += 1;
423
438
        [[fallthrough]];
424
685
    case '+':
425
685
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
685
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
977
    }
432
433
    // This is the fast path where the string cannot overflow.
434
977
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
203
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
203
        return static_cast<T>(negative ? -val : val);
437
203
    }
438
439
774
    const T max_div_10 = max_val / 10;
440
774
    const T max_mod_10 = max_val % 10;
441
442
774
    int first = i;
443
4.92k
    for (; i < len; ++i) {
444
4.84k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
4.53k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
4.53k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
384
                *result = PARSE_OVERFLOW;
449
384
                return negative ? -max_val : max_val;
450
384
            }
451
4.14k
            val = val * 10 + digit;
452
4.14k
        } else {
453
310
            if constexpr (enable_strict_mode) {
454
310
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
310
                    *result = PARSE_FAILURE;
457
310
                    return 0;
458
310
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
310
            return static_cast<T>(negative ? -val : val);
471
310
        }
472
4.84k
    }
473
80
    *result = PARSE_SUCCESS;
474
80
    return static_cast<T>(negative ? -val : val);
475
774
}
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
968
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
968
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
961
    using UnsignedT = MakeUnsignedT<T>;
415
961
    UnsignedT val = 0;
416
961
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
961
    bool negative = false;
418
961
    int i = 0;
419
961
    switch (*s) {
420
430
    case '-':
421
430
        negative = true;
422
430
        max_val += 1;
423
430
        [[fallthrough]];
424
673
    case '+':
425
673
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
673
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
961
    }
432
433
    // This is the fast path where the string cannot overflow.
434
961
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
399
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
399
        return static_cast<T>(negative ? -val : val);
437
399
    }
438
439
562
    const T max_div_10 = max_val / 10;
440
562
    const T max_mod_10 = max_val % 10;
441
442
562
    int first = i;
443
6.65k
    for (; i < len; ++i) {
444
6.58k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
6.45k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
6.45k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
368
                *result = PARSE_OVERFLOW;
449
368
                return negative ? -max_val : max_val;
450
368
            }
451
6.08k
            val = val * 10 + digit;
452
6.08k
        } else {
453
130
            if constexpr (enable_strict_mode) {
454
130
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
130
                    *result = PARSE_FAILURE;
457
130
                    return 0;
458
130
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
130
            return static_cast<T>(negative ? -val : val);
471
130
        }
472
6.58k
    }
473
64
    *result = PARSE_SUCCESS;
474
64
    return static_cast<T>(negative ? -val : val);
475
562
}
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
952
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
952
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
945
    using UnsignedT = MakeUnsignedT<T>;
415
945
    UnsignedT val = 0;
416
945
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
945
    bool negative = false;
418
945
    int i = 0;
419
945
    switch (*s) {
420
422
    case '-':
421
422
        negative = true;
422
422
        max_val += 1;
423
422
        [[fallthrough]];
424
661
    case '+':
425
661
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
661
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
945
    }
432
433
    // This is the fast path where the string cannot overflow.
434
945
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
400
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
400
        return static_cast<T>(negative ? -val : val);
437
400
    }
438
439
545
    const T max_div_10 = max_val / 10;
440
545
    const T max_mod_10 = max_val % 10;
441
442
545
    int first = i;
443
11.5k
    for (; i < len; ++i) {
444
11.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
11.3k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
11.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
352
                *result = PARSE_OVERFLOW;
449
352
                return negative ? -max_val : max_val;
450
352
            }
451
10.9k
            val = val * 10 + digit;
452
10.9k
        } else {
453
129
            if constexpr (enable_strict_mode) {
454
129
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
129
                    *result = PARSE_FAILURE;
457
129
                    return 0;
458
129
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
129
            return static_cast<T>(negative ? -val : val);
471
129
        }
472
11.4k
    }
473
64
    *result = PARSE_SUCCESS;
474
64
    return static_cast<T>(negative ? -val : val);
475
545
}
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
936
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
936
    if (UNLIKELY(len <= 0)) {
410
7
        *result = PARSE_FAILURE;
411
7
        return 0;
412
7
    }
413
414
929
    using UnsignedT = MakeUnsignedT<T>;
415
929
    UnsignedT val = 0;
416
929
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
929
    bool negative = false;
418
929
    int i = 0;
419
929
    switch (*s) {
420
414
    case '-':
421
414
        negative = true;
422
414
        max_val += 1;
423
414
        [[fallthrough]];
424
649
    case '+':
425
649
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
649
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
929
    }
432
433
    // This is the fast path where the string cannot overflow.
434
929
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
401
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
401
        return static_cast<T>(negative ? -val : val);
437
401
    }
438
439
528
    const T max_div_10 = max_val / 10;
440
528
    const T max_mod_10 = max_val % 10;
441
442
528
    int first = i;
443
21.5k
    for (; i < len; ++i) {
444
21.5k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
21.3k
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
21.3k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
336
                *result = PARSE_OVERFLOW;
449
336
                return negative ? -max_val : max_val;
450
336
            }
451
21.0k
            val = val * 10 + digit;
452
21.0k
        } else {
453
128
            if constexpr (enable_strict_mode) {
454
128
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
128
                    *result = PARSE_FAILURE;
457
128
                    return 0;
458
128
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
128
            return static_cast<T>(negative ? -val : val);
471
128
        }
472
21.5k
    }
473
64
    *result = PARSE_SUCCESS;
474
64
    return static_cast<T>(negative ? -val : val);
475
528
}
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
408
132
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
409
132
    if (UNLIKELY(len <= 0)) {
410
0
        *result = PARSE_FAILURE;
411
0
        return 0;
412
0
    }
413
414
132
    using UnsignedT = MakeUnsignedT<T>;
415
132
    UnsignedT val = 0;
416
132
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
417
132
    bool negative = false;
418
132
    int i = 0;
419
132
    switch (*s) {
420
0
    case '-':
421
0
        negative = true;
422
0
        max_val += 1;
423
0
        [[fallthrough]];
424
0
    case '+':
425
0
        ++i;
426
        // only one '+'/'-' char, so could return failure directly
427
0
        if (UNLIKELY(len == 1)) {
428
0
            *result = PARSE_FAILURE;
429
0
            return 0;
430
0
        }
431
132
    }
432
433
    // This is the fast path where the string cannot overflow.
434
132
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
435
132
        val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result);
436
132
        return static_cast<T>(negative ? -val : val);
437
132
    }
438
439
0
    const T max_div_10 = max_val / 10;
440
0
    const T max_mod_10 = max_val % 10;
441
442
0
    int first = i;
443
0
    for (; i < len; ++i) {
444
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
445
0
            T digit = s[i] - '0';
446
            // This is a tricky check to see if adding this digit will cause an overflow.
447
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
448
0
                *result = PARSE_OVERFLOW;
449
0
                return negative ? -max_val : max_val;
450
0
            }
451
0
            val = val * 10 + digit;
452
0
        } else {
453
0
            if constexpr (enable_strict_mode) {
454
0
                if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
455
                    // Reject the string because the remaining chars are not all whitespace
456
0
                    *result = PARSE_FAILURE;
457
0
                    return 0;
458
0
                }
459
            } else {
460
                if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
461
                                             !is_float_suffix(s + i, len - i))))) {
462
                    // Reject the string because either the first char was not a digit,
463
                    // or the remaining chars are not all whitespace
464
                    *result = PARSE_FAILURE;
465
                    return 0;
466
                }
467
            }
468
            // Returning here is slightly faster than breaking the loop.
469
0
            *result = PARSE_SUCCESS;
470
0
            return static_cast<T>(negative ? -val : val);
471
0
        }
472
0
    }
473
0
    *result = PARSE_SUCCESS;
474
0
    return static_cast<T>(negative ? -val : val);
475
0
}
476
477
template <typename T>
478
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
479
1.37k
                                                ParseResult* result) {
480
1.37k
    if (UNLIKELY(len <= 0)) {
481
0
        *result = PARSE_FAILURE;
482
0
        return 0;
483
0
    }
484
485
1.37k
    T val = 0;
486
1.37k
    T max_val = std::numeric_limits<T>::max();
487
1.37k
    int i = 0;
488
489
1.37k
    using signedT = MakeSignedT<T>;
490
    // This is the fast path where the string cannot overflow.
491
1.37k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
492
784
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
493
784
        return val;
494
784
    }
495
496
588
    const T max_div_10 = max_val / 10;
497
588
    const T max_mod_10 = max_val % 10;
498
499
588
    int first = i;
500
4.65k
    for (; i < len; ++i) {
501
4.31k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
502
4.26k
            T digit = s[i] - '0';
503
            // This is a tricky check to see if adding this digit will cause an overflow.
504
4.26k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
505
196
                *result = PARSE_OVERFLOW;
506
196
                return max_val;
507
196
            }
508
4.06k
            val = val * 10 + digit;
509
4.06k
        } else {
510
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
511
                // Reject the string because either the first char was not a digit,
512
                // or the remaining chars are not all whitespace
513
49
                *result = PARSE_FAILURE;
514
49
                return 0;
515
49
            }
516
            // Returning here is slightly faster than breaking the loop.
517
0
            *result = PARSE_SUCCESS;
518
0
            return val;
519
49
        }
520
4.31k
    }
521
343
    *result = PARSE_SUCCESS;
522
343
    return val;
523
588
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
479
343
                                                ParseResult* result) {
480
343
    if (UNLIKELY(len <= 0)) {
481
0
        *result = PARSE_FAILURE;
482
0
        return 0;
483
0
    }
484
485
343
    T val = 0;
486
343
    T max_val = std::numeric_limits<T>::max();
487
343
    int i = 0;
488
489
343
    using signedT = MakeSignedT<T>;
490
    // This is the fast path where the string cannot overflow.
491
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
492
98
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
493
98
        return val;
494
98
    }
495
496
245
    const T max_div_10 = max_val / 10;
497
245
    const T max_mod_10 = max_val % 10;
498
499
245
    int first = i;
500
784
    for (; i < len; ++i) {
501
637
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
502
588
            T digit = s[i] - '0';
503
            // This is a tricky check to see if adding this digit will cause an overflow.
504
588
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
505
49
                *result = PARSE_OVERFLOW;
506
49
                return max_val;
507
49
            }
508
539
            val = val * 10 + digit;
509
539
        } else {
510
49
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
511
                // Reject the string because either the first char was not a digit,
512
                // or the remaining chars are not all whitespace
513
49
                *result = PARSE_FAILURE;
514
49
                return 0;
515
49
            }
516
            // Returning here is slightly faster than breaking the loop.
517
0
            *result = PARSE_SUCCESS;
518
0
            return val;
519
49
        }
520
637
    }
521
147
    *result = PARSE_SUCCESS;
522
147
    return val;
523
245
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
479
343
                                                ParseResult* result) {
480
343
    if (UNLIKELY(len <= 0)) {
481
0
        *result = PARSE_FAILURE;
482
0
        return 0;
483
0
    }
484
485
343
    T val = 0;
486
343
    T max_val = std::numeric_limits<T>::max();
487
343
    int i = 0;
488
489
343
    using signedT = MakeSignedT<T>;
490
    // This is the fast path where the string cannot overflow.
491
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
492
196
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
493
196
        return val;
494
196
    }
495
496
147
    const T max_div_10 = max_val / 10;
497
147
    const T max_mod_10 = max_val % 10;
498
499
147
    int first = i;
500
833
    for (; i < len; ++i) {
501
735
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
502
735
            T digit = s[i] - '0';
503
            // This is a tricky check to see if adding this digit will cause an overflow.
504
735
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
505
49
                *result = PARSE_OVERFLOW;
506
49
                return max_val;
507
49
            }
508
686
            val = val * 10 + digit;
509
686
        } else {
510
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
511
                // Reject the string because either the first char was not a digit,
512
                // or the remaining chars are not all whitespace
513
0
                *result = PARSE_FAILURE;
514
0
                return 0;
515
0
            }
516
            // Returning here is slightly faster than breaking the loop.
517
0
            *result = PARSE_SUCCESS;
518
0
            return val;
519
0
        }
520
735
    }
521
98
    *result = PARSE_SUCCESS;
522
98
    return val;
523
147
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
479
343
                                                ParseResult* result) {
480
343
    if (UNLIKELY(len <= 0)) {
481
0
        *result = PARSE_FAILURE;
482
0
        return 0;
483
0
    }
484
485
343
    T val = 0;
486
343
    T max_val = std::numeric_limits<T>::max();
487
343
    int i = 0;
488
489
343
    using signedT = MakeSignedT<T>;
490
    // This is the fast path where the string cannot overflow.
491
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
492
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
493
245
        return val;
494
245
    }
495
496
98
    const T max_div_10 = max_val / 10;
497
98
    const T max_mod_10 = max_val % 10;
498
499
98
    int first = i;
500
1.02k
    for (; i < len; ++i) {
501
980
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
502
980
            T digit = s[i] - '0';
503
            // This is a tricky check to see if adding this digit will cause an overflow.
504
980
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
505
49
                *result = PARSE_OVERFLOW;
506
49
                return max_val;
507
49
            }
508
931
            val = val * 10 + digit;
509
931
        } else {
510
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
511
                // Reject the string because either the first char was not a digit,
512
                // or the remaining chars are not all whitespace
513
0
                *result = PARSE_FAILURE;
514
0
                return 0;
515
0
            }
516
            // Returning here is slightly faster than breaking the loop.
517
0
            *result = PARSE_SUCCESS;
518
0
            return val;
519
0
        }
520
980
    }
521
49
    *result = PARSE_SUCCESS;
522
49
    return val;
523
98
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
479
343
                                                ParseResult* result) {
480
343
    if (UNLIKELY(len <= 0)) {
481
0
        *result = PARSE_FAILURE;
482
0
        return 0;
483
0
    }
484
485
343
    T val = 0;
486
343
    T max_val = std::numeric_limits<T>::max();
487
343
    int i = 0;
488
489
343
    using signedT = MakeSignedT<T>;
490
    // This is the fast path where the string cannot overflow.
491
343
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
492
245
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
493
245
        return val;
494
245
    }
495
496
98
    const T max_div_10 = max_val / 10;
497
98
    const T max_mod_10 = max_val % 10;
498
499
98
    int first = i;
500
2.00k
    for (; i < len; ++i) {
501
1.96k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
502
1.96k
            T digit = s[i] - '0';
503
            // This is a tricky check to see if adding this digit will cause an overflow.
504
1.96k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
505
49
                *result = PARSE_OVERFLOW;
506
49
                return max_val;
507
49
            }
508
1.91k
            val = val * 10 + digit;
509
1.91k
        } else {
510
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
511
                // Reject the string because either the first char was not a digit,
512
                // or the remaining chars are not all whitespace
513
0
                *result = PARSE_FAILURE;
514
0
                return 0;
515
0
            }
516
            // Returning here is slightly faster than breaking the loop.
517
0
            *result = PARSE_SUCCESS;
518
0
            return val;
519
0
        }
520
1.96k
    }
521
49
    *result = PARSE_SUCCESS;
522
49
    return val;
523
98
}
524
525
template <typename T>
526
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
527
27.8k
                                       ParseResult* result) {
528
27.8k
    using UnsignedT = MakeUnsignedT<T>;
529
27.8k
    UnsignedT val = 0;
530
27.8k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
27.8k
    bool negative = false;
532
27.8k
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
27.8k
    int i = 0;
537
27.8k
    switch (*s) {
538
13.4k
    case '-':
539
13.4k
        negative = true;
540
13.4k
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
13.4k
        [[fallthrough]];
542
13.7k
    case '+':
543
13.7k
        i = 1;
544
27.8k
    }
545
546
27.8k
    const T max_div_base = max_val / base;
547
27.8k
    const T max_mod_base = max_val % base;
548
549
27.8k
    int first = i;
550
90.9k
    for (; i < len; ++i) {
551
76.6k
        T digit;
552
76.6k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
75.7k
            digit = s[i] - '0';
554
75.7k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
639
            digit = (s[i] - 'a' + 10);
556
639
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
98
            digit = (s[i] - 'A' + 10);
558
147
        } else {
559
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
147
                *result = PARSE_FAILURE;
563
147
                return 0;
564
147
            }
565
            // skip trailing whitespace.
566
0
            break;
567
147
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
76.4k
        if (digit >= base) {
571
392
            break;
572
392
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
76.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
12.9k
            *result = PARSE_OVERFLOW;
577
12.9k
            return static_cast<T>(negative ? -max_val : max_val);
578
12.9k
        }
579
63.1k
        val = val * base + digit;
580
63.1k
    }
581
14.7k
    *result = PARSE_SUCCESS;
582
14.7k
    return static_cast<T>(negative ? -val : val);
583
27.8k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
527
26.4k
                                       ParseResult* result) {
528
26.4k
    using UnsignedT = MakeUnsignedT<T>;
529
26.4k
    UnsignedT val = 0;
530
26.4k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
26.4k
    bool negative = false;
532
26.4k
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
26.4k
    int i = 0;
537
26.4k
    switch (*s) {
538
12.8k
    case '-':
539
12.8k
        negative = true;
540
12.8k
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
12.8k
        [[fallthrough]];
542
12.9k
    case '+':
543
12.9k
        i = 1;
544
26.4k
    }
545
546
26.4k
    const T max_div_base = max_val / base;
547
26.4k
    const T max_mod_base = max_val % base;
548
549
26.4k
    int first = i;
550
80.7k
    for (; i < len; ++i) {
551
67.4k
        T digit;
552
67.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
66.6k
            digit = s[i] - '0';
554
66.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
539
            digit = (s[i] - 'a' + 10);
556
539
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
98
            digit = (s[i] - 'A' + 10);
558
147
        } else {
559
147
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
147
                *result = PARSE_FAILURE;
563
147
                return 0;
564
147
            }
565
            // skip trailing whitespace.
566
0
            break;
567
147
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
67.3k
        if (digit >= base) {
571
392
            break;
572
392
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
66.9k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
12.6k
            *result = PARSE_OVERFLOW;
577
12.6k
            return static_cast<T>(negative ? -max_val : max_val);
578
12.6k
        }
579
54.2k
        val = val * base + digit;
580
54.2k
    }
581
13.6k
    *result = PARSE_SUCCESS;
582
13.6k
    return static_cast<T>(negative ? -val : val);
583
26.4k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
527
490
                                       ParseResult* result) {
528
490
    using UnsignedT = MakeUnsignedT<T>;
529
490
    UnsignedT val = 0;
530
490
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
490
    bool negative = false;
532
490
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
490
    int i = 0;
537
490
    switch (*s) {
538
196
    case '-':
539
196
        negative = true;
540
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
196
        [[fallthrough]];
542
245
    case '+':
543
245
        i = 1;
544
490
    }
545
546
490
    const T max_div_base = max_val / base;
547
490
    const T max_mod_base = max_val % base;
548
549
490
    int first = i;
550
2.10k
    for (; i < len; ++i) {
551
1.71k
        T digit;
552
1.71k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
1.61k
            digit = s[i] - '0';
554
1.61k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
98
            digit = (s[i] - 'a' + 10);
556
98
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
0
            digit = (s[i] - 'A' + 10);
558
0
        } else {
559
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
0
                *result = PARSE_FAILURE;
563
0
                return 0;
564
0
            }
565
            // skip trailing whitespace.
566
0
            break;
567
0
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
1.71k
        if (digit >= base) {
571
0
            break;
572
0
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
1.71k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
98
            *result = PARSE_OVERFLOW;
577
98
            return static_cast<T>(negative ? -max_val : max_val);
578
98
        }
579
1.61k
        val = val * base + digit;
580
1.61k
    }
581
392
    *result = PARSE_SUCCESS;
582
392
    return static_cast<T>(negative ? -val : val);
583
490
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
527
441
                                       ParseResult* result) {
528
441
    using UnsignedT = MakeUnsignedT<T>;
529
441
    UnsignedT val = 0;
530
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
441
    bool negative = false;
532
441
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
441
    int i = 0;
537
441
    switch (*s) {
538
147
    case '-':
539
147
        negative = true;
540
147
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
147
        [[fallthrough]];
542
245
    case '+':
543
245
        i = 1;
544
441
    }
545
546
441
    const T max_div_base = max_val / base;
547
441
    const T max_mod_base = max_val % base;
548
549
441
    int first = i;
550
3.03k
    for (; i < len; ++i) {
551
2.69k
        T digit;
552
2.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
2.69k
            digit = s[i] - '0';
554
2.69k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
0
            digit = (s[i] - 'a' + 10);
556
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
0
            digit = (s[i] - 'A' + 10);
558
0
        } else {
559
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
0
                *result = PARSE_FAILURE;
563
0
                return 0;
564
0
            }
565
            // skip trailing whitespace.
566
0
            break;
567
0
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
2.69k
        if (digit >= base) {
571
0
            break;
572
0
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
2.69k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
98
            *result = PARSE_OVERFLOW;
577
98
            return static_cast<T>(negative ? -max_val : max_val);
578
98
        }
579
2.59k
        val = val * base + digit;
580
2.59k
    }
581
343
    *result = PARSE_SUCCESS;
582
343
    return static_cast<T>(negative ? -val : val);
583
441
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
527
441
                                       ParseResult* result) {
528
441
    using UnsignedT = MakeUnsignedT<T>;
529
441
    UnsignedT val = 0;
530
441
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
441
    bool negative = false;
532
441
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
441
    int i = 0;
537
441
    switch (*s) {
538
196
    case '-':
539
196
        negative = true;
540
196
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
196
        [[fallthrough]];
542
245
    case '+':
543
245
        i = 1;
544
441
    }
545
546
441
    const T max_div_base = max_val / base;
547
441
    const T max_mod_base = max_val % base;
548
549
441
    int first = i;
550
5.09k
    for (; i < len; ++i) {
551
4.75k
        T digit;
552
4.75k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
4.75k
            digit = s[i] - '0';
554
4.75k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
0
            digit = (s[i] - 'a' + 10);
556
0
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
0
            digit = (s[i] - 'A' + 10);
558
0
        } else {
559
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
0
                *result = PARSE_FAILURE;
563
0
                return 0;
564
0
            }
565
            // skip trailing whitespace.
566
0
            break;
567
0
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
4.75k
        if (digit >= base) {
571
0
            break;
572
0
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
4.75k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
98
            *result = PARSE_OVERFLOW;
577
98
            return static_cast<T>(negative ? -max_val : max_val);
578
98
        }
579
4.65k
        val = val * base + digit;
580
4.65k
    }
581
343
    *result = PARSE_SUCCESS;
582
343
    return static_cast<T>(negative ? -val : val);
583
441
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
527
1
                                       ParseResult* result) {
528
1
    using UnsignedT = MakeUnsignedT<T>;
529
1
    UnsignedT val = 0;
530
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
531
1
    bool negative = false;
532
1
    if (UNLIKELY(len <= 0)) {
533
0
        *result = PARSE_FAILURE;
534
0
        return 0;
535
0
    }
536
1
    int i = 0;
537
1
    switch (*s) {
538
0
    case '-':
539
0
        negative = true;
540
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
541
0
        [[fallthrough]];
542
0
    case '+':
543
0
        i = 1;
544
1
    }
545
546
1
    const T max_div_base = max_val / base;
547
1
    const T max_mod_base = max_val % base;
548
549
1
    int first = i;
550
3
    for (; i < len; ++i) {
551
2
        T digit;
552
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
553
0
            digit = s[i] - '0';
554
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
555
2
            digit = (s[i] - 'a' + 10);
556
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
557
0
            digit = (s[i] - 'A' + 10);
558
0
        } else {
559
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
560
                // Reject the string because either the first char was not an alpha/digit,
561
                // or the remaining chars are not all whitespace
562
0
                *result = PARSE_FAILURE;
563
0
                return 0;
564
0
            }
565
            // skip trailing whitespace.
566
0
            break;
567
0
        }
568
569
        // Bail, if we encounter a digit that is not available in base.
570
2
        if (digit >= base) {
571
0
            break;
572
0
        }
573
574
        // This is a tricky check to see if adding this digit will cause an overflow.
575
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
576
0
            *result = PARSE_OVERFLOW;
577
0
            return static_cast<T>(negative ? -max_val : max_val);
578
0
        }
579
2
        val = val * base + digit;
580
2
    }
581
1
    *result = PARSE_SUCCESS;
582
1
    return static_cast<T>(negative ? -val : val);
583
1
}
584
585
// Parses a run of ASCII decimal digits from s[0..len) into a value of type T.
//
// No overflow detection is performed: each digit is folded in with
// `val = val * 10 + digit` and nothing checks whether the result still fits in T.
// No sign character is accepted here; the first character must be '0'..'9'.
//
// Template parameters:
//   T                  - arithmetic type used to accumulate and return the value.
//   enable_strict_mode - selects what may follow the digits (see below).
//
// Parameters:
//   s      - pointer to the characters to parse (not required to be NUL terminated;
//            only s[0..len) is read).
//   len    - number of characters available at s.
//   result - out parameter, always written before returning.
//
// Returns / result:
//   * len == 0                         -> returns 0, *result = PARSE_SUCCESS.
//   * s[0] is not a digit              -> returns 0, *result = PARSE_FAILURE.
//   * all characters are digits        -> returns the value, *result = PARSE_SUCCESS.
//   * a non-digit is found at i >= 1:
//       - strict mode:     the rest must satisfy is_all_whitespace(), otherwise
//                          returns 0 with PARSE_FAILURE.
//       - non-strict mode: the rest must satisfy is_all_whitespace() or
//                          is_float_suffix() (helper defined elsewhere), otherwise
//                          returns 0 with PARSE_FAILURE.
//     When the tail is accepted, the digits seen so far are returned with
//     PARSE_SUCCESS and the tail is ignored.
template <typename T, bool enable_strict_mode>
586
245k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
245k
    T val = 0;
588
245k
    // Empty input is reported as a successful parse of 0.
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Handling the first char separately keeps the "no digits at all" error check
    // out of the loop below, which speeds up the loop.
593
245k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
243k
        val = s[0] - '0';
595
243k
    } else {
596
1.70k
        *result = PARSE_FAILURE;
597
1.70k
        return 0;
598
1.70k
    }
599
389k
    for (int i = 1; i < len; ++i) {
600
148k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
145k
            T digit = s[i] - '0';
602
145k
            // Unchecked accumulation: the caller must guarantee the value fits in T.
            val = val * 10 + digit;
603
145k
        } else {
604
2.72k
            // First non-digit: decide whether the remaining tail is acceptable.
            if constexpr (enable_strict_mode) {
605
860
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
860
                    *result = PARSE_FAILURE;
607
860
                    return 0;
608
860
                }
609
1.86k
            } else {
610
1.86k
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
1.86k
                              !is_float_suffix(s + i, len - i)))) {
612
328
                    *result = PARSE_FAILURE;
613
328
                    return 0;
614
328
                }
615
1.86k
            }
616
1.53k
            // Tail accepted: return the digits parsed so far and ignore the tail.
            *result = PARSE_SUCCESS;
617
2.72k
            return val;
618
2.72k
        }
619
148k
    }
620
240k
    *result = PARSE_SUCCESS;
621
240k
    return val;
622
243k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
41.2k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
41.2k
    T val = 0;
588
41.2k
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
41.2k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
41.0k
        val = s[0] - '0';
595
41.0k
    } else {
596
119
        *result = PARSE_FAILURE;
597
119
        return 0;
598
119
    }
599
57.7k
    for (int i = 1; i < len; ++i) {
600
16.9k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
16.6k
            T digit = s[i] - '0';
602
16.6k
            val = val * 10 + digit;
603
16.6k
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
289
            } else {
610
289
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
289
                              !is_float_suffix(s + i, len - i)))) {
612
65
                    *result = PARSE_FAILURE;
613
65
                    return 0;
614
65
                }
615
289
            }
616
224
            *result = PARSE_SUCCESS;
617
289
            return val;
618
289
        }
619
16.9k
    }
620
40.7k
    *result = PARSE_SUCCESS;
621
40.7k
    return val;
622
41.0k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
68.7k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
68.7k
    T val = 0;
588
68.7k
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
68.7k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
68.6k
        val = s[0] - '0';
595
68.6k
    } else {
596
127
        *result = PARSE_FAILURE;
597
127
        return 0;
598
127
    }
599
101k
    for (int i = 1; i < len; ++i) {
600
33.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
33.0k
            T digit = s[i] - '0';
602
33.0k
            val = val * 10 + digit;
603
33.0k
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
1
            } else {
610
1
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
1
                              !is_float_suffix(s + i, len - i)))) {
612
1
                    *result = PARSE_FAILURE;
613
1
                    return 0;
614
1
                }
615
1
            }
616
0
            *result = PARSE_SUCCESS;
617
1
            return val;
618
1
        }
619
33.0k
    }
620
68.6k
    *result = PARSE_SUCCESS;
621
68.6k
    return val;
622
68.6k
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
50.6k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
50.6k
    T val = 0;
588
50.6k
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
50.6k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
50.3k
        val = s[0] - '0';
595
50.3k
    } else {
596
250
        *result = PARSE_FAILURE;
597
250
        return 0;
598
250
    }
599
73.4k
    for (int i = 1; i < len; ++i) {
600
23.9k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
23.0k
            T digit = s[i] - '0';
602
23.0k
            val = val * 10 + digit;
603
23.0k
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
924
            } else {
610
924
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
924
                              !is_float_suffix(s + i, len - i)))) {
612
63
                    *result = PARSE_FAILURE;
613
63
                    return 0;
614
63
                }
615
924
            }
616
861
            *result = PARSE_SUCCESS;
617
924
            return val;
618
924
        }
619
23.9k
    }
620
49.4k
    *result = PARSE_SUCCESS;
621
49.4k
    return val;
622
50.3k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
51.2k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
51.2k
    T val = 0;
588
51.2k
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
51.2k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
50.3k
        val = s[0] - '0';
595
50.3k
    } else {
596
831
        *result = PARSE_FAILURE;
597
831
        return 0;
598
831
    }
599
86.0k
    for (int i = 1; i < len; ++i) {
600
35.9k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
35.6k
            T digit = s[i] - '0';
602
35.6k
            val = val * 10 + digit;
603
35.6k
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
339
            } else {
610
339
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
339
                              !is_float_suffix(s + i, len - i)))) {
612
115
                    *result = PARSE_FAILURE;
613
115
                    return 0;
614
115
                }
615
339
            }
616
224
            *result = PARSE_SUCCESS;
617
339
            return val;
618
339
        }
619
35.9k
    }
620
50.0k
    *result = PARSE_SUCCESS;
621
50.0k
    return val;
622
50.3k
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
31.8k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
31.8k
    T val = 0;
588
31.8k
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
31.8k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
31.5k
        val = s[0] - '0';
595
31.5k
    } else {
596
258
        *result = PARSE_FAILURE;
597
258
        return 0;
598
258
    }
599
66.2k
    for (int i = 1; i < len; ++i) {
600
35.0k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
34.7k
            T digit = s[i] - '0';
602
34.7k
            val = val * 10 + digit;
603
34.7k
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
309
            } else {
610
309
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
309
                              !is_float_suffix(s + i, len - i)))) {
612
84
                    *result = PARSE_FAILURE;
613
84
                    return 0;
614
84
                }
615
309
            }
616
225
            *result = PARSE_SUCCESS;
617
309
            return val;
618
309
        }
619
35.0k
    }
620
31.2k
    *result = PARSE_SUCCESS;
621
31.2k
    return val;
622
31.5k
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
4
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
4
    T val = 0;
588
4
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
4
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
4
        val = s[0] - '0';
595
4
    } else {
596
0
        *result = PARSE_FAILURE;
597
0
        return 0;
598
0
    }
599
4
    for (int i = 1; i < len; ++i) {
600
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
0
            T digit = s[i] - '0';
602
0
            val = val * 10 + digit;
603
0
        } else {
604
            if constexpr (enable_strict_mode) {
605
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
                    *result = PARSE_FAILURE;
607
                    return 0;
608
                }
609
0
            } else {
610
0
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
0
                              !is_float_suffix(s + i, len - i)))) {
612
0
                    *result = PARSE_FAILURE;
613
0
                    return 0;
614
0
                }
615
0
            }
616
0
            *result = PARSE_SUCCESS;
617
0
            return val;
618
0
        }
619
0
    }
620
4
    *result = PARSE_SUCCESS;
621
4
    return val;
622
4
}
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
51
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
51
    T val = 0;
588
51
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
51
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
41
        val = s[0] - '0';
595
41
    } else {
596
10
        *result = PARSE_FAILURE;
597
10
        return 0;
598
10
    }
599
41
    for (int i = 1; i < len; ++i) {
600
1
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
0
            T digit = s[i] - '0';
602
0
            val = val * 10 + digit;
603
1
        } else {
604
1
            if constexpr (enable_strict_mode) {
605
1
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
1
                    *result = PARSE_FAILURE;
607
1
                    return 0;
608
1
                }
609
            } else {
610
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
                              !is_float_suffix(s + i, len - i)))) {
612
                    *result = PARSE_FAILURE;
613
                    return 0;
614
                }
615
            }
616
0
            *result = PARSE_SUCCESS;
617
1
            return val;
618
1
        }
619
1
    }
620
40
    *result = PARSE_SUCCESS;
621
40
    return val;
622
41
}
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
203
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
203
    T val = 0;
588
203
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
203
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
187
        val = s[0] - '0';
595
187
    } else {
596
16
        *result = PARSE_FAILURE;
597
16
        return 0;
598
16
    }
599
339
    for (int i = 1; i < len; ++i) {
600
243
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
152
            T digit = s[i] - '0';
602
152
            val = val * 10 + digit;
603
152
        } else {
604
91
            if constexpr (enable_strict_mode) {
605
91
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
91
                    *result = PARSE_FAILURE;
607
91
                    return 0;
608
91
                }
609
            } else {
610
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
                              !is_float_suffix(s + i, len - i)))) {
612
                    *result = PARSE_FAILURE;
613
                    return 0;
614
                }
615
            }
616
0
            *result = PARSE_SUCCESS;
617
91
            return val;
618
91
        }
619
243
    }
620
96
    *result = PARSE_SUCCESS;
621
96
    return val;
622
187
}
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
531
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
531
    T val = 0;
588
531
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
531
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
500
        val = s[0] - '0';
595
500
    } else {
596
31
        *result = PARSE_FAILURE;
597
31
        return 0;
598
31
    }
599
1.40k
    for (int i = 1; i < len; ++i) {
600
1.16k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
904
            T digit = s[i] - '0';
602
904
            val = val * 10 + digit;
603
904
        } else {
604
256
            if constexpr (enable_strict_mode) {
605
256
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
256
                    *result = PARSE_FAILURE;
607
256
                    return 0;
608
256
                }
609
            } else {
610
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
                              !is_float_suffix(s + i, len - i)))) {
612
                    *result = PARSE_FAILURE;
613
                    return 0;
614
                }
615
            }
616
0
            *result = PARSE_SUCCESS;
617
256
            return val;
618
256
        }
619
1.16k
    }
620
244
    *result = PARSE_SUCCESS;
621
244
    return val;
622
500
}
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
400
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
400
    T val = 0;
588
400
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
400
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
368
        val = s[0] - '0';
595
368
    } else {
596
32
        *result = PARSE_FAILURE;
597
32
        return 0;
598
32
    }
599
1.09k
    for (int i = 1; i < len; ++i) {
600
981
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
725
            T digit = s[i] - '0';
602
725
            val = val * 10 + digit;
603
725
        } else {
604
256
            if constexpr (enable_strict_mode) {
605
256
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
256
                    *result = PARSE_FAILURE;
607
256
                    return 0;
608
256
                }
609
            } else {
610
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
                              !is_float_suffix(s + i, len - i)))) {
612
                    *result = PARSE_FAILURE;
613
                    return 0;
614
                }
615
            }
616
0
            *result = PARSE_SUCCESS;
617
256
            return val;
618
256
        }
619
981
    }
620
112
    *result = PARSE_SUCCESS;
621
112
    return val;
622
368
}
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE
Line
Count
Source
586
401
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
587
401
    T val = 0;
588
401
    if (UNLIKELY(len == 0)) {
589
0
        *result = PARSE_SUCCESS;
590
0
        return val;
591
0
    }
592
    // Factor out the first char for error handling speeds up the loop.
593
401
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
594
368
        val = s[0] - '0';
595
368
    } else {
596
33
        *result = PARSE_FAILURE;
597
33
        return 0;
598
33
    }
599
1.09k
    for (int i = 1; i < len; ++i) {
600
981
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
601
725
            T digit = s[i] - '0';
602
725
            val = val * 10 + digit;
603
725
        } else {
604
256
            if constexpr (enable_strict_mode) {
605
256
                if (UNLIKELY(!is_all_whitespace(s + i, len - i))) {
606
256
                    *result = PARSE_FAILURE;
607
256
                    return 0;
608
256
                }
609
            } else {
610
                if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
611
                              !is_float_suffix(s + i, len - i)))) {
612
                    *result = PARSE_FAILURE;
613
                    return 0;
614
                }
615
            }
616
0
            *result = PARSE_SUCCESS;
617
256
            return val;
618
256
        }
619
981
    }
620
112
    *result = PARSE_SUCCESS;
621
112
    return val;
622
368
}
623
624
// Greedily parses the leading run of decimal digits in s[0..max_len) into T.
//
// At least the first char (if any) must be a digit. Parsing stops at the first
// character for which is_numeric_ascii() is false, and whatever was accumulated
// up to that point is returned as a success (e.g. "123abc" yields 123).
// No overflow detection is performed on `val = val * 10 + digit`.
//
// Parameters:
//   s       - pointer to the characters to parse; only s[0..max_len) is read.
//   max_len - maximum number of characters to consume.
//   result  - out parameter, always written before returning.
//
// Returns / result:
//   * max_len == 0            -> returns 0, *result = PARSE_SUCCESS.
//   * s[0] is not a digit     -> returns 0, *result = PARSE_FAILURE.
//   * otherwise               -> value of the leading digit run, PARSE_SUCCESS.
625
template <typename T>
626
T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len,
627
59
                                                  ParseResult* result) {
628
59
    T val = 0;
629
59
    // Nothing to read: reported as a successful parse of 0.
    if (max_len == 0) [[unlikely]] {
630
10
        *result = PARSE_SUCCESS;
631
10
        return val;
632
10
    }
633
    // Handling the first char separately keeps the "no digits at all" error check
    // out of the loop below, which speeds up the loop.
634
49
    if (is_numeric_ascii(s[0])) [[likely]] {
635
49
        val = s[0] - '0';
636
49
    } else {
637
0
        *result = PARSE_FAILURE;
638
0
        return 0;
639
0
    }
640
215
    for (int i = 1; i < max_len; ++i) {
641
166
        if (is_numeric_ascii(s[i])) [[likely]] {
642
166
            T digit = s[i] - '0';
643
166
            val = val * 10 + digit;
644
166
        } else {
645
            // Greedy stop: for "123abc" return 123 and ignore the rest.
646
0
            *result = PARSE_SUCCESS;
647
0
            return val;
648
0
        }
649
166
    }
650
49
    *result = PARSE_SUCCESS;
651
49
    return val;
652
49
}
653
654
// Parses s[0..len) as a floating point number and returns it as T.
//
// Steps performed:
//   1. Skip leading and trailing characters for which is_whitespace_ascii() is true.
//   2. Skip a single leading '+'; a second sign right after it ("++", "+-") is
//      rejected.
//   3. Fail if nothing is left after trimming.
//   4. Parse the remaining range [s + i, s + j + 1) with fast_float::from_chars
//      into a double.
//   5. Succeed only if from_chars consumed the whole range.
//
// Parameters:
//   s      - pointer to the characters to parse; only s[0..len) is read.
//   len    - number of characters available at s.
//   result - out parameter; PARSE_SUCCESS or PARSE_FAILURE is written on every path.
//
// Returns the parsed double converted to T on success, 0 on failure.
//
// NOTE(review): only res.ptr is inspected below; res.ec is never read, so any
// error code from from_chars that still leaves ptr at the end of the range is
// treated as success - confirm this is intended.
template <typename T>
655
153k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
656
153k
    int i = 0;
657
    // skip leading whitespace
658
153k
    for (; i < len; ++i) {
659
153k
        if (!is_whitespace_ascii(s[i])) {
660
153k
            break;
661
153k
        }
662
153k
    }
663
664
    // skip trailing whitespace; afterwards j is the index of the last
    // non-whitespace char, or i - 1 if the input is empty or all whitespace
665
153k
    int j = len - 1;
666
153k
    for (; j >= i; j--) {
667
153k
        if (!is_whitespace_ascii(s[j])) {
668
153k
            break;
669
153k
        }
670
153k
    }
671
672
    // skip leading '+', from_chars can handle '-'
673
153k
    if (i < len && s[i] == '+') {
674
7.08k
        i++;
675
        // "++" and "+-" are not valid, but the first '+' has already been skipped;
676
        // without this check from_chars would succeed on the remainder.
677
        //
678
        // Newer versions of fast_float support a flag called 'chars_format::allow_leading_plus'
679
        // which may make this extra check unnecessary.
680
        // e.g.:
681
        // fast_float::chars_format format =
682
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
683
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
684
7.08k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
685
20
            *result = PARSE_FAILURE;
686
20
            return 0;
687
20
        }
688
7.08k
    }
689
153k
    // Nothing left to parse (empty, all whitespace, or a lone '+').
    if (UNLIKELY(i > j)) {
690
32
        *result = PARSE_FAILURE;
691
32
        return 0;
692
32
    }
693
694
    // Always parse into a double, regardless of T, so that no precision is lost
    // before the final conversion to T on return.
695
153k
    double val = 0;
696
153k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
697
698
153k
    // Success requires that the entire trimmed range was consumed.
    if (res.ptr == s + j + 1) {
699
148k
        *result = PARSE_SUCCESS;
700
148k
        return val;
701
148k
    } else {
702
4.96k
        *result = PARSE_FAILURE;
703
4.96k
    }
704
4.96k
    return 0;
705
153k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
655
87.7k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
656
87.7k
    int i = 0;
657
    // skip leading spaces
658
87.7k
    for (; i < len; ++i) {
659
87.7k
        if (!is_whitespace_ascii(s[i])) {
660
87.7k
            break;
661
87.7k
        }
662
87.7k
    }
663
664
    // skip back spaces
665
87.7k
    int j = len - 1;
666
87.7k
    for (; j >= i; j--) {
667
87.7k
        if (!is_whitespace_ascii(s[j])) {
668
87.7k
            break;
669
87.7k
        }
670
87.7k
    }
671
672
    // skip leading '+', from_chars can handle '-'
673
87.7k
    if (i < len && s[i] == '+') {
674
3.54k
        i++;
675
        // ++ or +- are not valid, but the first + is already skipped,
676
        // if don't check here, from_chars will succeed.
677
        //
678
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
679
        // which may avoid this extra check here.
680
        // e.g.:
681
        // fast_float::chars_format format =
682
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
683
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
684
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
685
10
            *result = PARSE_FAILURE;
686
10
            return 0;
687
10
        }
688
3.54k
    }
689
87.7k
    if (UNLIKELY(i > j)) {
690
18
        *result = PARSE_FAILURE;
691
18
        return 0;
692
18
    }
693
694
    // Use double here to not lose precision while accumulating the result
695
87.7k
    double val = 0;
696
87.7k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
697
698
87.7k
    if (res.ptr == s + j + 1) {
699
85.0k
        *result = PARSE_SUCCESS;
700
85.0k
        return val;
701
85.0k
    } else {
702
2.67k
        *result = PARSE_FAILURE;
703
2.67k
    }
704
2.67k
    return 0;
705
87.7k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
655
65.3k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
656
65.3k
    int i = 0;
657
    // skip leading spaces
658
65.3k
    for (; i < len; ++i) {
659
65.3k
        if (!is_whitespace_ascii(s[i])) {
660
65.3k
            break;
661
65.3k
        }
662
65.3k
    }
663
664
    // skip back spaces
665
65.3k
    int j = len - 1;
666
65.3k
    for (; j >= i; j--) {
667
65.3k
        if (!is_whitespace_ascii(s[j])) {
668
65.3k
            break;
669
65.3k
        }
670
65.3k
    }
671
672
    // skip leading '+', from_chars can handle '-'
673
65.3k
    if (i < len && s[i] == '+') {
674
3.54k
        i++;
675
        // ++ or +- are not valid, but the first + is already skipped,
676
        // if don't check here, from_chars will succeed.
677
        //
678
        // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus'
679
        // which may avoid this extra check here.
680
        // e.g.:
681
        // fast_float::chars_format format =
682
        //         fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus;
683
        // auto res = fast_float::from_chars(s + i, s + j + 1, val, format);
684
3.54k
        if (i < len && (s[i] == '+' || s[i] == '-')) {
685
10
            *result = PARSE_FAILURE;
686
10
            return 0;
687
10
        }
688
3.54k
    }
689
65.3k
    if (UNLIKELY(i > j)) {
690
14
        *result = PARSE_FAILURE;
691
14
        return 0;
692
14
    }
693
694
    // Use double here to not lose precision while accumulating the result
695
65.3k
    double val = 0;
696
65.3k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
697
698
65.3k
    if (res.ptr == s + j + 1) {
699
63.0k
        *result = PARSE_SUCCESS;
700
63.0k
        return val;
701
63.0k
    } else {
702
2.28k
        *result = PARSE_FAILURE;
703
2.28k
    }
704
2.28k
    return 0;
705
65.3k
}
706
707
// Parses s[0..len) as a boolean literal, matching letters case-insensitively.
//
// Accepted spellings (the whole input must match exactly; no whitespace is
// trimmed in this function):
//   true : "1", "t", "on", "yes", "true"
//   false: "0", "f", "no", "off", "false"
//
// Parameters:
//   s      - pointer to the characters to parse; only s[0..len) is read.
//   len    - number of characters available at s.
//   result - out parameter; PARSE_SUCCESS when one of the spellings above matched,
//            PARSE_FAILURE otherwise.
//
// Returns the parsed value; on PARSE_FAILURE the return value is false.
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
708
11.7k
                                                  ParseResult* result) {
709
11.7k
    // Assume success; the failure paths below overwrite this.
    *result = PARSE_SUCCESS;
710
711
11.7k
    // Single character: 1/t/T or 0/f/F. Any other single char fails immediately.
    if (len == 1) {
712
2.75k
        if (s[0] == '1' || s[0] == 't' || s[0] == 'T') {
713
343
            return true;
714
343
        }
715
2.41k
        if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') {
716
966
            return false;
717
966
        }
718
1.44k
        *result = PARSE_FAILURE;
719
1.44k
        return false;
720
2.41k
    }
721
722
9.00k
    // Two characters: "on" / "no". A mismatch falls through to the final failure.
    if (len == 2) {
723
1.00k
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
724
10
            return true;
725
10
        }
726
994
        if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) {
727
9
            return false;
728
9
        }
729
994
    }
730
731
8.98k
    // Three characters: "yes" / "off".
    if (len == 3) {
732
40
        if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') &&
733
40
            (s[2] == 's' || s[2] == 'S')) {
734
10
            return true;
735
10
        }
736
30
        if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') &&
737
30
            (s[2] == 'f' || s[2] == 'F')) {
738
9
            return false;
739
9
        }
740
30
    }
741
742
8.96k
    // Four characters: "true".
    if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
743
8.96k
        (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) {
744
3.49k
        return true;
745
3.49k
    }
746
747
5.46k
    // Five characters: "false".
    if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
748
5.46k
        (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
749
5.46k
        (s[4] == 'e' || s[4] == 'E')) {
750
3.54k
        return false;
751
3.54k
    }
752
753
    // No valid boolean value found
754
1.92k
    *result = PARSE_FAILURE;
755
1.92k
    return false;
756
5.46k
}
757
758
/*
759
template <PrimitiveType P, typename T, typename DecimalType>
760
T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision,
761
                                  int type_scale, ParseResult* result) {
762
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
763
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
764
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
765
                  "wide::Int256.");
766
    // Special cases:
767
    //   1) '' == Fail, an empty string fails to parse.
768
    //   2) '   #   ' == #, leading and trailing white space is ignored.
769
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
770
    //   4) '#.' == '#', a trailing dot is ignored.
771
772
    // Ignore leading and trailing spaces.
773
    while (len > 0 && is_whitespace(*s)) {
774
        ++s;
775
        --len;
776
    }
777
    while (len > 0 && is_whitespace(s[len - 1])) {
778
        --len;
779
    }
780
781
    bool is_negative = false;
782
    if (len > 0) {
783
        switch (*s) {
784
        case '-':
785
            is_negative = true;
786
            [[fallthrough]];
787
        case '+':
788
            ++s;
789
            --len;
790
        }
791
    }
792
793
    // Ignore leading zeros.
794
    bool found_value = false;
795
    while (len > 0 && UNLIKELY(*s == '0')) {
796
        found_value = true;
797
        ++s;
798
        --len;
799
    }
800
801
    // Ignore leading zeros even after a dot. This allows for differentiating between
802
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
803
    // overflow.
804
    int scale = 0;
805
    int found_dot = 0;
806
    if (len > 0 && *s == '.') {
807
        found_dot = 1;
808
        ++s;
809
        --len;
810
        while (len > 0 && UNLIKELY(*s == '0')) {
811
            found_value = true;
812
            ++scale;
813
            ++s;
814
            --len;
815
        }
816
    }
817
818
    int precision = 0;
819
    int max_digit = type_precision - type_scale;
820
    int cur_digit = 0;
821
    bool found_exponent = false;
822
    int8_t exponent = 0;
823
    T value = 0;
824
    bool has_round = false;
825
    for (int i = 0; i < len; ++i) {
826
        const char& c = s[i];
827
        if (LIKELY('0' <= c && c <= '9')) {
828
            found_value = true;
829
            // Ignore digits once the type's precision limit is reached. This avoids
830
            // overflowing the underlying storage while handling a string like
831
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
832
            // an exponent will be made later.
833
            if (LIKELY(type_precision > precision) && !has_round) {
834
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
835
                ++precision;
836
                scale += found_dot;
837
                cur_digit = precision - scale;
838
            } else if (!found_dot && max_digit < (precision - scale)) {
839
                *result = StringParser::PARSE_OVERFLOW;
840
                value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
841
                                    : vectorized::max_decimal_value<P>(type_precision);
842
                return value;
843
            } else if (found_dot && scale >= type_scale && !has_round) {
844
                // make rounding cases
845
                if (c > '4') {
846
                    value += 1;
847
                }
848
                has_round = true;
849
                continue;
850
            } else if (!found_dot) {
851
                ++cur_digit;
852
            }
853
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
854
        } else if (c == '.' && LIKELY(!found_dot)) {
855
            found_dot = 1;
856
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
857
            found_exponent = true;
858
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
859
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
860
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
861
                    *result = StringParser::PARSE_UNDERFLOW;
862
                }
863
                return 0;
864
            }
865
            break;
866
        } else {
867
            if (value == 0) {
868
                *result = StringParser::PARSE_FAILURE;
869
                return 0;
870
            }
871
            // here to handle
872
            *result = StringParser::PARSE_SUCCESS;
873
            if (type_scale >= scale) {
874
                value *= get_scale_multiplier<T>(type_scale - scale);
875
                // here meet non-valid character, should return the value, keep going to meet
876
                // the E/e character because we make right user-given type_precision
877
                // not max number type_precision
878
                if (!is_numeric_ascii(c)) {
879
                    if (cur_digit > type_precision) {
880
                        *result = StringParser::PARSE_OVERFLOW;
881
                        value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
882
                                            : vectorized::max_decimal_value<P>(type_precision);
883
                        return value;
884
                    }
885
                    return is_negative ? T(-value) : T(value);
886
                }
887
            }
888
889
            return is_negative ? T(-value) : T(value);
890
        }
891
    }
892
893
    // Find the number of truncated digits before adjusting the precision for an exponent.
894
    if (exponent > scale) {
895
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
896
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
897
        precision += exponent - scale;
898
899
        value *= get_scale_multiplier<T>(exponent - scale);
900
        scale = 0;
901
    } else {
902
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
903
        //     the precision must also be set to 4 but that will be done below for the
904
        //     non-exponent case anyways.
905
        scale -= exponent;
906
    }
907
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
908
    //     were ignored during previous parsing.
909
    if (scale > precision) {
910
        precision = scale;
911
    }
912
913
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
914
    // than just letting the function run out.
915
    *result = StringParser::PARSE_SUCCESS;
916
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
917
        *result = StringParser::PARSE_OVERFLOW;
918
        if constexpr (TYPE_DECIMALV2 != P) {
919
            // decimalv3 overflow will return max min value for type precision
920
            value = is_negative ? vectorized::min_decimal_value<P>(type_precision)
921
                                : vectorized::max_decimal_value<P>(type_precision);
922
            return value;
923
        }
924
    } else if (UNLIKELY(scale > type_scale)) {
925
        *result = StringParser::PARSE_UNDERFLOW;
926
        int shift = scale - type_scale;
927
        T divisor = get_scale_multiplier<T>(shift);
928
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
929
            value = 0;
930
        } else {
931
            T remainder = value % divisor;
932
            value /= divisor;
933
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
934
                value += 1;
935
            }
936
        }
937
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
938
    } else if (UNLIKELY(!found_value && !found_dot)) {
939
        *result = StringParser::PARSE_FAILURE;
940
    }
941
942
    if (type_scale > scale) {
943
        value *= get_scale_multiplier<T>(type_scale - scale);
944
    }
945
946
    return is_negative ? T(-value) : T(value);
947
}
948
*/
949
950
} // end namespace doris