/root/doris/be/src/util/string_parser.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | #include <sys/types.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <cstdlib> |
30 | | // IWYU pragma: no_include <bits/std_abs.h> |
31 | | #include <cmath> // IWYU pragma: keep |
32 | | #include <cstdint> |
33 | | #include <limits> |
34 | | #include <map> |
35 | | #include <string> |
36 | | #include <type_traits> |
37 | | #include <utility> |
38 | | |
39 | | #include "common/compiler_util.h" // IWYU pragma: keep |
40 | | #include "common/status.h" |
41 | | #include "runtime/large_int_value.h" |
42 | | #include "runtime/primitive_type.h" |
43 | | #include "vec/common/int_exp.h" |
44 | | #include "vec/common/string_utils/string_utils.h" |
45 | | #include "vec/core/extended_types.h" |
46 | | #include "vec/data_types/number_traits.h" |
47 | | |
48 | | namespace doris { |
49 | | #include "common/compile_check_avoid_begin.h" |
50 | | namespace vectorized { |
51 | | template <DecimalNativeTypeConcept T> |
52 | | struct Decimal; |
53 | | } // namespace vectorized |
54 | | |
// Evaluates `stmt`; when it is false the *enclosing* function returns false.
// The macro must be expanded where a compile-time boolean `IsStrict` and an
// object `params` with an assignable `status` member are visible.
// Only when `IsStrict` is true is `params.status` set, to
// Status::InvalidArgument(<remaining macro arguments>); otherwise the failure
// is reported through the return value alone.
#ifndef SET_PARAMS_RET_FALSE_IFN
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
    do {                                                              \
        if ((stmt)) [[likely]] {                                      \
            /* condition holds: nothing to do */                      \
        } else {                                                      \
            if constexpr (IsStrict) {                                 \
                params.status = Status::InvalidArgument(__VA_ARGS__); \
            }                                                         \
            return false;                                             \
        }                                                             \
    } while (false)
#endif
67 | | |
// Runs `stmt`; if it throws doris::Exception the *enclosing* function returns
// false. Like SET_PARAMS_RET_FALSE_IFN it expects `IsStrict` and `params` to be
// in scope: when `IsStrict` is true the exception is converted with
// to_status() and stored in `params.status`. Other exception types are not
// caught by this macro.
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt)         \
    do {                                                  \
        try {                                             \
            { stmt; }                                     \
        } catch (const doris::Exception& caught) {        \
            if constexpr (IsStrict) {                     \
                params.status = caught.to_status();       \
            }                                             \
            return false;                                 \
        }                                                 \
    } while (false)
#endif
81 | | |
82 | | // skip leading and trailing ascii whitespaces, |
83 | | // return the pointer to the first non-whitespace char, |
84 | | // and update the len to the new length, which does not include |
85 | | // leading and trailing whitespaces |
86 | | template <typename T> |
87 | 544k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { |
88 | 1.01M | while (len > 0 && is_whitespace_ascii(*s)) { |
89 | 469k | ++s; |
90 | 469k | --len; |
91 | 469k | } |
92 | | |
93 | 1.01M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { |
94 | 465k | --len; |
95 | 465k | } |
96 | | |
97 | 544k | return s; |
98 | 544k | } _ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_ Line | Count | Source | 87 | 515k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 909k | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 393k | ++s; | 90 | 393k | --len; | 91 | 393k | } | 92 | | | 93 | 905k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 390k | --len; | 95 | 390k | } | 96 | | | 97 | 515k | return s; | 98 | 515k | } |
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_ Line | Count | Source | 87 | 1.37k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 4.90k | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 3.52k | ++s; | 90 | 3.52k | --len; | 91 | 3.52k | } | 92 | | | 93 | 4.90k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 3.52k | --len; | 95 | 3.52k | } | 96 | | | 97 | 1.37k | return s; | 98 | 1.37k | } |
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_ Line | Count | Source | 87 | 27.8k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 100k | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 72.4k | ++s; | 90 | 72.4k | --len; | 91 | 72.4k | } | 92 | | | 93 | 99.8k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 72.0k | --len; | 95 | 72.0k | } | 96 | | | 97 | 27.8k | return s; | 98 | 27.8k | } |
|
99 | | |
100 | | template <typename T> |
101 | 53.9k | inline const char* skip_leading_whitespace(const char* __restrict s, T& len) { |
102 | 157k | while (len > 0 && is_whitespace_ascii(*s)) { |
103 | 103k | ++s; |
104 | 103k | --len; |
105 | 103k | } |
106 | | |
107 | 53.9k | return s; |
108 | 53.9k | } |
109 | | |
// Drops trailing characters of [s, s + len) for which is_whitespace_ascii()
// is true. `s` itself is returned unchanged; only `len` is reduced so that it
// no longer counts the trailing whitespace.
template <typename T>
inline const char* skip_trailing_whitespaces(const char* s, T& len) {
    for (; len > 0 && is_whitespace_ascii(s[len - 1]); --len) {
    }
    return s;
}
122 | | |
123 | | template <bool (*Pred)(char)> |
124 | 436k | bool range_suite(const char* s, const char* end) { |
125 | 436k | return std::ranges::all_of(s, end, Pred); |
126 | 436k | } _ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_ Line | Count | Source | 124 | 434k | bool range_suite(const char* s, const char* end) { | 125 | 434k | return std::ranges::all_of(s, end, Pred); | 126 | 434k | } |
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_ Line | Count | Source | 124 | 2.28k | bool range_suite(const char* s, const char* end) { | 125 | 2.28k | return std::ranges::all_of(s, end, Pred); | 126 | 2.28k | } |
|
127 | | |
128 | | inline auto is_digit_range = range_suite<is_numeric_ascii>; |
129 | | inline auto is_space_range = range_suite<is_whitespace_ascii>; |
130 | | |
// Returns true when the position `offset` characters after `s` is still
// strictly before `end`, i.e. `s[offset]` may be read.
//
// The check compares `offset` against the remaining length instead of forming
// `s + offset`: building a pointer beyond one-past-the-end is undefined
// behaviour, and a very large `offset` could wrap around and wrongly pass.
//
// Combining in_bound with range_suite is fine; it does not lead to duplicated
// calculation.
inline bool in_bound(const char* s, const char* end, size_t offset) {
    if (s >= end) [[unlikely]] {
        // Empty (or inverted) range: nothing is readable.
        return false;
    }
    return offset < static_cast<size_t>(end - s);
}
138 | | |
139 | | // LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more. |
140 | | // if need result, use StringRef{origin_s, s} outside |
141 | | template <int LEN, bool (*Pred)(char)> |
142 | 1.50M | bool skip_qualified_char(const char*& s, const char* end) { |
143 | 1.50M | if constexpr (LEN == 0) { |
144 | | // Consume any length of characters that match the predicate. |
145 | 1.19M | while (s != end && Pred(*s)) { |
146 | 692k | ++s; |
147 | 692k | } |
148 | 997k | } else if constexpr (LEN > 0) { |
149 | | // Consume exactly LEN characters that match the predicate. |
150 | 1.98M | for (int i = 0; i < LEN; ++i, ++s) { |
151 | 997k | if (s == end || !Pred(*s)) [[unlikely]] { |
152 | 10.8k | return false; |
153 | 10.8k | } |
154 | 997k | } |
155 | 997k | } else { // LEN < 0 |
156 | | // Consume at least -LEN characters that match the predicate. |
157 | 54 | int count = 0; |
158 | 360 | while (s != end && Pred(*s)) { |
159 | 306 | ++s; |
160 | 306 | ++count; |
161 | 306 | } |
162 | 54 | if (count < -LEN) [[unlikely]] { |
163 | 0 | return false; |
164 | 0 | } |
165 | 54 | } |
166 | 987k | return true; |
167 | 1.50M | } _ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 142 | 198k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | 198k | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | 201k | while (s != end && Pred(*s)) { | 146 | 3.03k | ++s; | 147 | 3.03k | } | 148 | | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | | for (int i = 0; i < LEN; ++i, ++s) { | 151 | | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | | return false; | 153 | | } | 154 | | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 198k | return true; | 167 | 198k | } |
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_ Line | Count | Source | 142 | 308k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | 308k | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | 997k | while (s != end && Pred(*s)) { | 146 | 689k | ++s; | 147 | 689k | } | 148 | | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | | for (int i = 0; i < LEN; ++i, ++s) { | 151 | | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | | return false; | 153 | | } | 154 | | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 308k | return true; | 167 | 308k | } |
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 142 | 54 | bool skip_qualified_char(const char*& s, const char* end) { | 143 | | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | | while (s != end && Pred(*s)) { | 146 | | ++s; | 147 | | } | 148 | | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | | for (int i = 0; i < LEN; ++i, ++s) { | 151 | | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | | return false; | 153 | | } | 154 | | } | 155 | 54 | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | 54 | int count = 0; | 158 | 360 | while (s != end && Pred(*s)) { | 159 | 306 | ++s; | 160 | 306 | ++count; | 161 | 306 | } | 162 | 54 | if (count < -LEN) [[unlikely]] { | 163 | 0 | return false; | 164 | 0 | } | 165 | 54 | } | 166 | 54 | return true; | 167 | 54 | } |
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_ _ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_ Line | Count | Source | 142 | 35.3k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | | while (s != end && Pred(*s)) { | 146 | | ++s; | 147 | | } | 148 | 35.3k | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | 59.9k | for (int i = 0; i < LEN; ++i, ++s) { | 151 | 35.3k | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | 10.6k | return false; | 153 | 10.6k | } | 154 | 35.3k | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 24.6k | return true; | 167 | 35.3k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_ Line | Count | Source | 142 | 176k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | | while (s != end && Pred(*s)) { | 146 | | ++s; | 147 | | } | 148 | 176k | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | 352k | for (int i = 0; i < LEN; ++i, ++s) { | 151 | 176k | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | 48 | return false; | 153 | 48 | } | 154 | 176k | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 176k | return true; | 167 | 176k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_ Line | Count | Source | 142 | 439k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | | while (s != end && Pred(*s)) { | 146 | | ++s; | 147 | | } | 148 | 439k | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | 878k | for (int i = 0; i < LEN; ++i, ++s) { | 151 | 439k | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | 42 | return false; | 153 | 42 | } | 154 | 439k | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 439k | return true; | 167 | 439k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_ Line | Count | Source | 142 | 347k | bool skip_qualified_char(const char*& s, const char* end) { | 143 | | if constexpr (LEN == 0) { | 144 | | // Consume any length of characters that match the predicate. | 145 | | while (s != end && Pred(*s)) { | 146 | | ++s; | 147 | | } | 148 | 347k | } else if constexpr (LEN > 0) { | 149 | | // Consume exactly LEN characters that match the predicate. | 150 | 694k | for (int i = 0; i < LEN; ++i, ++s) { | 151 | 347k | if (s == end || !Pred(*s)) [[unlikely]] { | 152 | 24 | return false; | 153 | 24 | } | 154 | 347k | } | 155 | | } else { // LEN < 0 | 156 | | // Consume at least -LEN characters that match the predicate. | 157 | | int count = 0; | 158 | | while (s != end && Pred(*s)) { | 159 | | ++s; | 160 | | ++count; | 161 | | } | 162 | | if (count < -LEN) [[unlikely]] { | 163 | | return false; | 164 | | } | 165 | | } | 166 | 347k | return true; | 167 | 347k | } |
|
168 | | |
169 | | inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>; |
170 | | inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>; |
171 | | inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>; |
172 | | inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>; |
173 | | inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>; |
174 | | |
175 | 176k | inline bool is_delimiter(char c) { |
176 | 176k | return c == ' ' || c == 'T' || c == ':'; |
177 | 176k | } |
178 | | inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>; |
179 | | |
180 | 664k | inline bool is_date_sep(char c) { |
181 | 664k | return c == '-' || c == '/'; |
182 | 664k | } |
183 | | inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>; |
184 | | |
185 | 347k | inline bool is_colon(char c) { |
186 | 347k | return c == ':'; |
187 | 347k | } |
188 | | inline auto consume_one_colon = skip_qualified_char<1, is_colon>; |
189 | | |
190 | | // only consume a string of digit, not include sign. |
191 | | // when has MAX_LEN > 0, do greedy match but at most MAX_LEN. |
192 | | // LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits. |
193 | | template <typename T, int LEN = 0, int MAX_LEN = -1> |
194 | 20 | bool consume_digit(const char*& s, const char* end, T& out) { |
195 | 20 | static_assert(LEN >= 0); |
196 | | if constexpr (MAX_LEN > 0) { |
197 | | out = 0; |
198 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { |
199 | | if (s == end || !is_numeric_ascii(*s)) { |
200 | | if (i < LEN) [[unlikely]] { |
201 | | return false; |
202 | | } |
203 | | break; // stop consuming if we have consumed enough digits. |
204 | | } |
205 | | out = out * 10 + (*s - '0'); |
206 | | } |
207 | | } else if constexpr (LEN == 0) { |
208 | | // Consume any length of digits. |
209 | | out = 0; |
210 | | while (s != end && is_numeric_ascii(*s)) { |
211 | | out = out * 10 + (*s - '0'); |
212 | | ++s; |
213 | | } |
214 | 20 | } else if constexpr (LEN > 0) { |
215 | | // Consume exactly LEN digits. |
216 | 20 | out = 0; |
217 | 85 | for (int i = 0; i < LEN; ++i, ++s) { |
218 | 65 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
219 | 0 | return false; |
220 | 0 | } |
221 | 65 | out = out * 10 + (*s - '0'); |
222 | 65 | } |
223 | 20 | } |
224 | 20 | return true; |
225 | 20 | } _ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_ Line | Count | Source | 194 | 15 | bool consume_digit(const char*& s, const char* end, T& out) { | 195 | 15 | static_assert(LEN >= 0); | 196 | | if constexpr (MAX_LEN > 0) { | 197 | | out = 0; | 198 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 199 | | if (s == end || !is_numeric_ascii(*s)) { | 200 | | if (i < LEN) [[unlikely]] { | 201 | | return false; | 202 | | } | 203 | | break; // stop consuming if we have consumed enough digits. | 204 | | } | 205 | | out = out * 10 + (*s - '0'); | 206 | | } | 207 | | } else if constexpr (LEN == 0) { | 208 | | // Consume any length of digits. | 209 | | out = 0; | 210 | | while (s != end && is_numeric_ascii(*s)) { | 211 | | out = out * 10 + (*s - '0'); | 212 | | ++s; | 213 | | } | 214 | 15 | } else if constexpr (LEN > 0) { | 215 | | // Consume exactly LEN digits. | 216 | 15 | out = 0; | 217 | 75 | for (int i = 0; i < LEN; ++i, ++s) { | 218 | 60 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 219 | 0 | return false; | 220 | 0 | } | 221 | 60 | out = out * 10 + (*s - '0'); | 222 | 60 | } | 223 | 15 | } | 224 | 15 | return true; | 225 | 15 | } |
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_ Line | Count | Source | 194 | 5 | bool consume_digit(const char*& s, const char* end, T& out) { | 195 | 5 | static_assert(LEN >= 0); | 196 | | if constexpr (MAX_LEN > 0) { | 197 | | out = 0; | 198 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 199 | | if (s == end || !is_numeric_ascii(*s)) { | 200 | | if (i < LEN) [[unlikely]] { | 201 | | return false; | 202 | | } | 203 | | break; // stop consuming if we have consumed enough digits. | 204 | | } | 205 | | out = out * 10 + (*s - '0'); | 206 | | } | 207 | | } else if constexpr (LEN == 0) { | 208 | | // Consume any length of digits. | 209 | | out = 0; | 210 | | while (s != end && is_numeric_ascii(*s)) { | 211 | | out = out * 10 + (*s - '0'); | 212 | | ++s; | 213 | | } | 214 | 5 | } else if constexpr (LEN > 0) { | 215 | | // Consume exactly LEN digits. | 216 | 5 | out = 0; | 217 | 10 | for (int i = 0; i < LEN; ++i, ++s) { | 218 | 5 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 219 | 0 | return false; | 220 | 0 | } | 221 | 5 | out = out * 10 + (*s - '0'); | 222 | 5 | } | 223 | 5 | } | 224 | 5 | return true; | 225 | 5 | } |
|
226 | | |
227 | | // specialized version for 2 digits, which is used very often in date/time parsing. |
228 | | template <> |
229 | 518k | inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) { |
230 | 518k | out = 0; |
231 | 518k | if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1))) |
232 | 18.2k | [[unlikely]] { |
233 | 18.2k | return false; |
234 | 18.2k | } |
235 | 500k | out = (s[0] - '0') * 10 + (s[1] - '0'); |
236 | 500k | s += 2; // consume 2 digits |
237 | 500k | return true; |
238 | 518k | } |
239 | | |
240 | | // specialized version for 1 or 2 digits, which is used very often in date/time parsing. |
241 | | template <> |
242 | 987k | inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) { |
243 | 987k | out = 0; |
244 | 987k | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
245 | 480 | return false; |
246 | 986k | } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) { |
247 | | // consume 2 digits |
248 | 970k | out = (*s - '0') * 10 + (*(s + 1) - '0'); |
249 | 970k | s += 2; |
250 | 970k | } else { |
251 | | // consume 1 digit |
252 | 16.6k | out = *s - '0'; |
253 | 16.6k | ++s; |
254 | 16.6k | } |
255 | 986k | return true; |
256 | 987k | } |
257 | | |
258 | | template <bool (*Pred)(char)> |
259 | 148 | uint32_t count_valid_length(const char* s, const char* end) { |
260 | 148 | DCHECK(s <= end) << "s: " << s << ", end: " << end; |
261 | 148 | uint32_t count = 0; |
262 | 449 | while (s != end && Pred(*s)) { |
263 | 301 | ++count; |
264 | 301 | ++s; |
265 | 301 | } |
266 | 148 | return count; |
267 | 148 | } |
268 | | |
269 | | inline auto count_digits = count_valid_length<is_numeric_ascii>; |
270 | | |
271 | 136 | inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { |
272 | 136 | std::string result(6, '0'); |
273 | 136 | result[0] = sign; |
274 | 136 | result[1] = '0' + (hour_offset / 10); |
275 | 136 | result[2] = '0' + (hour_offset % 10); |
276 | 136 | result[3] = ':'; |
277 | 136 | result[4] = '0' + (minute_offset / 10); |
278 | 136 | result[5] = '0' + (minute_offset % 10); |
279 | 136 | DCHECK_EQ(result.size(), 6); |
280 | 136 | return result; |
281 | 136 | } |
282 | | |
283 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
284 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
285 | | // |
286 | | // Strings with leading and trailing whitespaces are accepted. |
287 | | // Branching is heavily optimized for the non-whitespace successful case. |
288 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
289 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
290 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
291 | | // |
292 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
293 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
294 | | // and inf/-inf for float types. |
295 | | // |
296 | | // Things we tried that did not work: |
297 | | // - lookup table for converting character to digit |
298 | | // Improvements (TODO): |
299 | | // - Validate input using _simd_compare_ranges |
300 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
301 | | class StringParser { |
302 | | public: |
303 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
304 | | |
305 | | template <typename T> |
306 | 447k | static T numeric_limits(bool negative) { |
307 | 447k | if constexpr (std::is_same_v<T, __int128>) { |
308 | 48.3k | return negative ? MIN_INT128 : MAX_INT128; |
309 | 399k | } else { |
310 | 399k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
311 | 399k | } |
312 | 447k | } _ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 306 | 48.3k | static T numeric_limits(bool negative) { | 307 | 48.3k | if constexpr (std::is_same_v<T, __int128>) { | 308 | 48.3k | return negative ? MIN_INT128 : MAX_INT128; | 309 | | } else { | 310 | | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | | } | 312 | 48.3k | } |
_ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 306 | 165k | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 165k | } else { | 310 | 165k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 165k | } | 312 | 165k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 306 | 76.9k | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 76.9k | } else { | 310 | 76.9k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 76.9k | } | 312 | 76.9k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 306 | 68.2k | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 68.2k | } else { | 310 | 68.2k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 68.2k | } | 312 | 68.2k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 306 | 87.9k | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 87.9k | } else { | 310 | 87.9k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 87.9k | } | 312 | 87.9k | } |
_ZN5doris12StringParser14numeric_limitsIjEET_b Line | Count | Source | 306 | 147 | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 147 | } else { | 310 | 147 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 147 | } | 312 | 147 | } |
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Line | Count | Source | 306 | 4 | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 4 | } else { | 310 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 4 | } | 312 | 4 | } |
_ZN5doris12StringParser14numeric_limitsIoEET_b Line | Count | Source | 306 | 4 | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 4 | } else { | 310 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 4 | } | 312 | 4 | } |
_ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 306 | 21 | static T numeric_limits(bool negative) { | 307 | | if constexpr (std::is_same_v<T, __int128>) { | 308 | | return negative ? MIN_INT128 : MAX_INT128; | 309 | 21 | } else { | 310 | 21 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 311 | 21 | } | 312 | 21 | } |
|
313 | | |
314 | | template <typename T> |
315 | 873k | static T get_scale_multiplier(int scale) { |
316 | 873k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
317 | 873k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
318 | 873k | "You can only instantiate as int32_t, int64_t, __int128."); |
319 | 873k | if constexpr (std::is_same_v<T, int32_t>) { |
320 | 133k | return common::exp10_i32(scale); |
321 | 182k | } else if constexpr (std::is_same_v<T, int64_t>) { |
322 | 182k | return common::exp10_i64(scale); |
323 | 237k | } else if constexpr (std::is_same_v<T, __int128>) { |
324 | 237k | return common::exp10_i128(scale); |
325 | 320k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
326 | 320k | return common::exp10_i256(scale); |
327 | 320k | } |
328 | 873k | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 315 | 133k | static T get_scale_multiplier(int scale) { | 316 | 133k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 317 | 133k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 318 | 133k | "You can only instantiate as int32_t, int64_t, __int128."); | 319 | 133k | if constexpr (std::is_same_v<T, int32_t>) { | 320 | 133k | return common::exp10_i32(scale); | 321 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 322 | | return common::exp10_i64(scale); | 323 | | } else if constexpr (std::is_same_v<T, __int128>) { | 324 | | return common::exp10_i128(scale); | 325 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 326 | | return common::exp10_i256(scale); | 327 | | } | 328 | 133k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 315 | 182k | static T get_scale_multiplier(int scale) { | 316 | 182k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 317 | 182k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 318 | 182k | "You can only instantiate as int32_t, int64_t, __int128."); | 319 | | if constexpr (std::is_same_v<T, int32_t>) { | 320 | | return common::exp10_i32(scale); | 321 | 182k | } else if constexpr (std::is_same_v<T, int64_t>) { | 322 | 182k | return common::exp10_i64(scale); | 323 | | } else if constexpr (std::is_same_v<T, __int128>) { | 324 | | return common::exp10_i128(scale); | 325 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 326 | | return common::exp10_i256(scale); | 327 | | } | 328 | 182k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 315 | 237k | static T get_scale_multiplier(int scale) { | 316 | 237k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 317 | 237k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 318 | 237k | "You can only instantiate as int32_t, int64_t, __int128."); | 319 | | if constexpr (std::is_same_v<T, int32_t>) { | 320 | | return common::exp10_i32(scale); | 321 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 322 | | return common::exp10_i64(scale); | 323 | 237k | } else if constexpr (std::is_same_v<T, __int128>) { | 324 | 237k | return common::exp10_i128(scale); | 325 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 326 | | return common::exp10_i256(scale); | 327 | | } | 328 | 237k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 315 | 320k | static T get_scale_multiplier(int scale) { | 316 | 320k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 317 | 320k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 318 | 320k | "You can only instantiate as int32_t, int64_t, __int128."); | 319 | | if constexpr (std::is_same_v<T, int32_t>) { | 320 | | return common::exp10_i32(scale); | 321 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 322 | | return common::exp10_i64(scale); | 323 | | } else if constexpr (std::is_same_v<T, __int128>) { | 324 | | return common::exp10_i128(scale); | 325 | 320k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 326 | 320k | return common::exp10_i256(scale); | 327 | 320k | } | 328 | 320k | } |
|
329 | | |
330 | | // This is considerably faster than glibc's implementation (25x). |
331 | | // Assumes s represents a decimal number. |
332 | | template <typename T, bool enable_strict_mode = false> |
333 | 354k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
334 | 354k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); |
335 | 354k | if (LIKELY(*result == PARSE_SUCCESS)) { |
336 | 300k | return ans; |
337 | 300k | } |
338 | 53.9k | s = skip_leading_whitespace(s, len); |
339 | 53.9k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); |
340 | 354k | } _ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 45.2k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 45.2k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 45.2k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 43.9k | return ans; | 337 | 43.9k | } | 338 | 1.33k | s = skip_leading_whitespace(s, len); | 339 | 1.33k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 45.2k | } |
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 95.4k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 95.4k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 95.4k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 66.2k | return ans; | 337 | 66.2k | } | 338 | 29.2k | s = skip_leading_whitespace(s, len); | 339 | 29.2k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 95.4k | } |
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 66.3k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 66.3k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 66.3k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 58.1k | return ans; | 337 | 58.1k | } | 338 | 8.12k | s = skip_leading_whitespace(s, len); | 339 | 8.12k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 66.3k | } |
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 61.3k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 61.3k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 61.3k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 55.0k | return ans; | 337 | 55.0k | } | 338 | 6.33k | s = skip_leading_whitespace(s, len); | 339 | 6.33k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 61.3k | } |
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 81.0k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 81.0k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 81.0k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 76.4k | return ans; | 337 | 76.4k | } | 338 | 4.50k | s = skip_leading_whitespace(s, len); | 339 | 4.50k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 81.0k | } |
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 1.00k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 1.00k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 1.00k | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 88 | return ans; | 337 | 88 | } | 338 | 912 | s = skip_leading_whitespace(s, len); | 339 | 912 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 1.00k | } |
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 984 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 984 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 984 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 88 | return ans; | 337 | 88 | } | 338 | 896 | s = skip_leading_whitespace(s, len); | 339 | 896 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 984 | } |
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 968 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 968 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 968 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 88 | return ans; | 337 | 88 | } | 338 | 880 | s = skip_leading_whitespace(s, len); | 339 | 880 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 968 | } |
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 961 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 961 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 961 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 94 | return ans; | 337 | 94 | } | 338 | 867 | s = skip_leading_whitespace(s, len); | 339 | 867 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 961 | } |
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 936 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 936 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 936 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 88 | return ans; | 337 | 88 | } | 338 | 848 | s = skip_leading_whitespace(s, len); | 339 | 848 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 936 | } |
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 4 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 4 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 4 | return ans; | 337 | 4 | } | 338 | 0 | s = skip_leading_whitespace(s, len); | 339 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 4 | } |
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 4 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 4 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 4 | return ans; | 337 | 4 | } | 338 | 0 | s = skip_leading_whitespace(s, len); | 339 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 4 | } |
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 20 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 20 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 335 | 20 | if (LIKELY(*result == PARSE_SUCCESS)) { | 336 | 20 | return ans; | 337 | 20 | } | 338 | 0 | s = skip_leading_whitespace(s, len); | 339 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 340 | 20 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE |
341 | | |
342 | | // This is considerably faster than glibc's implementation. |
343 | | // In the case of overflow, the max/min value for the data type will be returned. |
344 | | // Assumes s represents a decimal number. |
345 | | template <typename T> |
346 | 1.37k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
347 | 1.37k | s = skip_ascii_whitespaces(s, len); |
348 | 1.37k | return string_to_unsigned_int_internal<T>(s, len, result); |
349 | 1.37k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 346 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 347 | 343 | s = skip_ascii_whitespaces(s, len); | 348 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 349 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 346 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 347 | 343 | s = skip_ascii_whitespaces(s, len); | 348 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 349 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 346 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 347 | 343 | s = skip_ascii_whitespaces(s, len); | 348 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 349 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 346 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 347 | 343 | s = skip_ascii_whitespaces(s, len); | 348 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 349 | 343 | } |
|
350 | | |
351 | | // Convert a string s representing a number in given base into a decimal number. |
352 | | template <typename T> |
353 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
354 | 27.8k | ParseResult* result) { |
355 | 27.8k | s = skip_ascii_whitespaces(s, len); |
356 | 27.8k | return string_to_int_internal<T>(s, len, base, result); |
357 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 354 | 26.4k | ParseResult* result) { | 355 | 26.4k | s = skip_ascii_whitespaces(s, len); | 356 | 26.4k | return string_to_int_internal<T>(s, len, base, result); | 357 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 354 | 490 | ParseResult* result) { | 355 | 490 | s = skip_ascii_whitespaces(s, len); | 356 | 490 | return string_to_int_internal<T>(s, len, base, result); | 357 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 354 | 441 | ParseResult* result) { | 355 | 441 | s = skip_ascii_whitespaces(s, len); | 356 | 441 | return string_to_int_internal<T>(s, len, base, result); | 357 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 354 | 441 | ParseResult* result) { | 355 | 441 | s = skip_ascii_whitespaces(s, len); | 356 | 441 | return string_to_int_internal<T>(s, len, base, result); | 357 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 354 | 1 | ParseResult* result) { | 355 | 1 | s = skip_ascii_whitespaces(s, len); | 356 | 1 | return string_to_int_internal<T>(s, len, base, result); | 357 | 1 | } |
|
358 | | |
359 | | template <typename T> |
360 | 152k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
361 | 152k | s = skip_ascii_whitespaces(s, len); |
362 | 152k | return string_to_float_internal<T>(s, len, result); |
363 | 152k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 360 | 87.6k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 361 | 87.6k | s = skip_ascii_whitespaces(s, len); | 362 | 87.6k | return string_to_float_internal<T>(s, len, result); | 363 | 87.6k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 360 | 65.1k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 361 | 65.1k | s = skip_ascii_whitespaces(s, len); | 362 | 65.1k | return string_to_float_internal<T>(s, len, result); | 363 | 65.1k | } |
|
364 | | |
365 | | // Parses a string for 'true' or 'false', case insensitive. |
366 | 11.3k | static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) { |
367 | 11.3k | s = skip_ascii_whitespaces(s, len); |
368 | 11.3k | return string_to_bool_internal(s, len, result); |
369 | 11.3k | } |
370 | | |
371 | | template <PrimitiveType P> |
372 | | static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal( |
373 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
374 | | ParseResult* result); |
375 | | |
376 | | template <typename T> |
377 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
378 | | const T key_value_separator, |
379 | | std::map<std::string, std::string>* result) { |
380 | | int key_pos = 0; |
381 | | int key_end; |
382 | | int val_pos; |
383 | | int val_end; |
384 | | |
385 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
386 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
387 | | std::string::npos) { |
388 | | break; |
389 | | } |
390 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
391 | | val_end = base.size(); |
392 | | } |
393 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
394 | | base.substr(val_pos, val_end - val_pos))); |
395 | | key_pos = val_end; |
396 | | if (key_pos != std::string::npos) { |
397 | | ++key_pos; |
398 | | } |
399 | | } |
400 | | |
401 | | return Status::OK(); |
402 | | } |
403 | | |
404 | | // This is considerably faster than glibc's implementation. |
405 | | // In the case of overflow, the max/min value for the data type will be returned. |
406 | | // Assumes s represents a decimal number. |
407 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
408 | | template <typename T, bool enable_strict_mode = false> |
409 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
410 | | |
411 | | // This is considerably faster than glibc's implementation. |
412 | | // In the case of overflow, the max/min value for the data type will be returned. |
413 | | // Assumes s represents a decimal number. |
414 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
415 | | template <typename T> |
416 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
417 | | ParseResult* result); |
418 | | |
419 | | // Convert a string s representing a number in given base into a decimal number. |
420 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
421 | | template <typename T> |
422 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
423 | | ParseResult* result); |
424 | | |
425 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
426 | | // and the number is positive. |
427 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
428 | | template <typename T, bool enable_strict_mode = false> |
429 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
430 | | ParseResult* result); |
431 | | |
432 | | // The input is either zero-length or starts with at least one legal digit. Consumes at most |
433 | | // max_len digits, stopping earlier if the next char is not a digit. |
434 | | template <typename T> |
435 | | static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
436 | | ParseResult* result); |
437 | | |
438 | | // This is considerably faster than glibc's implementation (>100x; the reason is not yet understood). |
439 | | // No special case handling needs to be done for overflows, the floating point spec |
440 | | // already does it and will cap the values to -inf/inf |
441 | | // To avoid inaccurate conversions this function falls back to strtod for |
442 | | // scientific notation. |
443 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
444 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
445 | | template <typename T> |
446 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
447 | | ParseResult* result); |
448 | | |
449 | | // parses a string for 'true' or 'false', case insensitive |
450 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
451 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
452 | | ParseResult* result); |
453 | | |
454 | | // Returns true if s only contains whitespace. |
455 | 3.54k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
456 | 6.44k | for (int i = 0; i < len; ++i) { |
457 | 6.00k | if (!LIKELY(is_whitespace_ascii(s[i]))) { |
458 | 3.10k | return false; |
459 | 3.10k | } |
460 | 6.00k | } |
461 | 440 | return true; |
462 | 3.54k | } |
463 | | |
464 | | // Returns true if s is a '.' followed only by digits (possibly none), so that strings like "3.0", "3.123", and "3." can be parsed as 3. |
465 | 3.65k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
466 | 3.65k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
467 | 3.65k | } |
468 | | |
469 | 2.67k | static inline bool is_all_digit(const char* __restrict s, int len) { |
470 | 5.57k | for (int i = 0; i < len; ++i) { |
471 | 3.05k | if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { |
472 | 151 | return false; |
473 | 151 | } |
474 | 3.05k | } |
475 | 2.52k | return true; |
476 | 2.67k | } |
477 | | }; // end of class StringParser |
478 | | |
479 | | template <typename T, bool enable_strict_mode> |
480 | 408k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
481 | 408k | if (UNLIKELY(len <= 0)) { |
482 | 2.25k | *result = PARSE_FAILURE; |
483 | 2.25k | return 0; |
484 | 2.25k | } |
485 | | |
486 | 406k | using UnsignedT = MakeUnsignedT<T>; |
487 | 406k | UnsignedT val = 0; |
488 | 406k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
489 | 406k | bool negative = false; |
490 | 406k | int i = 0; |
491 | 406k | switch (*s) { |
492 | 102k | case '-': |
493 | 102k | negative = true; |
494 | 102k | max_val += 1; |
495 | 102k | [[fallthrough]]; |
496 | 105k | case '+': |
497 | 105k | ++i; |
498 | | // only one '+'/'-' char, so could return failure directly |
499 | 105k | if (UNLIKELY(len == 1)) { |
500 | 0 | *result = PARSE_FAILURE; |
501 | 0 | return 0; |
502 | 0 | } |
503 | 406k | } |
504 | | |
505 | | // This is the fast path where the string cannot overflow. |
506 | 406k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
507 | 240k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); |
508 | 240k | return static_cast<T>(negative ? -val : val); |
509 | 240k | } |
510 | | |
511 | 166k | const T max_div_10 = max_val / 10; |
512 | 166k | const T max_mod_10 = max_val % 10; |
513 | | |
514 | 166k | int first = i; |
515 | 1.68M | for (; i < len; ++i) { |
516 | 1.61M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
517 | 1.57M | T digit = s[i] - '0'; |
518 | | // This is a tricky check to see if adding this digit will cause an overflow. |
519 | 1.57M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
520 | 48.4k | *result = PARSE_OVERFLOW; |
521 | 48.4k | return negative ? -max_val : max_val; |
522 | 48.4k | } |
523 | 1.52M | val = val * 10 + digit; |
524 | 1.52M | } else { |
525 | 45.9k | if constexpr (enable_strict_mode) { |
526 | 4.08k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
527 | | // Reject the string because the remaining chars are not all whitespace |
528 | 3.78k | *result = PARSE_FAILURE; |
529 | 3.78k | return 0; |
530 | 3.78k | } |
531 | 41.8k | } else { |
532 | | // Save original position where non-digit was found |
533 | 41.8k | int remaining_len = len - i; |
534 | 41.8k | const char* remaining_s = s + i; |
535 | | // Skip trailing whitespaces from the remaining portion |
536 | 41.8k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); |
537 | 41.8k | if ((UNLIKELY(i == first || (remaining_len != 0 && |
538 | 41.8k | !is_float_suffix(remaining_s, remaining_len))))) { |
539 | | // Reject the string because either the first char was not a digit, |
540 | | // or the remaining chars are not all whitespace |
541 | 28.9k | *result = PARSE_FAILURE; |
542 | 28.9k | return 0; |
543 | 28.9k | } |
544 | 41.8k | } |
545 | | // Returning here is slightly faster than breaking the loop. |
546 | 13.1k | *result = PARSE_SUCCESS; |
547 | 45.9k | return static_cast<T>(negative ? -val : val); |
548 | 45.9k | } |
549 | 1.61M | } |
550 | 71.7k | *result = PARSE_SUCCESS; |
551 | 71.7k | return static_cast<T>(negative ? -val : val); |
552 | 166k | } _ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 46.6k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 46.6k | if (UNLIKELY(len <= 0)) { | 482 | 44 | *result = PARSE_FAILURE; | 483 | 44 | return 0; | 484 | 44 | } | 485 | | | 486 | 46.5k | using UnsignedT = MakeUnsignedT<T>; | 487 | 46.5k | UnsignedT val = 0; | 488 | 46.5k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 46.5k | bool negative = false; | 490 | 46.5k | int i = 0; | 491 | 46.5k | switch (*s) { | 492 | 3.54k | case '-': | 493 | 3.54k | negative = true; | 494 | 3.54k | max_val += 1; | 495 | 3.54k | [[fallthrough]]; | 496 | 3.82k | case '+': | 497 | 3.82k | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 3.82k | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 46.5k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 46.5k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 41.9k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 41.9k | return static_cast<T>(negative ? -val : val); | 509 | 41.9k | } | 510 | | | 511 | 4.65k | const T max_div_10 = max_val / 10; | 512 | 4.65k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 4.65k | int first = i; | 515 | 172k | for (; i < len; ++i) { | 516 | 169k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 168k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 168k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 512 | *result = PARSE_OVERFLOW; | 521 | 512 | return negative ? 
-max_val : max_val; | 522 | 512 | } | 523 | 168k | val = val * 10 + digit; | 524 | 168k | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 536 | } else { | 532 | | // Save original position where non-digit was found | 533 | 536 | int remaining_len = len - i; | 534 | 536 | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 536 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 536 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 536 | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 376 | *result = PARSE_FAILURE; | 542 | 376 | return 0; | 543 | 376 | } | 544 | 536 | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 160 | *result = PARSE_SUCCESS; | 547 | 536 | return static_cast<T>(negative ? -val : val); | 548 | 536 | } | 549 | 169k | } | 550 | 3.60k | *result = PARSE_SUCCESS; | 551 | 3.60k | return static_cast<T>(negative ? -val : val); | 552 | 4.65k | } |
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 124k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 124k | if (UNLIKELY(len <= 0)) { | 482 | 218 | *result = PARSE_FAILURE; | 483 | 218 | return 0; | 484 | 218 | } | 485 | | | 486 | 124k | using UnsignedT = MakeUnsignedT<T>; | 487 | 124k | UnsignedT val = 0; | 488 | 124k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 124k | bool negative = false; | 490 | 124k | int i = 0; | 491 | 124k | switch (*s) { | 492 | 22.4k | case '-': | 493 | 22.4k | negative = true; | 494 | 22.4k | max_val += 1; | 495 | 22.4k | [[fallthrough]]; | 496 | 22.9k | case '+': | 497 | 22.9k | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 22.9k | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 124k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 124k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 60.4k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 60.4k | return static_cast<T>(negative ? -val : val); | 509 | 60.4k | } | 510 | | | 511 | 64.0k | const T max_div_10 = max_val / 10; | 512 | 64.0k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 64.0k | int first = i; | 515 | 154k | for (; i < len; ++i) { | 516 | 147k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 111k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 111k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 21.2k | *result = PARSE_OVERFLOW; | 521 | 21.2k | return negative ? 
-max_val : max_val; | 522 | 21.2k | } | 523 | 90.2k | val = val * 10 + digit; | 524 | 90.2k | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 35.9k | } else { | 532 | | // Save original position where non-digit was found | 533 | 35.9k | int remaining_len = len - i; | 534 | 35.9k | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 35.9k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 35.9k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 35.9k | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 24.5k | *result = PARSE_FAILURE; | 542 | 24.5k | return 0; | 543 | 24.5k | } | 544 | 35.9k | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 11.4k | *result = PARSE_SUCCESS; | 547 | 35.9k | return static_cast<T>(negative ? -val : val); | 548 | 35.9k | } | 549 | 147k | } | 550 | 6.80k | *result = PARSE_SUCCESS; | 551 | 6.80k | return static_cast<T>(negative ? -val : val); | 552 | 64.0k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 74.4k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 74.4k | if (UNLIKELY(len <= 0)) { | 482 | 8 | *result = PARSE_FAILURE; | 483 | 8 | return 0; | 484 | 8 | } | 485 | | | 486 | 74.4k | using UnsignedT = MakeUnsignedT<T>; | 487 | 74.4k | UnsignedT val = 0; | 488 | 74.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 74.4k | bool negative = false; | 490 | 74.4k | int i = 0; | 491 | 74.4k | switch (*s) { | 492 | 12.8k | case '-': | 493 | 12.8k | negative = true; | 494 | 12.8k | max_val += 1; | 495 | 12.8k | [[fallthrough]]; | 496 | 13.1k | case '+': | 497 | 13.1k | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 13.1k | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 74.4k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 74.4k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 50.8k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 50.8k | return static_cast<T>(negative ? -val : val); | 509 | 50.8k | } | 510 | | | 511 | 23.6k | const T max_div_10 = max_val / 10; | 512 | 23.6k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 23.6k | int first = i; | 515 | 123k | for (; i < len; ++i) { | 516 | 114k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 112k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 112k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 12.9k | *result = PARSE_OVERFLOW; | 521 | 12.9k | return negative ? 
-max_val : max_val; | 522 | 12.9k | } | 523 | 99.5k | val = val * 10 + digit; | 524 | 99.5k | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 1.90k | } else { | 532 | | // Save original position where non-digit was found | 533 | 1.90k | int remaining_len = len - i; | 534 | 1.90k | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 1.90k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 1.90k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 1.90k | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 1.29k | *result = PARSE_FAILURE; | 542 | 1.29k | return 0; | 543 | 1.29k | } | 544 | 1.90k | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 610 | *result = PARSE_SUCCESS; | 547 | 1.90k | return static_cast<T>(negative ? -val : val); | 548 | 1.90k | } | 549 | 114k | } | 550 | 8.80k | *result = PARSE_SUCCESS; | 551 | 8.80k | return static_cast<T>(negative ? -val : val); | 552 | 23.6k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 67.7k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 67.7k | if (UNLIKELY(len <= 0)) { | 482 | 1.92k | *result = PARSE_FAILURE; | 483 | 1.92k | return 0; | 484 | 1.92k | } | 485 | | | 486 | 65.7k | using UnsignedT = MakeUnsignedT<T>; | 487 | 65.7k | UnsignedT val = 0; | 488 | 65.7k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 65.7k | bool negative = false; | 490 | 65.7k | int i = 0; | 491 | 65.7k | switch (*s) { | 492 | 10.1k | case '-': | 493 | 10.1k | negative = true; | 494 | 10.1k | max_val += 1; | 495 | 10.1k | [[fallthrough]]; | 496 | 10.5k | case '+': | 497 | 10.5k | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 10.5k | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 65.7k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 65.7k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 52.2k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 52.2k | return static_cast<T>(negative ? -val : val); | 509 | 52.2k | } | 510 | | | 511 | 13.5k | const T max_div_10 = max_val / 10; | 512 | 13.5k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 13.5k | int first = i; | 515 | 128k | for (; i < len; ++i) { | 516 | 122k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 120k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 120k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 5.79k | *result = PARSE_OVERFLOW; | 521 | 5.79k | return negative ? 
-max_val : max_val; | 522 | 5.79k | } | 523 | 115k | val = val * 10 + digit; | 524 | 115k | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 1.93k | } else { | 532 | | // Save original position where non-digit was found | 533 | 1.93k | int remaining_len = len - i; | 534 | 1.93k | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 1.93k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 1.93k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 1.93k | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 1.54k | *result = PARSE_FAILURE; | 542 | 1.54k | return 0; | 543 | 1.54k | } | 544 | 1.93k | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 386 | *result = PARSE_SUCCESS; | 547 | 1.93k | return static_cast<T>(negative ? -val : val); | 548 | 1.93k | } | 549 | 122k | } | 550 | 5.82k | *result = PARSE_SUCCESS; | 551 | 5.82k | return static_cast<T>(negative ? -val : val); | 552 | 13.5k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 85.5k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 85.5k | if (UNLIKELY(len <= 0)) { | 482 | 14 | *result = PARSE_FAILURE; | 483 | 14 | return 0; | 484 | 14 | } | 485 | | | 486 | 85.4k | using UnsignedT = MakeUnsignedT<T>; | 487 | 85.4k | UnsignedT val = 0; | 488 | 85.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 85.4k | bool negative = false; | 490 | 85.4k | int i = 0; | 491 | 85.4k | switch (*s) { | 492 | 50.2k | case '-': | 493 | 50.2k | negative = true; | 494 | 50.2k | max_val += 1; | 495 | 50.2k | [[fallthrough]]; | 496 | 50.5k | case '+': | 497 | 50.5k | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 50.5k | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 85.4k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 85.4k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 32.3k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 32.3k | return static_cast<T>(negative ? -val : val); | 509 | 32.3k | } | 510 | | | 511 | 53.1k | const T max_div_10 = max_val / 10; | 512 | 53.1k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 53.1k | int first = i; | 515 | 1.03M | for (; i < len; ++i) { | 516 | 989k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 988k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 988k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 5.22k | *result = PARSE_OVERFLOW; | 521 | 5.22k | return negative ? 
-max_val : max_val; | 522 | 5.22k | } | 523 | 983k | val = val * 10 + digit; | 524 | 983k | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 1.49k | } else { | 532 | | // Save original position where non-digit was found | 533 | 1.49k | int remaining_len = len - i; | 534 | 1.49k | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 1.49k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 1.49k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 1.49k | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 1.24k | *result = PARSE_FAILURE; | 542 | 1.24k | return 0; | 543 | 1.24k | } | 544 | 1.49k | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 244 | *result = PARSE_SUCCESS; | 547 | 1.49k | return static_cast<T>(negative ? -val : val); | 548 | 1.49k | } | 549 | 989k | } | 550 | 46.4k | *result = PARSE_SUCCESS; | 551 | 46.4k | return static_cast<T>(negative ? -val : val); | 552 | 53.1k | } |
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 149 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 149 | if (UNLIKELY(len <= 0)) { | 482 | 2 | *result = PARSE_FAILURE; | 483 | 2 | return 0; | 484 | 2 | } | 485 | | | 486 | 147 | using UnsignedT = MakeUnsignedT<T>; | 487 | 147 | UnsignedT val = 0; | 488 | 147 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 147 | bool negative = false; | 490 | 147 | int i = 0; | 491 | 147 | switch (*s) { | 492 | 0 | case '-': | 493 | 0 | negative = true; | 494 | 0 | max_val += 1; | 495 | 0 | [[fallthrough]]; | 496 | 0 | case '+': | 497 | 0 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 0 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 147 | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 147 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 147 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 147 | return static_cast<T>(negative ? -val : val); | 509 | 147 | } | 510 | | | 511 | 0 | const T max_div_10 = max_val / 10; | 512 | 0 | const T max_mod_10 = max_val % 10; | 513 | |
| 514 | 0 | int first = i; | 515 | 0 | for (; i < len; ++i) { | 516 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 0 | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 0 | *result = PARSE_OVERFLOW; | 521 | 0 | return negative ? -max_val : max_val; | 522 | 0 | } | 523 | 0 | val = val * 10 + digit; | 524 | 0 | } else { | 525 | 0 | if constexpr (enable_strict_mode) { | 526 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 0 | *result = PARSE_FAILURE; | 529 | 0 | return 0; | 530 | 0 | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 0 | *result = PARSE_SUCCESS; | 547 | 0 | return static_cast<T>(negative ? -val : val); | 548 | 0 | } | 549 | 0 | } | 550 | 0 | *result = PARSE_SUCCESS; | 551 | 0 | return static_cast<T>(negative ? -val : val); | 552 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 1.91k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 1.91k | if (UNLIKELY(len <= 0)) { | 482 | 8 | *result = PARSE_FAILURE; | 483 | 8 | return 0; | 484 | 8 | } | 485 | | | 486 | 1.90k | using UnsignedT = MakeUnsignedT<T>; | 487 | 1.90k | UnsignedT val = 0; | 488 | 1.90k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 1.90k | bool negative = false; | 490 | 1.90k | int i = 0; | 491 | 1.90k | switch (*s) { | 492 | 632 | case '-': | 493 | 632 | negative = true; | 494 | 632 | max_val += 1; | 495 | 632 | [[fallthrough]]; | 496 | 988 | case '+': | 497 | 988 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 988 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 1.90k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 1.90k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 48 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 48 | return static_cast<T>(negative ? -val : val); | 509 | 48 | } | 510 | | | 511 | 1.85k | const T max_div_10 = max_val / 10; | 512 | 1.85k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 1.85k | int first = i; | 515 | 6.58k | for (; i < len; ++i) { | 516 | 6.51k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 5.32k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 5.32k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 600 | *result = PARSE_OVERFLOW; | 521 | 600 | return negative ? 
-max_val : max_val; | 522 | 600 | } | 523 | 4.72k | val = val * 10 + digit; | 524 | 4.72k | } else { | 525 | 1.18k | if constexpr (enable_strict_mode) { | 526 | 1.18k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 1.10k | *result = PARSE_FAILURE; | 529 | 1.10k | return 0; | 530 | 1.10k | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 88 | *result = PARSE_SUCCESS; | 547 | 1.18k | return static_cast<T>(negative ? -val : val); | 548 | 1.18k | } | 549 | 6.51k | } | 550 | 68 | *result = PARSE_SUCCESS; | 551 | 68 | return static_cast<T>(negative ? -val : val); | 552 | 1.85k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 1.88k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 1.88k | if (UNLIKELY(len <= 0)) { | 482 | 8 | *result = PARSE_FAILURE; | 483 | 8 | return 0; | 484 | 8 | } | 485 | | | 486 | 1.87k | using UnsignedT = MakeUnsignedT<T>; | 487 | 1.87k | UnsignedT val = 0; | 488 | 1.87k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 1.87k | bool negative = false; | 490 | 1.87k | int i = 0; | 491 | 1.87k | switch (*s) { | 492 | 620 | case '-': | 493 | 620 | negative = true; | 494 | 620 | max_val += 1; | 495 | 620 | [[fallthrough]]; | 496 | 970 | case '+': | 497 | 970 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 970 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 1.87k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 1.87k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 168 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 168 | return static_cast<T>(negative ? -val : val); | 509 | 168 | } | 510 | | | 511 | 1.70k | const T max_div_10 = max_val / 10; | 512 | 1.70k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 1.70k | int first = i; | 515 | 7.87k | for (; i < len; ++i) { | 516 | 7.83k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 6.74k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 6.74k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 576 | *result = PARSE_OVERFLOW; | 521 | 576 | return negative ? 
-max_val : max_val; | 522 | 576 | } | 523 | 6.17k | val = val * 10 + digit; | 524 | 6.17k | } else { | 525 | 1.08k | if constexpr (enable_strict_mode) { | 526 | 1.08k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 1.00k | *result = PARSE_FAILURE; | 529 | 1.00k | return 0; | 530 | 1.00k | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 88 | *result = PARSE_SUCCESS; | 547 | 1.08k | return static_cast<T>(negative ? -val : val); | 548 | 1.08k | } | 549 | 7.83k | } | 550 | 40 | *result = PARSE_SUCCESS; | 551 | 40 | return static_cast<T>(negative ? -val : val); | 552 | 1.70k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 1.87k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 1.87k | if (UNLIKELY(len <= 0)) { | 482 | 8 | *result = PARSE_FAILURE; | 483 | 8 | return 0; | 484 | 8 | } | 485 | | | 486 | 1.86k | using UnsignedT = MakeUnsignedT<T>; | 487 | 1.86k | UnsignedT val = 0; | 488 | 1.86k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 1.86k | bool negative = false; | 490 | 1.86k | int i = 0; | 491 | 1.86k | switch (*s) { | 492 | 608 | case '-': | 493 | 608 | negative = true; | 494 | 608 | max_val += 1; | 495 | 608 | [[fallthrough]]; | 496 | 952 | case '+': | 497 | 952 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 952 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 1.86k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 1.86k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 488 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 488 | return static_cast<T>(negative ? -val : val); | 509 | 488 | } | 510 | | | 511 | 1.37k | const T max_div_10 = max_val / 10; | 512 | 1.37k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 1.37k | int first = i; | 515 | 10.7k | for (; i < len; ++i) { | 516 | 10.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 9.90k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 9.90k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 552 | *result = PARSE_OVERFLOW; | 521 | 552 | return negative ? 
-max_val : max_val; | 522 | 552 | } | 523 | 9.34k | val = val * 10 + digit; | 524 | 9.34k | } else { | 525 | 795 | if constexpr (enable_strict_mode) { | 526 | 795 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 735 | *result = PARSE_FAILURE; | 529 | 735 | return 0; | 530 | 735 | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 60 | *result = PARSE_SUCCESS; | 547 | 795 | return static_cast<T>(negative ? -val : val); | 548 | 795 | } | 549 | 10.6k | } | 550 | 32 | *result = PARSE_SUCCESS; | 551 | 32 | return static_cast<T>(negative ? -val : val); | 552 | 1.37k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 1.83k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 1.83k | if (UNLIKELY(len <= 0)) { | 482 | 10 | *result = PARSE_FAILURE; | 483 | 10 | return 0; | 484 | 10 | } | 485 | | | 486 | 1.82k | using UnsignedT = MakeUnsignedT<T>; | 487 | 1.82k | UnsignedT val = 0; | 488 | 1.82k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 1.82k | bool negative = false; | 490 | 1.82k | int i = 0; | 491 | 1.82k | switch (*s) { | 492 | 596 | case '-': | 493 | 596 | negative = true; | 494 | 596 | max_val += 1; | 495 | 596 | [[fallthrough]]; | 496 | 934 | case '+': | 497 | 934 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 934 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 1.82k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 1.82k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 738 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 738 | return static_cast<T>(negative ? -val : val); | 509 | 738 | } | 510 | | | 511 | 1.08k | const T max_div_10 = max_val / 10; | 512 | 1.08k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 1.08k | int first = i; | 515 | 16.9k | for (; i < len; ++i) { | 516 | 16.8k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 16.3k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 16.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 528 | *result = PARSE_OVERFLOW; | 521 | 528 | return negative ? 
-max_val : max_val; | 522 | 528 | } | 523 | 15.8k | val = val * 10 + digit; | 524 | 15.8k | } else { | 525 | 523 | if constexpr (enable_strict_mode) { | 526 | 523 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 491 | *result = PARSE_FAILURE; | 529 | 491 | return 0; | 530 | 491 | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 32 | *result = PARSE_SUCCESS; | 547 | 523 | return static_cast<T>(negative ? -val : val); | 548 | 523 | } | 549 | 16.8k | } | 550 | 32 | *result = PARSE_SUCCESS; | 551 | 32 | return static_cast<T>(negative ? -val : val); | 552 | 1.08k | } |
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 1.78k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 1.78k | if (UNLIKELY(len <= 0)) { | 482 | 8 | *result = PARSE_FAILURE; | 483 | 8 | return 0; | 484 | 8 | } | 485 | | | 486 | 1.77k | using UnsignedT = MakeUnsignedT<T>; | 487 | 1.77k | UnsignedT val = 0; | 488 | 1.77k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 1.77k | bool negative = false; | 490 | 1.77k | int i = 0; | 491 | 1.77k | switch (*s) { | 492 | 584 | case '-': | 493 | 584 | negative = true; | 494 | 584 | max_val += 1; | 495 | 584 | [[fallthrough]]; | 496 | 916 | case '+': | 497 | 916 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 916 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 1.77k | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 1.77k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 752 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 752 | return static_cast<T>(negative ? -val : val); | 509 | 752 | } | 510 | | | 511 | 1.02k | const T max_div_10 = max_val / 10; | 512 | 1.02k | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 1.02k | int first = i; | 515 | 31.3k | for (; i < len; ++i) { | 516 | 31.2k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 30.7k | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 30.7k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 504 | *result = PARSE_OVERFLOW; | 521 | 504 | return negative ? 
-max_val : max_val; | 522 | 504 | } | 523 | 30.2k | val = val * 10 + digit; | 524 | 30.2k | } else { | 525 | 488 | if constexpr (enable_strict_mode) { | 526 | 488 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | 456 | *result = PARSE_FAILURE; | 529 | 456 | return 0; | 530 | 456 | } | 531 | | } else { | 532 | | // Save original position where non-digit was found | 533 | | int remaining_len = len - i; | 534 | | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | | *result = PARSE_FAILURE; | 542 | | return 0; | 543 | | } | 544 | | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 32 | *result = PARSE_SUCCESS; | 547 | 488 | return static_cast<T>(negative ? -val : val); | 548 | 488 | } | 549 | 31.2k | } | 550 | 32 | *result = PARSE_SUCCESS; | 551 | 32 | return static_cast<T>(negative ? -val : val); | 552 | 1.02k | } |
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 4 | if (UNLIKELY(len <= 0)) { | 482 | 0 | *result = PARSE_FAILURE; | 483 | 0 | return 0; | 484 | 0 | } | 485 | | | 486 | 4 | using UnsignedT = MakeUnsignedT<T>; | 487 | 4 | UnsignedT val = 0; | 488 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 4 | bool negative = false; | 490 | 4 | int i = 0; | 491 | 4 | switch (*s) { | 492 | 0 | case '-': | 493 | 0 | negative = true; | 494 | 0 | max_val += 1; | 495 | 0 | [[fallthrough]]; | 496 | 0 | case '+': | 497 | 0 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 0 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 4 | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 4 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 4 | return static_cast<T>(negative ? -val : val); | 509 | 4 | } | 510 | | | 511 | 0 | const T max_div_10 = max_val / 10; | 512 | 0 | const T max_mod_10 = max_val % 10; | 513 | |
| 514 | 0 | int first = i; | 515 | 0 | for (; i < len; ++i) { | 516 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 0 | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 0 | *result = PARSE_OVERFLOW; | 521 | 0 | return negative ? -max_val : max_val; | 522 | 0 | } | 523 | 0 | val = val * 10 + digit; | 524 | 0 | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 0 | } else { | 532 | | // Save original position where non-digit was found | 533 | 0 | int remaining_len = len - i; | 534 | 0 | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 0 | *result = PARSE_FAILURE; | 542 | 0 | return 0; | 543 | 0 | } | 544 | 0 | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 0 | *result = PARSE_SUCCESS; | 547 | 0 | return static_cast<T>(negative ? -val : val); | 548 | 0 | } | 549 | 0 | } | 550 | 0 | *result = PARSE_SUCCESS; | 551 | 0 | return static_cast<T>(negative ? -val : val); | 552 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 4 | if (UNLIKELY(len <= 0)) { | 482 | 0 | *result = PARSE_FAILURE; | 483 | 0 | return 0; | 484 | 0 | } | 485 | | | 486 | 4 | using UnsignedT = MakeUnsignedT<T>; | 487 | 4 | UnsignedT val = 0; | 488 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 4 | bool negative = false; | 490 | 4 | int i = 0; | 491 | 4 | switch (*s) { | 492 | 0 | case '-': | 493 | 0 | negative = true; | 494 | 0 | max_val += 1; | 495 | 0 | [[fallthrough]]; | 496 | 0 | case '+': | 497 | 0 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 0 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 4 | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 0 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 0 | return static_cast<T>(negative ? -val : val); | 509 | 0 | } | 510 | | | 511 | 4 | const T max_div_10 = max_val / 10; | 512 | 4 | const T max_mod_10 = max_val % 10; | 513 | | | 514 | 4 | int first = i; | 515 | 84 | for (; i < len; ++i) { | 516 | 80 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 80 | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 80 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 0 | *result = PARSE_OVERFLOW; | 521 | 0 | return negative ? 
-max_val : max_val; | 522 | 0 | } | 523 | 80 | val = val * 10 + digit; | 524 | 80 | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 0 | } else { | 532 | | // Save original position where non-digit was found | 533 | 0 | int remaining_len = len - i; | 534 | 0 | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 0 | *result = PARSE_FAILURE; | 542 | 0 | return 0; | 543 | 0 | } | 544 | 0 | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 0 | *result = PARSE_SUCCESS; | 547 | 0 | return static_cast<T>(negative ? -val : val); | 548 | 0 | } | 549 | 80 | } | 550 | 4 | *result = PARSE_SUCCESS; | 551 | 4 | return static_cast<T>(negative ? -val : val); | 552 | 4 | } |
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 20 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 20 | if (UNLIKELY(len <= 0)) { | 482 | 0 | *result = PARSE_FAILURE; | 483 | 0 | return 0; | 484 | 0 | } | 485 | | | 486 | 20 | using UnsignedT = MakeUnsignedT<T>; | 487 | 20 | UnsignedT val = 0; | 488 | 20 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 489 | 20 | bool negative = false; | 490 | 20 | int i = 0; | 491 | 20 | switch (*s) { | 492 | 0 | case '-': | 493 | 0 | negative = true; | 494 | 0 | max_val += 1; | 495 | 0 | [[fallthrough]]; | 496 | 0 | case '+': | 497 | 0 | ++i; | 498 | | // only one '+'/'-' char, so could return failure directly | 499 | 0 | if (UNLIKELY(len == 1)) { | 500 | 0 | *result = PARSE_FAILURE; | 501 | 0 | return 0; | 502 | 0 | } | 503 | 20 | } | 504 | | | 505 | | // This is the fast path where the string cannot overflow. | 506 | 20 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 507 | 20 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 508 | 20 | return static_cast<T>(negative ? -val : val); | 509 | 20 | } | 510 | | | 511 | 0 | const T max_div_10 = max_val / 10; | 512 | 0 | const T max_mod_10 = max_val % 10; | 513 | |
| 514 | 0 | int first = i; | 515 | 0 | for (; i < len; ++i) { | 516 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 517 | 0 | T digit = s[i] - '0'; | 518 | | // This is a tricky check to see if adding this digit will cause an overflow. | 519 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 520 | 0 | *result = PARSE_OVERFLOW; | 521 | 0 | return negative ? -max_val : max_val; | 522 | 0 | } | 523 | 0 | val = val * 10 + digit; | 524 | 0 | } else { | 525 | | if constexpr (enable_strict_mode) { | 526 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 527 | | // Reject the string because the remaining chars are not all whitespace | 528 | | *result = PARSE_FAILURE; | 529 | | return 0; | 530 | | } | 531 | 0 | } else { | 532 | | // Save original position where non-digit was found | 533 | 0 | int remaining_len = len - i; | 534 | 0 | const char* remaining_s = s + i; | 535 | | // Skip trailing whitespaces from the remaining portion | 536 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 537 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 538 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 539 | | // Reject the string because either the first char was not a digit, | 540 | | // or the remaining chars are not all whitespace | 541 | 0 | *result = PARSE_FAILURE; | 542 | 0 | return 0; | 543 | 0 | } | 544 | 0 | } | 545 | | // Returning here is slightly faster than breaking the loop. | 546 | 0 | *result = PARSE_SUCCESS; | 547 | 0 | return static_cast<T>(negative ? -val : val); | 548 | 0 | } | 549 | 0 | } | 550 | 0 | *result = PARSE_SUCCESS; | 551 | 0 | return static_cast<T>(negative ? -val : val); | 552 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE |
553 | | |
554 | | template <typename T> |
555 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
556 | 1.37k | ParseResult* result) { |
557 | 1.37k | if (UNLIKELY(len <= 0)) { |
558 | 0 | *result = PARSE_FAILURE; |
559 | 0 | return 0; |
560 | 0 | } |
561 | | |
562 | 1.37k | T val = 0; |
563 | 1.37k | T max_val = std::numeric_limits<T>::max(); |
564 | 1.37k | int i = 0; |
565 | | |
566 | 1.37k | using signedT = MakeSignedT<T>; |
567 | | // This is the fast path where the string cannot overflow. |
568 | 1.37k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
569 | 784 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
570 | 784 | return val; |
571 | 784 | } |
572 | | |
573 | 588 | const T max_div_10 = max_val / 10; |
574 | 588 | const T max_mod_10 = max_val % 10; |
575 | | |
576 | 588 | int first = i; |
577 | 4.65k | for (; i < len; ++i) { |
578 | 4.31k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
579 | 4.26k | T digit = s[i] - '0'; |
580 | | // This is a tricky check to see if adding this digit will cause an overflow. |
581 | 4.26k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
582 | 196 | *result = PARSE_OVERFLOW; |
583 | 196 | return max_val; |
584 | 196 | } |
585 | 4.06k | val = val * 10 + digit; |
586 | 4.06k | } else { |
587 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
588 | | // Reject the string because either the first char was not a digit, |
589 | | // or the remaining chars are not all whitespace |
590 | 49 | *result = PARSE_FAILURE; |
591 | 49 | return 0; |
592 | 49 | } |
593 | | // Returning here is slightly faster than breaking the loop. |
594 | 0 | *result = PARSE_SUCCESS; |
595 | 0 | return val; |
596 | 49 | } |
597 | 4.31k | } |
598 | 343 | *result = PARSE_SUCCESS; |
599 | 343 | return val; |
600 | 588 | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 556 | 343 | ParseResult* result) { | 557 | 343 | if (UNLIKELY(len <= 0)) { | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | | 562 | 343 | T val = 0; | 563 | 343 | T max_val = std::numeric_limits<T>::max(); | 564 | 343 | int i = 0; | 565 | | | 566 | 343 | using signedT = MakeSignedT<T>; | 567 | | // This is the fast path where the string cannot overflow. | 568 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 569 | 98 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 570 | 98 | return val; | 571 | 98 | } | 572 | | | 573 | 245 | const T max_div_10 = max_val / 10; | 574 | 245 | const T max_mod_10 = max_val % 10; | 575 | | | 576 | 245 | int first = i; | 577 | 784 | for (; i < len; ++i) { | 578 | 637 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 579 | 588 | T digit = s[i] - '0'; | 580 | | // This is a tricky check to see if adding this digit will cause an overflow. | 581 | 588 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 582 | 49 | *result = PARSE_OVERFLOW; | 583 | 49 | return max_val; | 584 | 49 | } | 585 | 539 | val = val * 10 + digit; | 586 | 539 | } else { | 587 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 588 | | // Reject the string because either the first char was not a digit, | 589 | | // or the remaining chars are not all whitespace | 590 | 49 | *result = PARSE_FAILURE; | 591 | 49 | return 0; | 592 | 49 | } | 593 | | // Returning here is slightly faster than breaking the loop. | 594 | 0 | *result = PARSE_SUCCESS; | 595 | 0 | return val; | 596 | 49 | } | 597 | 637 | } | 598 | 147 | *result = PARSE_SUCCESS; | 599 | 147 | return val; | 600 | 245 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 556 | 343 | ParseResult* result) { | 557 | 343 | if (UNLIKELY(len <= 0)) { | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | | 562 | 343 | T val = 0; | 563 | 343 | T max_val = std::numeric_limits<T>::max(); | 564 | 343 | int i = 0; | 565 | | | 566 | 343 | using signedT = MakeSignedT<T>; | 567 | | // This is the fast path where the string cannot overflow. | 568 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 569 | 196 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 570 | 196 | return val; | 571 | 196 | } | 572 | | | 573 | 147 | const T max_div_10 = max_val / 10; | 574 | 147 | const T max_mod_10 = max_val % 10; | 575 | | | 576 | 147 | int first = i; | 577 | 833 | for (; i < len; ++i) { | 578 | 735 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 579 | 735 | T digit = s[i] - '0'; | 580 | | // This is a tricky check to see if adding this digit will cause an overflow. | 581 | 735 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 582 | 49 | *result = PARSE_OVERFLOW; | 583 | 49 | return max_val; | 584 | 49 | } | 585 | 686 | val = val * 10 + digit; | 586 | 686 | } else { | 587 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 588 | | // Reject the string because either the first char was not a digit, | 589 | | // or the remaining chars are not all whitespace | 590 | 0 | *result = PARSE_FAILURE; | 591 | 0 | return 0; | 592 | 0 | } | 593 | | // Returning here is slightly faster than breaking the loop. | 594 | 0 | *result = PARSE_SUCCESS; | 595 | 0 | return val; | 596 | 0 | } | 597 | 735 | } | 598 | 98 | *result = PARSE_SUCCESS; | 599 | 98 | return val; | 600 | 147 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 556 | 343 | ParseResult* result) { | 557 | 343 | if (UNLIKELY(len <= 0)) { | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | | 562 | 343 | T val = 0; | 563 | 343 | T max_val = std::numeric_limits<T>::max(); | 564 | 343 | int i = 0; | 565 | | | 566 | 343 | using signedT = MakeSignedT<T>; | 567 | | // This is the fast path where the string cannot overflow. | 568 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 569 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 570 | 245 | return val; | 571 | 245 | } | 572 | | | 573 | 98 | const T max_div_10 = max_val / 10; | 574 | 98 | const T max_mod_10 = max_val % 10; | 575 | | | 576 | 98 | int first = i; | 577 | 1.02k | for (; i < len; ++i) { | 578 | 980 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 579 | 980 | T digit = s[i] - '0'; | 580 | | // This is a tricky check to see if adding this digit will cause an overflow. | 581 | 980 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 582 | 49 | *result = PARSE_OVERFLOW; | 583 | 49 | return max_val; | 584 | 49 | } | 585 | 931 | val = val * 10 + digit; | 586 | 931 | } else { | 587 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 588 | | // Reject the string because either the first char was not a digit, | 589 | | // or the remaining chars are not all whitespace | 590 | 0 | *result = PARSE_FAILURE; | 591 | 0 | return 0; | 592 | 0 | } | 593 | | // Returning here is slightly faster than breaking the loop. | 594 | 0 | *result = PARSE_SUCCESS; | 595 | 0 | return val; | 596 | 0 | } | 597 | 980 | } | 598 | 49 | *result = PARSE_SUCCESS; | 599 | 49 | return val; | 600 | 98 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 556 | 343 | ParseResult* result) { | 557 | 343 | if (UNLIKELY(len <= 0)) { | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | | 562 | 343 | T val = 0; | 563 | 343 | T max_val = std::numeric_limits<T>::max(); | 564 | 343 | int i = 0; | 565 | | | 566 | 343 | using signedT = MakeSignedT<T>; | 567 | | // This is the fast path where the string cannot overflow. | 568 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 569 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 570 | 245 | return val; | 571 | 245 | } | 572 | | | 573 | 98 | const T max_div_10 = max_val / 10; | 574 | 98 | const T max_mod_10 = max_val % 10; | 575 | | | 576 | 98 | int first = i; | 577 | 2.00k | for (; i < len; ++i) { | 578 | 1.96k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 579 | 1.96k | T digit = s[i] - '0'; | 580 | | // This is a tricky check to see if adding this digit will cause an overflow. | 581 | 1.96k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 582 | 49 | *result = PARSE_OVERFLOW; | 583 | 49 | return max_val; | 584 | 49 | } | 585 | 1.91k | val = val * 10 + digit; | 586 | 1.91k | } else { | 587 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 588 | | // Reject the string because either the first char was not a digit, | 589 | | // or the remaining chars are not all whitespace | 590 | 0 | *result = PARSE_FAILURE; | 591 | 0 | return 0; | 592 | 0 | } | 593 | | // Returning here is slightly faster than breaking the loop. | 594 | 0 | *result = PARSE_SUCCESS; | 595 | 0 | return val; | 596 | 0 | } | 597 | 1.96k | } | 598 | 49 | *result = PARSE_SUCCESS; | 599 | 49 | return val; | 600 | 98 | } |
|
601 | | |
602 | | template <typename T> |
603 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
604 | 27.8k | ParseResult* result) { |
605 | 27.8k | using UnsignedT = MakeUnsignedT<T>; |
606 | 27.8k | UnsignedT val = 0; |
607 | 27.8k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
608 | 27.8k | bool negative = false; |
609 | 27.8k | if (UNLIKELY(len <= 0)) { |
610 | 0 | *result = PARSE_FAILURE; |
611 | 0 | return 0; |
612 | 0 | } |
613 | 27.8k | int i = 0; |
614 | 27.8k | switch (*s) { |
615 | 13.4k | case '-': |
616 | 13.4k | negative = true; |
617 | 13.4k | max_val = StringParser::numeric_limits<T>(false) + 1; |
618 | 13.4k | [[fallthrough]]; |
619 | 13.7k | case '+': |
620 | 13.7k | i = 1; |
621 | 27.8k | } |
622 | | |
623 | 27.8k | const T max_div_base = max_val / base; |
624 | 27.8k | const T max_mod_base = max_val % base; |
625 | | |
626 | 27.8k | int first = i; |
627 | 90.9k | for (; i < len; ++i) { |
628 | 76.6k | T digit; |
629 | 76.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
630 | 75.7k | digit = s[i] - '0'; |
631 | 75.7k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
632 | 639 | digit = (s[i] - 'a' + 10); |
633 | 639 | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
634 | 98 | digit = (s[i] - 'A' + 10); |
635 | 147 | } else { |
636 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
637 | | // Reject the string because either the first char was not an alpha/digit, |
638 | | // or the remaining chars are not all whitespace |
639 | 147 | *result = PARSE_FAILURE; |
640 | 147 | return 0; |
641 | 147 | } |
642 | | // skip trailing whitespace. |
643 | 0 | break; |
644 | 147 | } |
645 | | |
646 | | // Bail, if we encounter a digit that is not available in base. |
647 | 76.4k | if (digit >= base) { |
648 | 392 | break; |
649 | 392 | } |
650 | | |
651 | | // This is a tricky check to see if adding this digit will cause an overflow. |
652 | 76.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
653 | 12.9k | *result = PARSE_OVERFLOW; |
654 | 12.9k | return static_cast<T>(negative ? -max_val : max_val); |
655 | 12.9k | } |
656 | 63.1k | val = val * base + digit; |
657 | 63.1k | } |
658 | 14.7k | *result = PARSE_SUCCESS; |
659 | 14.7k | return static_cast<T>(negative ? -val : val); |
660 | 27.8k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 604 | 26.4k | ParseResult* result) { | 605 | 26.4k | using UnsignedT = MakeUnsignedT<T>; | 606 | 26.4k | UnsignedT val = 0; | 607 | 26.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 608 | 26.4k | bool negative = false; | 609 | 26.4k | if (UNLIKELY(len <= 0)) { | 610 | 0 | *result = PARSE_FAILURE; | 611 | 0 | return 0; | 612 | 0 | } | 613 | 26.4k | int i = 0; | 614 | 26.4k | switch (*s) { | 615 | 12.8k | case '-': | 616 | 12.8k | negative = true; | 617 | 12.8k | max_val = StringParser::numeric_limits<T>(false) + 1; | 618 | 12.8k | [[fallthrough]]; | 619 | 12.9k | case '+': | 620 | 12.9k | i = 1; | 621 | 26.4k | } | 622 | | | 623 | 26.4k | const T max_div_base = max_val / base; | 624 | 26.4k | const T max_mod_base = max_val % base; | 625 | | | 626 | 26.4k | int first = i; | 627 | 80.7k | for (; i < len; ++i) { | 628 | 67.4k | T digit; | 629 | 67.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 630 | 66.6k | digit = s[i] - '0'; | 631 | 66.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 632 | 539 | digit = (s[i] - 'a' + 10); | 633 | 539 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 634 | 98 | digit = (s[i] - 'A' + 10); | 635 | 147 | } else { | 636 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 637 | | // Reject the string because either the first char was not an alpha/digit, | 638 | | // or the remaining chars are not all whitespace | 639 | 147 | *result = PARSE_FAILURE; | 640 | 147 | return 0; | 641 | 147 | } | 642 | | // skip trailing whitespace. | 643 | 0 | break; | 644 | 147 | } | 645 | | | 646 | | // Bail, if we encounter a digit that is not available in base. | 647 | 67.3k | if (digit >= base) { | 648 | 392 | break; | 649 | 392 | } | 650 | | | 651 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 652 | 66.9k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 653 | 12.6k | *result = PARSE_OVERFLOW; | 654 | 12.6k | return static_cast<T>(negative ? -max_val : max_val); | 655 | 12.6k | } | 656 | 54.2k | val = val * base + digit; | 657 | 54.2k | } | 658 | 13.6k | *result = PARSE_SUCCESS; | 659 | 13.6k | return static_cast<T>(negative ? -val : val); | 660 | 26.4k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 604 | 490 | ParseResult* result) { | 605 | 490 | using UnsignedT = MakeUnsignedT<T>; | 606 | 490 | UnsignedT val = 0; | 607 | 490 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 608 | 490 | bool negative = false; | 609 | 490 | if (UNLIKELY(len <= 0)) { | 610 | 0 | *result = PARSE_FAILURE; | 611 | 0 | return 0; | 612 | 0 | } | 613 | 490 | int i = 0; | 614 | 490 | switch (*s) { | 615 | 196 | case '-': | 616 | 196 | negative = true; | 617 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 618 | 196 | [[fallthrough]]; | 619 | 245 | case '+': | 620 | 245 | i = 1; | 621 | 490 | } | 622 | | | 623 | 490 | const T max_div_base = max_val / base; | 624 | 490 | const T max_mod_base = max_val % base; | 625 | | | 626 | 490 | int first = i; | 627 | 2.10k | for (; i < len; ++i) { | 628 | 1.71k | T digit; | 629 | 1.71k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 630 | 1.61k | digit = s[i] - '0'; | 631 | 1.61k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 632 | 98 | digit = (s[i] - 'a' + 10); | 633 | 98 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 634 | 0 | digit = (s[i] - 'A' + 10); | 635 | 0 | } else { | 636 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 637 | | // Reject the string because either the first char was not an alpha/digit, | 638 | | // or the remaining chars are not all whitespace | 639 | 0 | *result = PARSE_FAILURE; | 640 | 0 | return 0; | 641 | 0 | } | 642 | | // skip trailing whitespace. | 643 | 0 | break; | 644 | 0 | } | 645 | | | 646 | | // Bail, if we encounter a digit that is not available in base. | 647 | 1.71k | if (digit >= base) { | 648 | 0 | break; | 649 | 0 | } | 650 | | | 651 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 652 | 1.71k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 653 | 98 | *result = PARSE_OVERFLOW; | 654 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 655 | 98 | } | 656 | 1.61k | val = val * base + digit; | 657 | 1.61k | } | 658 | 392 | *result = PARSE_SUCCESS; | 659 | 392 | return static_cast<T>(negative ? -val : val); | 660 | 490 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 604 | 441 | ParseResult* result) { | 605 | 441 | using UnsignedT = MakeUnsignedT<T>; | 606 | 441 | UnsignedT val = 0; | 607 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 608 | 441 | bool negative = false; | 609 | 441 | if (UNLIKELY(len <= 0)) { | 610 | 0 | *result = PARSE_FAILURE; | 611 | 0 | return 0; | 612 | 0 | } | 613 | 441 | int i = 0; | 614 | 441 | switch (*s) { | 615 | 147 | case '-': | 616 | 147 | negative = true; | 617 | 147 | max_val = StringParser::numeric_limits<T>(false) + 1; | 618 | 147 | [[fallthrough]]; | 619 | 245 | case '+': | 620 | 245 | i = 1; | 621 | 441 | } | 622 | | | 623 | 441 | const T max_div_base = max_val / base; | 624 | 441 | const T max_mod_base = max_val % base; | 625 | | | 626 | 441 | int first = i; | 627 | 3.03k | for (; i < len; ++i) { | 628 | 2.69k | T digit; | 629 | 2.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 630 | 2.69k | digit = s[i] - '0'; | 631 | 2.69k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 632 | 0 | digit = (s[i] - 'a' + 10); | 633 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 634 | 0 | digit = (s[i] - 'A' + 10); | 635 | 0 | } else { | 636 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 637 | | // Reject the string because either the first char was not an alpha/digit, | 638 | | // or the remaining chars are not all whitespace | 639 | 0 | *result = PARSE_FAILURE; | 640 | 0 | return 0; | 641 | 0 | } | 642 | | // skip trailing whitespace. | 643 | 0 | break; | 644 | 0 | } | 645 | | | 646 | | // Bail, if we encounter a digit that is not available in base. | 647 | 2.69k | if (digit >= base) { | 648 | 0 | break; | 649 | 0 | } | 650 | | | 651 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 652 | 2.69k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 653 | 98 | *result = PARSE_OVERFLOW; | 654 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 655 | 98 | } | 656 | 2.59k | val = val * base + digit; | 657 | 2.59k | } | 658 | 343 | *result = PARSE_SUCCESS; | 659 | 343 | return static_cast<T>(negative ? -val : val); | 660 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 604 | 441 | ParseResult* result) { | 605 | 441 | using UnsignedT = MakeUnsignedT<T>; | 606 | 441 | UnsignedT val = 0; | 607 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 608 | 441 | bool negative = false; | 609 | 441 | if (UNLIKELY(len <= 0)) { | 610 | 0 | *result = PARSE_FAILURE; | 611 | 0 | return 0; | 612 | 0 | } | 613 | 441 | int i = 0; | 614 | 441 | switch (*s) { | 615 | 196 | case '-': | 616 | 196 | negative = true; | 617 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 618 | 196 | [[fallthrough]]; | 619 | 245 | case '+': | 620 | 245 | i = 1; | 621 | 441 | } | 622 | | | 623 | 441 | const T max_div_base = max_val / base; | 624 | 441 | const T max_mod_base = max_val % base; | 625 | | | 626 | 441 | int first = i; | 627 | 5.09k | for (; i < len; ++i) { | 628 | 4.75k | T digit; | 629 | 4.75k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 630 | 4.75k | digit = s[i] - '0'; | 631 | 4.75k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 632 | 0 | digit = (s[i] - 'a' + 10); | 633 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 634 | 0 | digit = (s[i] - 'A' + 10); | 635 | 0 | } else { | 636 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 637 | | // Reject the string because either the first char was not an alpha/digit, | 638 | | // or the remaining chars are not all whitespace | 639 | 0 | *result = PARSE_FAILURE; | 640 | 0 | return 0; | 641 | 0 | } | 642 | | // skip trailing whitespace. | 643 | 0 | break; | 644 | 0 | } | 645 | | | 646 | | // Bail, if we encounter a digit that is not available in base. | 647 | 4.75k | if (digit >= base) { | 648 | 0 | break; | 649 | 0 | } | 650 | | | 651 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 652 | 4.75k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 653 | 98 | *result = PARSE_OVERFLOW; | 654 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 655 | 98 | } | 656 | 4.65k | val = val * base + digit; | 657 | 4.65k | } | 658 | 343 | *result = PARSE_SUCCESS; | 659 | 343 | return static_cast<T>(negative ? -val : val); | 660 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 604 | 1 | ParseResult* result) { | 605 | 1 | using UnsignedT = MakeUnsignedT<T>; | 606 | 1 | UnsignedT val = 0; | 607 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 608 | 1 | bool negative = false; | 609 | 1 | if (UNLIKELY(len <= 0)) { | 610 | 0 | *result = PARSE_FAILURE; | 611 | 0 | return 0; | 612 | 0 | } | 613 | 1 | int i = 0; | 614 | 1 | switch (*s) { | 615 | 0 | case '-': | 616 | 0 | negative = true; | 617 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 618 | 0 | [[fallthrough]]; | 619 | 0 | case '+': | 620 | 0 | i = 1; | 621 | 1 | } | 622 | | | 623 | 1 | const T max_div_base = max_val / base; | 624 | 1 | const T max_mod_base = max_val % base; | 625 | | | 626 | 1 | int first = i; | 627 | 3 | for (; i < len; ++i) { | 628 | 2 | T digit; | 629 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 630 | 0 | digit = s[i] - '0'; | 631 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 632 | 2 | digit = (s[i] - 'a' + 10); | 633 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 634 | 0 | digit = (s[i] - 'A' + 10); | 635 | 0 | } else { | 636 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 637 | | // Reject the string because either the first char was not an alpha/digit, | 638 | | // or the remaining chars are not all whitespace | 639 | 0 | *result = PARSE_FAILURE; | 640 | 0 | return 0; | 641 | 0 | } | 642 | | // skip trailing whitespace. | 643 | 0 | break; | 644 | 0 | } | 645 | | | 646 | | // Bail, if we encounter a digit that is not available in base. | 647 | 2 | if (digit >= base) { | 648 | 0 | break; | 649 | 0 | } | 650 | | | 651 | | // This is a tricky check to see if adding this digit will cause an overflow. | 652 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 653 | 0 | *result = PARSE_OVERFLOW; | 654 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 655 | 0 | } | 656 | 2 | val = val * base + digit; | 657 | 2 | } | 658 | 1 | *result = PARSE_SUCCESS; | 659 | 1 | return static_cast<T>(negative ? -val : val); | 660 | 1 | } |
|
661 | | |
662 | | template <typename T, bool enable_strict_mode> |
663 | 240k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
664 | 240k | T val = 0; |
665 | 240k | if (UNLIKELY(len == 0)) { |
666 | 0 | *result = PARSE_SUCCESS; |
667 | 0 | return val; |
668 | 0 | } |
669 | | // Factor out the first char for error handling speeds up the loop. |
670 | 240k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
671 | 234k | val = s[0] - '0'; |
672 | 234k | } else { |
673 | 6.33k | *result = PARSE_FAILURE; |
674 | 6.33k | return 0; |
675 | 6.33k | } |
676 | 372k | for (int i = 1; i < len; ++i) { |
677 | 141k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
678 | 137k | T digit = s[i] - '0'; |
679 | 137k | val = val * 10 + digit; |
680 | 137k | } else { |
681 | 3.79k | if constexpr (enable_strict_mode) { |
682 | 1.31k | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { |
683 | 1.17k | *result = PARSE_FAILURE; |
684 | 1.17k | return 0; |
685 | 1.17k | } |
686 | 2.47k | } else { |
687 | | // Save original position where non-digit was found |
688 | 2.47k | int remaining_len = len - i; |
689 | 2.47k | const char* remaining_s = s + i; |
690 | | // Skip trailing whitespaces from the remaining portion |
691 | 2.47k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); |
692 | 2.47k | if ((UNLIKELY(remaining_len != 0 && |
693 | 2.47k | !is_float_suffix(remaining_s, remaining_len)))) { |
694 | 477 | *result = PARSE_FAILURE; |
695 | 477 | return 0; |
696 | 477 | } |
697 | 2.47k | } |
698 | 2.14k | *result = PARSE_SUCCESS; |
699 | 3.79k | return val; |
700 | 3.79k | } |
701 | 141k | } |
702 | 230k | *result = PARSE_SUCCESS; |
703 | 230k | return val; |
704 | 234k | } _ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 41.9k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 41.9k | T val = 0; | 665 | 41.9k | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 41.9k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 41.0k | val = s[0] - '0'; | 672 | 41.0k | } else { | 673 | 914 | *result = PARSE_FAILURE; | 674 | 914 | return 0; | 675 | 914 | } | 676 | 57.7k | for (int i = 1; i < len; ++i) { | 677 | 17.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 16.6k | T digit = s[i] - '0'; | 679 | 16.6k | val = val * 10 + digit; | 680 | 16.6k | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 378 | } else { | 687 | | // Save original position where non-digit was found | 688 | 378 | int remaining_len = len - i; | 689 | 378 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 378 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 378 | if ((UNLIKELY(remaining_len != 0 && | 693 | 378 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 98 | *result = PARSE_FAILURE; | 695 | 98 | return 0; | 696 | 98 | } | 697 | 378 | } | 698 | 280 | *result = PARSE_SUCCESS; | 699 | 378 | return val; | 700 | 378 | } | 701 | 17.0k | } | 702 | 40.6k | *result = PARSE_SUCCESS; | 703 | 40.6k | return val; | 704 | 41.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 60.5k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 60.5k | T val = 0; | 665 | 60.5k | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 60.5k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 59.9k | val = s[0] - '0'; | 672 | 59.9k | } else { | 673 | 582 | *result = PARSE_FAILURE; | 674 | 582 | return 0; | 675 | 582 | } | 676 | 85.3k | for (int i = 1; i < len; ++i) { | 677 | 25.3k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 25.3k | T digit = s[i] - '0'; | 679 | 25.3k | val = val * 10 + digit; | 680 | 25.3k | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 2 | } else { | 687 | | // Save original position where non-digit was found | 688 | 2 | int remaining_len = len - i; | 689 | 2 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 2 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 2 | if ((UNLIKELY(remaining_len != 0 && | 693 | 2 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 2 | *result = PARSE_FAILURE; | 695 | 2 | return 0; | 696 | 2 | } | 697 | 2 | } | 698 | 0 | *result = PARSE_SUCCESS; | 699 | 2 | return val; | 700 | 2 | } | 701 | 25.3k | } | 702 | 59.9k | *result = PARSE_SUCCESS; | 703 | 59.9k | return val; | 704 | 59.9k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 51.0k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 51.0k | T val = 0; | 665 | 51.0k | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 51.0k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 50.0k | val = s[0] - '0'; | 672 | 50.0k | } else { | 673 | 918 | *result = PARSE_FAILURE; | 674 | 918 | return 0; | 675 | 918 | } | 676 | 72.9k | for (int i = 1; i < len; ++i) { | 677 | 23.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 22.8k | T digit = s[i] - '0'; | 679 | 22.8k | val = val * 10 + digit; | 680 | 22.8k | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 957 | } else { | 687 | | // Save original position where non-digit was found | 688 | 957 | int remaining_len = len - i; | 689 | 957 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 957 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 957 | if ((UNLIKELY(remaining_len != 0 && | 693 | 957 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 74 | *result = PARSE_FAILURE; | 695 | 74 | return 0; | 696 | 74 | } | 697 | 957 | } | 698 | 883 | *result = PARSE_SUCCESS; | 699 | 957 | return val; | 700 | 957 | } | 701 | 23.7k | } | 702 | 49.1k | *result = PARSE_SUCCESS; | 703 | 49.1k | return val; | 704 | 50.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 52.4k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 52.4k | T val = 0; | 665 | 52.4k | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 52.4k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 50.2k | val = s[0] - '0'; | 672 | 50.2k | } else { | 673 | 2.24k | *result = PARSE_FAILURE; | 674 | 2.24k | return 0; | 675 | 2.24k | } | 676 | 85.4k | for (int i = 1; i < len; ++i) { | 677 | 35.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 35.1k | T digit = s[i] - '0'; | 679 | 35.1k | val = val * 10 + digit; | 680 | 35.1k | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 527 | } else { | 687 | | // Save original position where non-digit was found | 688 | 527 | int remaining_len = len - i; | 689 | 527 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 527 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 527 | if ((UNLIKELY(remaining_len != 0 && | 693 | 527 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 179 | *result = PARSE_FAILURE; | 695 | 179 | return 0; | 696 | 179 | } | 697 | 527 | } | 698 | 348 | *result = PARSE_SUCCESS; | 699 | 527 | return val; | 700 | 527 | } | 701 | 35.7k | } | 702 | 49.7k | *result = PARSE_SUCCESS; | 703 | 49.7k | return val; | 704 | 50.2k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 32.5k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 32.5k | T val = 0; | 665 | 32.5k | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 32.5k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 31.5k | val = s[0] - '0'; | 672 | 31.5k | } else { | 673 | 1.07k | *result = PARSE_FAILURE; | 674 | 1.07k | return 0; | 675 | 1.07k | } | 676 | 66.1k | for (int i = 1; i < len; ++i) { | 677 | 35.2k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 34.6k | T digit = s[i] - '0'; | 679 | 34.6k | val = val * 10 + digit; | 680 | 34.6k | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 615 | } else { | 687 | | // Save original position where non-digit was found | 688 | 615 | int remaining_len = len - i; | 689 | 615 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 615 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 615 | if ((UNLIKELY(remaining_len != 0 && | 693 | 615 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 124 | *result = PARSE_FAILURE; | 695 | 124 | return 0; | 696 | 124 | } | 697 | 615 | } | 698 | 491 | *result = PARSE_SUCCESS; | 699 | 615 | return val; | 700 | 615 | } | 701 | 35.2k | } | 702 | 30.8k | *result = PARSE_SUCCESS; | 703 | 30.8k | return val; | 704 | 31.5k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 635 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 635 | T val = 0; | 665 | 635 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 635 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 550 | val = s[0] - '0'; | 672 | 550 | } else { | 673 | 85 | *result = PARSE_FAILURE; | 674 | 85 | return 0; | 675 | 85 | } | 676 | 1.32k | for (int i = 1; i < len; ++i) { | 677 | 1.09k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 770 | T digit = s[i] - '0'; | 679 | 770 | val = val * 10 + digit; | 680 | 770 | } else { | 681 | 320 | if constexpr (enable_strict_mode) { | 682 | 320 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | 292 | *result = PARSE_FAILURE; | 684 | 292 | return 0; | 685 | 292 | } | 686 | | } else { | 687 | | // Save original position where non-digit was found | 688 | | int remaining_len = len - i; | 689 | | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | | if ((UNLIKELY(remaining_len != 0 && | 693 | | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | | *result = PARSE_FAILURE; | 695 | | return 0; | 696 | | } | 697 | | } | 698 | 28 | *result = PARSE_SUCCESS; | 699 | 320 | return val; | 700 | 320 | } | 701 | 1.09k | } | 702 | 230 | *result = PARSE_SUCCESS; | 703 | 230 | return val; | 704 | 550 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 48 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 48 | T val = 0; | 665 | 48 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 48 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 22 | val = s[0] - '0'; | 672 | 26 | } else { | 673 | 26 | *result = PARSE_FAILURE; | 674 | 26 | return 0; | 675 | 26 | } | 676 | 22 | for (int i = 1; i < len; ++i) { | 677 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 0 | T digit = s[i] - '0'; | 679 | 0 | val = val * 10 + digit; | 680 | 2 | } else { | 681 | 2 | if constexpr (enable_strict_mode) { | 682 | 2 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | 2 | *result = PARSE_FAILURE; | 684 | 2 | return 0; | 685 | 2 | } | 686 | | } else { | 687 | | // Save original position where non-digit was found | 688 | | int remaining_len = len - i; | 689 | | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | | if ((UNLIKELY(remaining_len != 0 && | 693 | | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | | *result = PARSE_FAILURE; | 695 | | return 0; | 696 | | } | 697 | | } | 698 | 0 | *result = PARSE_SUCCESS; | 699 | 2 | return val; | 700 | 2 | } | 701 | 2 | } | 702 | 20 | *result = PARSE_SUCCESS; | 703 | 20 | return val; | 704 | 22 | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 168 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 168 | T val = 0; | 665 | 168 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 168 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 130 | val = s[0] - '0'; | 672 | 130 | } else { | 673 | 38 | *result = PARSE_FAILURE; | 674 | 38 | return 0; | 675 | 38 | } | 676 | 206 | for (int i = 1; i < len; ++i) { | 677 | 158 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 76 | T digit = s[i] - '0'; | 679 | 76 | val = val * 10 + digit; | 680 | 82 | } else { | 681 | 82 | if constexpr (enable_strict_mode) { | 682 | 82 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | 82 | *result = PARSE_FAILURE; | 684 | 82 | return 0; | 685 | 82 | } | 686 | | } else { | 687 | | // Save original position where non-digit was found | 688 | | int remaining_len = len - i; | 689 | | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | | if ((UNLIKELY(remaining_len != 0 && | 693 | | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | | *result = PARSE_FAILURE; | 695 | | return 0; | 696 | | } | 697 | | } | 698 | 0 | *result = PARSE_SUCCESS; | 699 | 82 | return val; | 700 | 82 | } | 701 | 158 | } | 702 | 48 | *result = PARSE_SUCCESS; | 703 | 48 | return val; | 704 | 130 | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 738 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 738 | T val = 0; | 665 | 738 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 738 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 521 | val = s[0] - '0'; | 672 | 521 | } else { | 673 | 217 | *result = PARSE_FAILURE; | 674 | 217 | return 0; | 675 | 217 | } | 676 | 1.52k | for (int i = 1; i < len; ++i) { | 677 | 1.45k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 1.00k | T digit = s[i] - '0'; | 679 | 1.00k | val = val * 10 + digit; | 680 | 1.00k | } else { | 681 | 456 | if constexpr (enable_strict_mode) { | 682 | 456 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | 400 | *result = PARSE_FAILURE; | 684 | 400 | return 0; | 685 | 400 | } | 686 | | } else { | 687 | | // Save original position where non-digit was found | 688 | | int remaining_len = len - i; | 689 | | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | | if ((UNLIKELY(remaining_len != 0 && | 693 | | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | | *result = PARSE_FAILURE; | 695 | | return 0; | 696 | | } | 697 | | } | 698 | 56 | *result = PARSE_SUCCESS; | 699 | 456 | return val; | 700 | 456 | } | 701 | 1.45k | } | 702 | 65 | *result = PARSE_SUCCESS; | 703 | 65 | return val; | 704 | 521 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 752 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 752 | T val = 0; | 665 | 752 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 752 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 512 | val = s[0] - '0'; | 672 | 512 | } else { | 673 | 240 | *result = PARSE_FAILURE; | 674 | 240 | return 0; | 675 | 240 | } | 676 | 1.49k | for (int i = 1; i < len; ++i) { | 677 | 1.44k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 986 | T digit = s[i] - '0'; | 679 | 986 | val = val * 10 + digit; | 680 | 986 | } else { | 681 | 456 | if constexpr (enable_strict_mode) { | 682 | 456 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | 400 | *result = PARSE_FAILURE; | 684 | 400 | return 0; | 685 | 400 | } | 686 | | } else { | 687 | | // Save original position where non-digit was found | 688 | | int remaining_len = len - i; | 689 | | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | | if ((UNLIKELY(remaining_len != 0 && | 693 | | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | | *result = PARSE_FAILURE; | 695 | | return 0; | 696 | | } | 697 | | } | 698 | 56 | *result = PARSE_SUCCESS; | 699 | 456 | return val; | 700 | 456 | } | 701 | 1.44k | } | 702 | 56 | *result = PARSE_SUCCESS; | 703 | 56 | return val; | 704 | 512 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 663 | 4 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 664 | 4 | T val = 0; | 665 | 4 | if (UNLIKELY(len == 0)) { | 666 | 0 | *result = PARSE_SUCCESS; | 667 | 0 | return val; | 668 | 0 | } | 669 | | // Factor out the first char for error handling speeds up the loop. | 670 | 4 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 671 | 4 | val = s[0] - '0'; | 672 | 4 | } else { | 673 | 0 | *result = PARSE_FAILURE; | 674 | 0 | return 0; | 675 | 0 | } | 676 | 4 | for (int i = 1; i < len; ++i) { | 677 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 678 | 0 | T digit = s[i] - '0'; | 679 | 0 | val = val * 10 + digit; | 680 | 0 | } else { | 681 | | if constexpr (enable_strict_mode) { | 682 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 683 | | *result = PARSE_FAILURE; | 684 | | return 0; | 685 | | } | 686 | 0 | } else { | 687 | | // Save original position where non-digit was found | 688 | 0 | int remaining_len = len - i; | 689 | 0 | const char* remaining_s = s + i; | 690 | | // Skip trailing whitespaces from the remaining portion | 691 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 692 | 0 | if ((UNLIKELY(remaining_len != 0 && | 693 | 0 | !is_float_suffix(remaining_s, remaining_len)))) { | 694 | 0 | *result = PARSE_FAILURE; | 695 | 0 | return 0; | 696 | 0 | } | 697 | 0 | } | 698 | 0 | *result = PARSE_SUCCESS; | 699 | 0 | return val; | 700 | 0 | } | 701 | 0 | } | 702 | 4 | *result = PARSE_SUCCESS; | 703 | 4 | return val; | 704 | 4 | } |
|
705 | | |
706 | | // at least the first char(if any) must be a digit. |
707 | | template <typename T> |
708 | | T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
709 | 136k | ParseResult* result) { |
710 | 136k | T val = 0; |
711 | 136k | if (max_len == 0) [[unlikely]] { |
712 | 135k | *result = PARSE_SUCCESS; |
713 | 135k | return val; |
714 | 135k | } |
715 | | // Factor out the first char for error handling speeds up the loop. |
716 | 1.09k | if (is_numeric_ascii(s[0])) [[likely]] { |
717 | 1.09k | val = s[0] - '0'; |
718 | 1.09k | } else { |
719 | 0 | *result = PARSE_FAILURE; |
720 | 0 | return 0; |
721 | 0 | } |
722 | 4.87k | for (int i = 1; i < max_len; ++i) { |
723 | 3.77k | if (is_numeric_ascii(s[i])) [[likely]] { |
724 | 3.77k | T digit = s[i] - '0'; |
725 | 3.77k | val = val * 10 + digit; |
726 | 3.77k | } else { |
727 | | // 123abc, return 123 |
728 | 0 | *result = PARSE_SUCCESS; |
729 | 0 | return val; |
730 | 0 | } |
731 | 3.77k | } |
732 | 1.09k | *result = PARSE_SUCCESS; |
733 | 1.09k | return val; |
734 | 1.09k | } |
735 | | |
736 | | template <typename T> |
737 | 152k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
738 | 152k | int i = 0; |
739 | | // skip leading spaces |
740 | 152k | for (; i < len; ++i) { |
741 | 152k | if (!is_whitespace_ascii(s[i])) { |
742 | 152k | break; |
743 | 152k | } |
744 | 152k | } |
745 | | |
746 | | // skip back spaces |
747 | 152k | int j = len - 1; |
748 | 152k | for (; j >= i; j--) { |
749 | 152k | if (!is_whitespace_ascii(s[j])) { |
750 | 152k | break; |
751 | 152k | } |
752 | 152k | } |
753 | | |
754 | | // skip leading '+', from_chars can handle '-' |
755 | 152k | if (i < len && s[i] == '+') { |
756 | 7.08k | i++; |
757 | | // ++ or +- are not valid, but the first + is already skipped, |
758 | | // if don't check here, from_chars will succeed. |
759 | | // |
760 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' |
761 | | // which may avoid this extra check here. |
762 | | // e.g.: |
763 | | // fast_float::chars_format format = |
764 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; |
765 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); |
766 | 7.08k | if (i < len && (s[i] == '+' || s[i] == '-')) { |
767 | 20 | *result = PARSE_FAILURE; |
768 | 20 | return 0; |
769 | 20 | } |
770 | 7.08k | } |
771 | 152k | if (UNLIKELY(i > j)) { |
772 | 32 | *result = PARSE_FAILURE; |
773 | 32 | return 0; |
774 | 32 | } |
775 | | |
776 | | // Use double here to not lose precision while accumulating the result |
777 | 152k | double val = 0; |
778 | 152k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
779 | | |
780 | 152k | if (res.ptr == s + j + 1) { |
781 | 148k | *result = PARSE_SUCCESS; |
782 | 148k | return val; |
783 | 148k | } else { |
784 | 4.61k | *result = PARSE_FAILURE; |
785 | 4.61k | } |
786 | 4.61k | return 0; |
787 | 152k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 737 | 87.6k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 738 | 87.6k | int i = 0; | 739 | | // skip leading spaces | 740 | 87.6k | for (; i < len; ++i) { | 741 | 87.6k | if (!is_whitespace_ascii(s[i])) { | 742 | 87.6k | break; | 743 | 87.6k | } | 744 | 87.6k | } | 745 | | | 746 | | // skip back spaces | 747 | 87.6k | int j = len - 1; | 748 | 87.6k | for (; j >= i; j--) { | 749 | 87.6k | if (!is_whitespace_ascii(s[j])) { | 750 | 87.6k | break; | 751 | 87.6k | } | 752 | 87.6k | } | 753 | | | 754 | | // skip leading '+', from_chars can handle '-' | 755 | 87.6k | if (i < len && s[i] == '+') { | 756 | 3.54k | i++; | 757 | | // ++ or +- are not valid, but the first + is already skipped, | 758 | | // if don't check here, from_chars will succeed. | 759 | | // | 760 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 761 | | // which may avoid this extra check here. 
| 762 | | // e.g.: | 763 | | // fast_float::chars_format format = | 764 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 765 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 766 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 767 | 10 | *result = PARSE_FAILURE; | 768 | 10 | return 0; | 769 | 10 | } | 770 | 3.54k | } | 771 | 87.6k | if (UNLIKELY(i > j)) { | 772 | 18 | *result = PARSE_FAILURE; | 773 | 18 | return 0; | 774 | 18 | } | 775 | | | 776 | | // Use double here to not lose precision while accumulating the result | 777 | 87.6k | double val = 0; | 778 | 87.6k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 779 | | | 780 | 87.6k | if (res.ptr == s + j + 1) { | 781 | 85.3k | *result = PARSE_SUCCESS; | 782 | 85.3k | return val; | 783 | 85.3k | } else { | 784 | 2.32k | *result = PARSE_FAILURE; | 785 | 2.32k | } | 786 | 2.32k | return 0; | 787 | 87.6k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 737 | 65.1k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 738 | 65.1k | int i = 0; | 739 | | // skip leading spaces | 740 | 65.1k | for (; i < len; ++i) { | 741 | 65.1k | if (!is_whitespace_ascii(s[i])) { | 742 | 65.1k | break; | 743 | 65.1k | } | 744 | 65.1k | } | 745 | | | 746 | | // skip back spaces | 747 | 65.1k | int j = len - 1; | 748 | 65.1k | for (; j >= i; j--) { | 749 | 65.1k | if (!is_whitespace_ascii(s[j])) { | 750 | 65.1k | break; | 751 | 65.1k | } | 752 | 65.1k | } | 753 | | | 754 | | // skip leading '+', from_chars can handle '-' | 755 | 65.1k | if (i < len && s[i] == '+') { | 756 | 3.54k | i++; | 757 | | // ++ or +- are not valid, but the first + is already skipped, | 758 | | // if don't check here, from_chars will succeed. | 759 | | // | 760 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 761 | | // which may avoid this extra check here. 
| 762 | | // e.g.: | 763 | | // fast_float::chars_format format = | 764 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 765 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 766 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 767 | 10 | *result = PARSE_FAILURE; | 768 | 10 | return 0; | 769 | 10 | } | 770 | 3.54k | } | 771 | 65.1k | if (UNLIKELY(i > j)) { | 772 | 14 | *result = PARSE_FAILURE; | 773 | 14 | return 0; | 774 | 14 | } | 775 | | | 776 | | // Use double here to not lose precision while accumulating the result | 777 | 65.1k | double val = 0; | 778 | 65.1k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 779 | | | 780 | 65.1k | if (res.ptr == s + j + 1) { | 781 | 62.8k | *result = PARSE_SUCCESS; | 782 | 62.8k | return val; | 783 | 62.8k | } else { | 784 | 2.28k | *result = PARSE_FAILURE; | 785 | 2.28k | } | 786 | 2.28k | return 0; | 787 | 65.1k | } |
|
788 | | |
789 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
790 | 11.3k | ParseResult* result) { |
791 | 11.3k | *result = PARSE_SUCCESS; |
792 | | |
793 | 11.3k | if (len == 1) { |
794 | 2.66k | if (s[0] == '1' || s[0] == 't' || s[0] == 'T') { |
795 | 333 | return true; |
796 | 333 | } |
797 | 2.32k | if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') { |
798 | 934 | return false; |
799 | 934 | } |
800 | 1.39k | *result = PARSE_FAILURE; |
801 | 1.39k | return false; |
802 | 2.32k | } |
803 | | |
804 | 8.71k | if (len == 2) { |
805 | 975 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { |
806 | 10 | return true; |
807 | 10 | } |
808 | 965 | if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) { |
809 | 9 | return false; |
810 | 9 | } |
811 | 965 | } |
812 | | |
813 | 8.69k | if (len == 3) { |
814 | 42 | if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') && |
815 | 42 | (s[2] == 's' || s[2] == 'S')) { |
816 | 10 | return true; |
817 | 10 | } |
818 | 32 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') && |
819 | 32 | (s[2] == 'f' || s[2] == 'F')) { |
820 | 9 | return false; |
821 | 9 | } |
822 | 32 | } |
823 | | |
824 | 8.67k | if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') && |
825 | 8.67k | (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) { |
826 | 3.38k | return true; |
827 | 3.38k | } |
828 | | |
829 | 5.29k | if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') && |
830 | 5.29k | (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') && |
831 | 5.29k | (s[4] == 'e' || s[4] == 'E')) { |
832 | 3.42k | return false; |
833 | 3.42k | } |
834 | | |
835 | | // No valid boolean value found |
836 | 1.87k | *result = PARSE_FAILURE; |
837 | 1.87k | return false; |
838 | 5.29k | } |
839 | | #include "common/compile_check_avoid_end.h" |
840 | | } // end namespace doris |