be/src/util/string_parser.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | #include <sys/types.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <cstdlib> |
30 | | // IWYU pragma: no_include <bits/std_abs.h> |
31 | | #include <cmath> // IWYU pragma: keep |
32 | | #include <cstdint> |
33 | | #include <limits> |
34 | | #include <map> |
35 | | #include <string> |
36 | | #include <type_traits> |
37 | | #include <utility> |
38 | | |
39 | | #include "common/compiler_util.h" // IWYU pragma: keep |
40 | | #include "common/status.h" |
41 | | #include "core/data_type/number_traits.h" |
42 | | #include "core/data_type/primitive_type.h" |
43 | | #include "core/extended_types.h" |
44 | | #include "core/value/large_int_value.h" |
45 | | #include "exec/common/int_exp.h" |
46 | | #include "exec/common/string_utils/string_utils.h" |
47 | | |
48 | | namespace doris { |
49 | | #include "common/compile_check_avoid_begin.h" |
50 | | template <DecimalNativeTypeConcept T> |
51 | | struct Decimal; |
52 | | |
// Early-return helper for routines that are templated on a compile-time flag `IsStrict`.
// Evaluates `stmt`; when it is false the enclosing function returns `false`. In strict
// mode (`IsStrict` is true) `params.status` is first set to
// Status::InvalidArgument(__VA_ARGS__); in non-strict mode no status is recorded.
// The expansion site must therefore have `IsStrict` and an object named `params` with a
// `status` member in scope.
#ifndef SET_PARAMS_RET_FALSE_IFN
#define SET_PARAMS_RET_FALSE_IFN(stmt, ...)                           \
    do {                                                              \
        if (!(stmt)) [[unlikely]] {                                   \
            if constexpr (IsStrict) {                                 \
                params.status = Status::InvalidArgument(__VA_ARGS__); \
            }                                                         \
            return false;                                             \
        }                                                             \
    } while (false)
#endif
65 | | |
// Runs `stmt` inside a try block. If it throws doris::Exception, the enclosing function
// returns `false`; in strict mode (`IsStrict` is true) the exception is first converted
// with to_status() and stored in `params.status`. Other exception types are not caught.
// The expansion site must have `IsStrict` and an object named `params` with a `status`
// member in scope.
#ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION
#define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \
    do {                                          \
        try {                                     \
            { stmt; }                             \
        } catch (const doris::Exception& e) {     \
            if constexpr (IsStrict) {             \
                params.status = e.to_status();    \
            }                                     \
            return false;                         \
        }                                         \
    } while (false)
#endif
79 | | |
80 | | // skip leading and trailing ascii whitespaces, |
81 | | // return the pointer to the first non-whitespace char, |
82 | | // and update the len to the new length, which does not include |
83 | | // leading and trailing whitespaces |
84 | | template <typename T> |
85 | 1.08M | inline const char* skip_ascii_whitespaces(const char* s, T& len) { |
86 | 2.02M | while (len > 0 && is_whitespace_ascii(*s)) { |
87 | 939k | ++s; |
88 | 939k | --len; |
89 | 939k | } |
90 | | |
91 | 2.02M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { |
92 | 931k | --len; |
93 | 931k | } |
94 | | |
95 | 1.08M | return s; |
96 | 1.08M | } _ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_ Line | Count | Source | 85 | 1.03M | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 86 | 1.81M | while (len > 0 && is_whitespace_ascii(*s)) { | 87 | 787k | ++s; | 88 | 787k | --len; | 89 | 787k | } | 90 | | | 91 | 1.81M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 92 | 780k | --len; | 93 | 780k | } | 94 | | | 95 | 1.03M | return s; | 96 | 1.03M | } |
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_ Line | Count | Source | 85 | 2.74k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 86 | 9.80k | while (len > 0 && is_whitespace_ascii(*s)) { | 87 | 7.05k | ++s; | 88 | 7.05k | --len; | 89 | 7.05k | } | 90 | | | 91 | 9.80k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 92 | 7.05k | --len; | 93 | 7.05k | } | 94 | | | 95 | 2.74k | return s; | 96 | 2.74k | } |
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_ Line | Count | Source | 85 | 55.6k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 86 | 200k | while (len > 0 && is_whitespace_ascii(*s)) { | 87 | 144k | ++s; | 88 | 144k | --len; | 89 | 144k | } | 90 | | | 91 | 199k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 92 | 144k | --len; | 93 | 144k | } | 94 | | | 95 | 55.6k | return s; | 96 | 55.6k | } |
|
97 | | |
98 | | template <typename T> |
99 | 107k | inline const char* skip_leading_whitespace(const char* __restrict s, T& len) { |
100 | 315k | while (len > 0 && is_whitespace_ascii(*s)) { |
101 | 207k | ++s; |
102 | 207k | --len; |
103 | 207k | } |
104 | | |
105 | 107k | return s; |
106 | 107k | } |
107 | | |
108 | | // skip trailing ascii whitespaces, |
109 | | // return the pointer to the first char, |
110 | | // and update the len to the new length, which does not include |
111 | | // trailing whitespaces |
112 | | template <typename T> |
113 | 88.6k | inline const char* skip_trailing_whitespaces(const char* s, T& len) { |
114 | 320k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { |
115 | 231k | --len; |
116 | 231k | } |
117 | | |
118 | 88.6k | return s; |
119 | 88.6k | } |
120 | | |
121 | | template <bool (*Pred)(char)> |
122 | 874k | bool range_suite(const char* s, const char* end) { |
123 | 874k | return std::ranges::all_of(s, end, Pred); |
124 | 874k | } _ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_ Line | Count | Source | 122 | 869k | bool range_suite(const char* s, const char* end) { | 123 | 869k | return std::ranges::all_of(s, end, Pred); | 124 | 869k | } |
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_ Line | Count | Source | 122 | 4.57k | bool range_suite(const char* s, const char* end) { | 123 | 4.57k | return std::ranges::all_of(s, end, Pred); | 124 | 4.57k | } |
|
125 | | |
126 | | inline auto is_digit_range = range_suite<is_numeric_ascii>; |
127 | | inline auto is_space_range = range_suite<is_whitespace_ascii>; |
128 | | |
// Returns true when `s + offset` addresses a character strictly before `end`, i.e. when
// reading s[offset] stays inside [s, end). Returns false when the range is empty or when
// `offset` reaches or passes `end`.
//
// The check compares `offset` against the remaining distance instead of forming
// `s + offset`: building a pointer beyond one-past-the-end is undefined behaviour, and for
// a very large `offset` the addition can wrap and wrongly report "in bound".
//
// Combining in_bound with range_suite is fine; it does not lead to duplicated calculation.
inline bool in_bound(const char* s, const char* end, size_t offset) {
    if (s >= end) [[unlikely]] {
        return false;
    }
    // s < end here, so the distance is positive and the cast cannot change its value.
    const auto remaining = static_cast<size_t>(end - s);
    return offset < remaining;
}
136 | | |
137 | | // LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more. |
138 | | // if need result, use StringRef{origin_s, s} outside |
139 | | template <int LEN, bool (*Pred)(char)> |
140 | 3.01M | bool skip_qualified_char(const char*& s, const char* end) { |
141 | 3.01M | if constexpr (LEN == 0) { |
142 | | // Consume any length of characters that match the predicate. |
143 | 2.39M | while (s != end && Pred(*s)) { |
144 | 1.38M | ++s; |
145 | 1.38M | } |
146 | 1.99M | } else if constexpr (LEN > 0) { |
147 | | // Consume exactly LEN characters that match the predicate. |
148 | 3.97M | for (int i = 0; i < LEN; ++i, ++s) { |
149 | 1.99M | if (s == end || !Pred(*s)) [[unlikely]] { |
150 | 21.6k | return false; |
151 | 21.6k | } |
152 | 1.99M | } |
153 | 1.99M | } else { // LEN < 0 |
154 | | // Consume at least -LEN characters that match the predicate. |
155 | 108 | int count = 0; |
156 | 720 | while (s != end && Pred(*s)) { |
157 | 612 | ++s; |
158 | 612 | ++count; |
159 | 612 | } |
160 | 108 | if (count < -LEN) [[unlikely]] { |
161 | 0 | return false; |
162 | 0 | } |
163 | 108 | } |
164 | 1.97M | return true; |
165 | 3.01M | } _ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 140 | 397k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | 397k | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | 403k | while (s != end && Pred(*s)) { | 144 | 6.06k | ++s; | 145 | 6.06k | } | 146 | | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | | for (int i = 0; i < LEN; ++i, ++s) { | 149 | | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | | return false; | 151 | | } | 152 | | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 397k | return true; | 165 | 397k | } |
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_ Line | Count | Source | 140 | 616k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | 616k | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | 1.99M | while (s != end && Pred(*s)) { | 144 | 1.38M | ++s; | 145 | 1.38M | } | 146 | | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | | for (int i = 0; i < LEN; ++i, ++s) { | 149 | | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | | return false; | 151 | | } | 152 | | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 616k | return true; | 165 | 616k | } |
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 140 | 108 | bool skip_qualified_char(const char*& s, const char* end) { | 141 | | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | | while (s != end && Pred(*s)) { | 144 | | ++s; | 145 | | } | 146 | | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | | for (int i = 0; i < LEN; ++i, ++s) { | 149 | | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | | return false; | 151 | | } | 152 | | } | 153 | 108 | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | 108 | int count = 0; | 156 | 720 | while (s != end && Pred(*s)) { | 157 | 612 | ++s; | 158 | 612 | ++count; | 159 | 612 | } | 160 | 108 | if (count < -LEN) [[unlikely]] { | 161 | 0 | return false; | 162 | 0 | } | 163 | 108 | } | 164 | 108 | return true; | 165 | 108 | } |
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_ _ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_ Line | Count | Source | 140 | 70.6k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | | while (s != end && Pred(*s)) { | 144 | | ++s; | 145 | | } | 146 | 70.6k | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | 119k | for (int i = 0; i < LEN; ++i, ++s) { | 149 | 70.6k | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | 21.3k | return false; | 151 | 21.3k | } | 152 | 70.6k | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 49.2k | return true; | 165 | 70.6k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_ Line | Count | Source | 140 | 352k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | | while (s != end && Pred(*s)) { | 144 | | ++s; | 145 | | } | 146 | 352k | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | 704k | for (int i = 0; i < LEN; ++i, ++s) { | 149 | 352k | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | 96 | return false; | 151 | 96 | } | 152 | 352k | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 352k | return true; | 165 | 352k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_11is_date_sepEcEEEEbRPKcS2_ Line | Count | Source | 140 | 878k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | | while (s != end && Pred(*s)) { | 144 | | ++s; | 145 | | } | 146 | 878k | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | 1.75M | for (int i = 0; i < LEN; ++i, ++s) { | 149 | 878k | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | 84 | return false; | 151 | 84 | } | 152 | 878k | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 878k | return true; | 165 | 878k | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_ Line | Count | Source | 140 | 694k | bool skip_qualified_char(const char*& s, const char* end) { | 141 | | if constexpr (LEN == 0) { | 142 | | // Consume any length of characters that match the predicate. | 143 | | while (s != end && Pred(*s)) { | 144 | | ++s; | 145 | | } | 146 | 694k | } else if constexpr (LEN > 0) { | 147 | | // Consume exactly LEN characters that match the predicate. | 148 | 1.38M | for (int i = 0; i < LEN; ++i, ++s) { | 149 | 694k | if (s == end || !Pred(*s)) [[unlikely]] { | 150 | 48 | return false; | 151 | 48 | } | 152 | 694k | } | 153 | | } else { // LEN < 0 | 154 | | // Consume at least -LEN characters that match the predicate. | 155 | | int count = 0; | 156 | | while (s != end && Pred(*s)) { | 157 | | ++s; | 158 | | ++count; | 159 | | } | 160 | | if (count < -LEN) [[unlikely]] { | 161 | | return false; | 162 | | } | 163 | | } | 164 | 694k | return true; | 165 | 694k | } |
|
166 | | |
167 | | inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>; |
168 | | inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>; |
169 | | inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>; |
170 | | inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>; |
171 | | inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>; |
172 | | |
// Returns true for the characters accepted as a delimiter: ' ', 'T' or ':'.
inline bool is_delimiter(char c) {
    switch (c) {
    case ' ':
    case 'T':
    case ':':
        return true;
    default:
        return false;
    }
}
176 | | inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>; |
177 | | |
// Returns true for the characters accepted as a date separator: '-' or '/'.
inline bool is_date_sep(char c) {
    switch (c) {
    case '-':
    case '/':
        return true;
    default:
        return false;
    }
}
181 | | inline auto consume_one_date_sep = skip_qualified_char<1, is_date_sep>; |
182 | | |
// Returns true only for the colon character ':'.
inline bool is_colon(char c) {
    constexpr char kColon = ':';
    return kColon == c;
}
186 | | inline auto consume_one_colon = skip_qualified_char<1, is_colon>; |
187 | | |
188 | | // only consume a string of digit, not include sign. |
189 | | // when has MAX_LEN > 0, do greedy match but at most MAX_LEN. |
190 | | // LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits. |
191 | | template <typename T, int LEN = 0, int MAX_LEN = -1> |
192 | 40 | bool consume_digit(const char*& s, const char* end, T& out) { |
193 | 40 | static_assert(LEN >= 0); |
194 | | if constexpr (MAX_LEN > 0) { |
195 | | out = 0; |
196 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { |
197 | | if (s == end || !is_numeric_ascii(*s)) { |
198 | | if (i < LEN) [[unlikely]] { |
199 | | return false; |
200 | | } |
201 | | break; // stop consuming if we have consumed enough digits. |
202 | | } |
203 | | out = out * 10 + (*s - '0'); |
204 | | } |
205 | | } else if constexpr (LEN == 0) { |
206 | | // Consume any length of digits. |
207 | | out = 0; |
208 | | while (s != end && is_numeric_ascii(*s)) { |
209 | | out = out * 10 + (*s - '0'); |
210 | | ++s; |
211 | | } |
212 | 40 | } else if constexpr (LEN > 0) { |
213 | | // Consume exactly LEN digits. |
214 | 40 | out = 0; |
215 | 170 | for (int i = 0; i < LEN; ++i, ++s) { |
216 | 130 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
217 | 0 | return false; |
218 | 0 | } |
219 | 130 | out = out * 10 + (*s - '0'); |
220 | 130 | } |
221 | 40 | } |
222 | 40 | return true; |
223 | 40 | } _ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_ Line | Count | Source | 192 | 30 | bool consume_digit(const char*& s, const char* end, T& out) { | 193 | 30 | static_assert(LEN >= 0); | 194 | | if constexpr (MAX_LEN > 0) { | 195 | | out = 0; | 196 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 197 | | if (s == end || !is_numeric_ascii(*s)) { | 198 | | if (i < LEN) [[unlikely]] { | 199 | | return false; | 200 | | } | 201 | | break; // stop consuming if we have consumed enough digits. | 202 | | } | 203 | | out = out * 10 + (*s - '0'); | 204 | | } | 205 | | } else if constexpr (LEN == 0) { | 206 | | // Consume any length of digits. | 207 | | out = 0; | 208 | | while (s != end && is_numeric_ascii(*s)) { | 209 | | out = out * 10 + (*s - '0'); | 210 | | ++s; | 211 | | } | 212 | 30 | } else if constexpr (LEN > 0) { | 213 | | // Consume exactly LEN digits. | 214 | 30 | out = 0; | 215 | 150 | for (int i = 0; i < LEN; ++i, ++s) { | 216 | 120 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 217 | 0 | return false; | 218 | 0 | } | 219 | 120 | out = out * 10 + (*s - '0'); | 220 | 120 | } | 221 | 30 | } | 222 | 30 | return true; | 223 | 30 | } |
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_ Line | Count | Source | 192 | 10 | bool consume_digit(const char*& s, const char* end, T& out) { | 193 | 10 | static_assert(LEN >= 0); | 194 | | if constexpr (MAX_LEN > 0) { | 195 | | out = 0; | 196 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 197 | | if (s == end || !is_numeric_ascii(*s)) { | 198 | | if (i < LEN) [[unlikely]] { | 199 | | return false; | 200 | | } | 201 | | break; // stop consuming if we have consumed enough digits. | 202 | | } | 203 | | out = out * 10 + (*s - '0'); | 204 | | } | 205 | | } else if constexpr (LEN == 0) { | 206 | | // Consume any length of digits. | 207 | | out = 0; | 208 | | while (s != end && is_numeric_ascii(*s)) { | 209 | | out = out * 10 + (*s - '0'); | 210 | | ++s; | 211 | | } | 212 | 10 | } else if constexpr (LEN > 0) { | 213 | | // Consume exactly LEN digits. | 214 | 10 | out = 0; | 215 | 20 | for (int i = 0; i < LEN; ++i, ++s) { | 216 | 10 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 217 | 0 | return false; | 218 | 0 | } | 219 | 10 | out = out * 10 + (*s - '0'); | 220 | 10 | } | 221 | 10 | } | 222 | 10 | return true; | 223 | 10 | } |
|
224 | | |
225 | | // specialized version for 2 digits, which is used very often in date/time parsing. |
226 | | template <> |
227 | 1.03M | inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) { |
228 | 1.03M | out = 0; |
229 | 1.03M | if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1))) |
230 | 36.4k | [[unlikely]] { |
231 | 36.4k | return false; |
232 | 36.4k | } |
233 | 1.00M | out = (s[0] - '0') * 10 + (s[1] - '0'); |
234 | 1.00M | s += 2; // consume 2 digits |
235 | 1.00M | return true; |
236 | 1.03M | } |
237 | | |
238 | | // specialized version for 1 or 2 digits, which is used very often in date/time parsing. |
239 | | template <> |
240 | 1.97M | inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) { |
241 | 1.97M | out = 0; |
242 | 1.97M | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
243 | 960 | return false; |
244 | 1.97M | } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) { |
245 | | // consume 2 digits |
246 | 1.94M | out = (*s - '0') * 10 + (*(s + 1) - '0'); |
247 | 1.94M | s += 2; |
248 | 1.94M | } else { |
249 | | // consume 1 digit |
250 | 33.3k | out = *s - '0'; |
251 | 33.3k | ++s; |
252 | 33.3k | } |
253 | 1.97M | return true; |
254 | 1.97M | } |
255 | | |
256 | | template <bool (*Pred)(char)> |
257 | 296 | uint32_t count_valid_length(const char* s, const char* end) { |
258 | 296 | DCHECK(s <= end) << "s: " << s << ", end: " << end; |
259 | 296 | uint32_t count = 0; |
260 | 898 | while (s != end && Pred(*s)) { |
261 | 602 | ++count; |
262 | 602 | ++s; |
263 | 602 | } |
264 | 296 | return count; |
265 | 296 | } |
266 | | |
267 | | inline auto count_digits = count_valid_length<is_numeric_ascii>; |
268 | | |
269 | 272 | inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { |
270 | 272 | std::string result(6, '0'); |
271 | 272 | result[0] = sign; |
272 | 272 | result[1] = '0' + (hour_offset / 10); |
273 | 272 | result[2] = '0' + (hour_offset % 10); |
274 | 272 | result[3] = ':'; |
275 | 272 | result[4] = '0' + (minute_offset / 10); |
276 | 272 | result[5] = '0' + (minute_offset % 10); |
277 | 272 | DCHECK_EQ(result.size(), 6); |
278 | 272 | return result; |
279 | 272 | } |
280 | | |
281 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
282 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
283 | | // |
284 | | // Strings with leading and trailing whitespaces are accepted. |
285 | | // Branching is heavily optimized for the non-whitespace successful case. |
286 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
287 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
288 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
289 | | // |
290 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
291 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
292 | | // and inf/-inf for float types. |
293 | | // |
294 | | // Things we tried that did not work: |
295 | | // - lookup table for converting character to digit |
296 | | // Improvements (TODO): |
297 | | // - Validate input using _simd_compare_ranges |
298 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
299 | | class StringParser { |
300 | | public: |
301 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
302 | | |
303 | | template <typename T> |
304 | 969k | static T numeric_limits(bool negative) { |
305 | 969k | if constexpr (std::is_same_v<T, __int128>) { |
306 | 96.7k | return negative ? MIN_INT128 : MAX_INT128; |
307 | 873k | } else { |
308 | 873k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
309 | 873k | } |
310 | 969k | } _ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 304 | 96.7k | static T numeric_limits(bool negative) { | 305 | 96.7k | if constexpr (std::is_same_v<T, __int128>) { | 306 | 96.7k | return negative ? MIN_INT128 : MAX_INT128; | 307 | | } else { | 308 | | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | | } | 310 | 96.7k | } |
_ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 304 | 331k | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 331k | } else { | 308 | 331k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 331k | } | 310 | 331k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 304 | 153k | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 153k | } else { | 308 | 153k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 153k | } | 310 | 153k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 304 | 211k | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 211k | } else { | 308 | 211k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 211k | } | 310 | 211k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 304 | 175k | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 175k | } else { | 308 | 175k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 175k | } | 310 | 175k | } |
_ZN5doris12StringParser14numeric_limitsIjEET_b Line | Count | Source | 304 | 294 | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 294 | } else { | 308 | 294 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 294 | } | 310 | 294 | } |
_ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 304 | 42 | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 42 | } else { | 308 | 42 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 42 | } | 310 | 42 | } |
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Line | Count | Source | 304 | 8 | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 8 | } else { | 308 | 8 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 8 | } | 310 | 8 | } |
_ZN5doris12StringParser14numeric_limitsIoEET_b Line | Count | Source | 304 | 8 | static T numeric_limits(bool negative) { | 305 | | if constexpr (std::is_same_v<T, __int128>) { | 306 | | return negative ? MIN_INT128 : MAX_INT128; | 307 | 8 | } else { | 308 | 8 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 309 | 8 | } | 310 | 8 | } |
|
311 | | |
312 | | template <typename T> |
313 | 1.74M | static T get_scale_multiplier(int scale) { |
314 | 1.74M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
315 | 1.74M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
316 | 1.74M | "You can only instantiate as int32_t, int64_t, __int128."); |
317 | 1.74M | if constexpr (std::is_same_v<T, int32_t>) { |
318 | 266k | return common::exp10_i32(scale); |
319 | 365k | } else if constexpr (std::is_same_v<T, int64_t>) { |
320 | 365k | return common::exp10_i64(scale); |
321 | 475k | } else if constexpr (std::is_same_v<T, __int128>) { |
322 | 475k | return common::exp10_i128(scale); |
323 | 640k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
324 | 640k | return common::exp10_i256(scale); |
325 | 640k | } |
326 | 1.74M | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 313 | 266k | static T get_scale_multiplier(int scale) { | 314 | 266k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 315 | 266k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 316 | 266k | "You can only instantiate as int32_t, int64_t, __int128."); | 317 | 266k | if constexpr (std::is_same_v<T, int32_t>) { | 318 | 266k | return common::exp10_i32(scale); | 319 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 320 | | return common::exp10_i64(scale); | 321 | | } else if constexpr (std::is_same_v<T, __int128>) { | 322 | | return common::exp10_i128(scale); | 323 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 324 | | return common::exp10_i256(scale); | 325 | | } | 326 | 266k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 313 | 365k | static T get_scale_multiplier(int scale) { | 314 | 365k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 315 | 365k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 316 | 365k | "You can only instantiate as int32_t, int64_t, __int128."); | 317 | | if constexpr (std::is_same_v<T, int32_t>) { | 318 | | return common::exp10_i32(scale); | 319 | 365k | } else if constexpr (std::is_same_v<T, int64_t>) { | 320 | 365k | return common::exp10_i64(scale); | 321 | | } else if constexpr (std::is_same_v<T, __int128>) { | 322 | | return common::exp10_i128(scale); | 323 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 324 | | return common::exp10_i256(scale); | 325 | | } | 326 | 365k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 313 | 475k | static T get_scale_multiplier(int scale) { | 314 | 475k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 315 | 475k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 316 | 475k | "You can only instantiate as int32_t, int64_t, __int128."); | 317 | | if constexpr (std::is_same_v<T, int32_t>) { | 318 | | return common::exp10_i32(scale); | 319 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 320 | | return common::exp10_i64(scale); | 321 | 475k | } else if constexpr (std::is_same_v<T, __int128>) { | 322 | 475k | return common::exp10_i128(scale); | 323 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 324 | | return common::exp10_i256(scale); | 325 | | } | 326 | 475k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 313 | 640k | static T get_scale_multiplier(int scale) { | 314 | 640k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 315 | 640k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 316 | 640k | "You can only instantiate as int32_t, int64_t, __int128."); | 317 | | if constexpr (std::is_same_v<T, int32_t>) { | 318 | | return common::exp10_i32(scale); | 319 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 320 | | return common::exp10_i64(scale); | 321 | | } else if constexpr (std::is_same_v<T, __int128>) { | 322 | | return common::exp10_i128(scale); | 323 | 640k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 324 | 640k | return common::exp10_i256(scale); | 325 | 640k | } | 326 | 640k | } |
|
327 | | |
328 | | // This is considerably faster than glibc's implementation (25x). |
329 | | // Assumes s represents a decimal number. |
330 | | template <typename T, bool enable_strict_mode = false> |
331 | 783k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
332 | 783k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); |
333 | 783k | if (LIKELY(*result == PARSE_SUCCESS)) { |
334 | 675k | return ans; |
335 | 675k | } |
336 | 107k | s = skip_leading_whitespace(s, len); |
337 | 107k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); |
338 | 783k | } _ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 90.5k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 90.5k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 90.5k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 87.9k | return ans; | 335 | 87.9k | } | 336 | 2.67k | s = skip_leading_whitespace(s, len); | 337 | 2.67k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 90.5k | } |
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 190k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 190k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 190k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 132k | return ans; | 335 | 132k | } | 336 | 58.4k | s = skip_leading_whitespace(s, len); | 337 | 58.4k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 190k | } |
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 2.00k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 2.00k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 2.00k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 176 | return ans; | 335 | 176 | } | 336 | 1.82k | s = skip_leading_whitespace(s, len); | 337 | 1.82k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 2.00k | } |
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 132k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 132k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 132k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 116k | return ans; | 335 | 116k | } | 336 | 16.2k | s = skip_leading_whitespace(s, len); | 337 | 16.2k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 132k | } |
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 1.96k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 1.96k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 1.96k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 176 | return ans; | 335 | 176 | } | 336 | 1.79k | s = skip_leading_whitespace(s, len); | 337 | 1.79k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 1.96k | } |
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 197k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 197k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 197k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 185k | return ans; | 335 | 185k | } | 336 | 12.6k | s = skip_leading_whitespace(s, len); | 337 | 12.6k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 197k | } |
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 1.93k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 1.93k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 1.93k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 176 | return ans; | 335 | 176 | } | 336 | 1.76k | s = skip_leading_whitespace(s, len); | 337 | 1.76k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 1.93k | } |
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 162k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 162k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 162k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 152k | return ans; | 335 | 152k | } | 336 | 9.00k | s = skip_leading_whitespace(s, len); | 337 | 9.00k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 162k | } |
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 1.92k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 1.92k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 1.92k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 188 | return ans; | 335 | 188 | } | 336 | 1.73k | s = skip_leading_whitespace(s, len); | 337 | 1.73k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 1.92k | } |
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 1.87k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 1.87k | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 1.87k | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 176 | return ans; | 335 | 176 | } | 336 | 1.69k | s = skip_leading_whitespace(s, len); | 337 | 1.69k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 1.87k | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 40 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 40 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 40 | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 40 | return ans; | 335 | 40 | } | 336 | 0 | s = skip_leading_whitespace(s, len); | 337 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 40 | } |
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 8 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 8 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 8 | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 8 | return ans; | 335 | 8 | } | 336 | 0 | s = skip_leading_whitespace(s, len); | 337 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 8 | } |
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 331 | 8 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 332 | 8 | T ans = string_to_int_internal<T, enable_strict_mode>(s, len, result); | 333 | 8 | if (LIKELY(*result == PARSE_SUCCESS)) { | 334 | 8 | return ans; | 335 | 8 | } | 336 | 0 | s = skip_leading_whitespace(s, len); | 337 | 0 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 338 | 8 | } |
|
339 | | |
340 | | // This is considerably faster than glibc's implementation. |
341 | | // In the case of overflow, the max/min value for the data type will be returned. |
342 | | // Assumes s represents a decimal number. |
343 | | template <typename T> |
344 | 2.74k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
345 | 2.74k | s = skip_ascii_whitespaces(s, len); |
346 | 2.74k | return string_to_unsigned_int_internal<T>(s, len, result); |
347 | 2.74k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 344 | 686 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 345 | 686 | s = skip_ascii_whitespaces(s, len); | 346 | 686 | return string_to_unsigned_int_internal<T>(s, len, result); | 347 | 686 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 344 | 686 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 345 | 686 | s = skip_ascii_whitespaces(s, len); | 346 | 686 | return string_to_unsigned_int_internal<T>(s, len, result); | 347 | 686 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 344 | 686 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 345 | 686 | s = skip_ascii_whitespaces(s, len); | 346 | 686 | return string_to_unsigned_int_internal<T>(s, len, result); | 347 | 686 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 344 | 686 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 345 | 686 | s = skip_ascii_whitespaces(s, len); | 346 | 686 | return string_to_unsigned_int_internal<T>(s, len, result); | 347 | 686 | } |
|
348 | | |
349 | | // Convert a string s representing a number in given base into a decimal number. |
350 | | template <typename T> |
351 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
352 | 55.6k | ParseResult* result) { |
353 | 55.6k | s = skip_ascii_whitespaces(s, len); |
354 | 55.6k | return string_to_int_internal<T>(s, len, base, result); |
355 | 55.6k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 352 | 52.9k | ParseResult* result) { | 353 | 52.9k | s = skip_ascii_whitespaces(s, len); | 354 | 52.9k | return string_to_int_internal<T>(s, len, base, result); | 355 | 52.9k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 352 | 980 | ParseResult* result) { | 353 | 980 | s = skip_ascii_whitespaces(s, len); | 354 | 980 | return string_to_int_internal<T>(s, len, base, result); | 355 | 980 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 352 | 882 | ParseResult* result) { | 353 | 882 | s = skip_ascii_whitespaces(s, len); | 354 | 882 | return string_to_int_internal<T>(s, len, base, result); | 355 | 882 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 352 | 882 | ParseResult* result) { | 353 | 882 | s = skip_ascii_whitespaces(s, len); | 354 | 882 | return string_to_int_internal<T>(s, len, base, result); | 355 | 882 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 352 | 2 | ParseResult* result) { | 353 | 2 | s = skip_ascii_whitespaces(s, len); | 354 | 2 | return string_to_int_internal<T>(s, len, base, result); | 355 | 2 | } |
|
356 | | |
357 | | template <typename T> |
358 | 305k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
359 | 305k | s = skip_ascii_whitespaces(s, len); |
360 | 305k | return string_to_float_internal<T>(s, len, result); |
361 | 305k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 358 | 175k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 359 | 175k | s = skip_ascii_whitespaces(s, len); | 360 | 175k | return string_to_float_internal<T>(s, len, result); | 361 | 175k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 358 | 130k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 359 | 130k | s = skip_ascii_whitespaces(s, len); | 360 | 130k | return string_to_float_internal<T>(s, len, result); | 361 | 130k | } |
|
362 | | |
363 | | // Parses a string for 'true' or 'false', case insensitive. |
364 | 22.7k | static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) { |
365 | 22.7k | s = skip_ascii_whitespaces(s, len); |
366 | 22.7k | return string_to_bool_internal(s, len, result); |
367 | 22.7k | } |
368 | | |
369 | | template <PrimitiveType P> |
370 | | static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal( |
371 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
372 | | ParseResult* result); |
373 | | |
374 | | template <typename T> |
375 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
376 | | const T key_value_separator, |
377 | | std::map<std::string, std::string>* result) { |
378 | | int key_pos = 0; |
379 | | int key_end; |
380 | | int val_pos; |
381 | | int val_end; |
382 | | |
383 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
384 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
385 | | std::string::npos) { |
386 | | break; |
387 | | } |
388 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
389 | | val_end = base.size(); |
390 | | } |
391 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
392 | | base.substr(val_pos, val_end - val_pos))); |
393 | | key_pos = val_end; |
394 | | if (key_pos != std::string::npos) { |
395 | | ++key_pos; |
396 | | } |
397 | | } |
398 | | |
399 | | return Status::OK(); |
400 | | } |
401 | | |
402 | | // This is considerably faster than glibc's implementation. |
403 | | // In the case of overflow, the max/min value for the data type will be returned. |
404 | | // Assumes s represents a decimal number. |
405 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
406 | | template <typename T, bool enable_strict_mode = false> |
407 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
408 | | |
409 | | // This is considerably faster than glibc's implementation. |
410 | | // In the case of overflow, the max/min value for the data type will be returned. |
411 | | // Assumes s represents a decimal number. |
412 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
413 | | template <typename T> |
414 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
415 | | ParseResult* result); |
416 | | |
417 | | // Convert a string s representing a number in given base into a decimal number. |
418 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
419 | | template <typename T> |
420 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
421 | | ParseResult* result); |
422 | | |
423 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
424 | | // and the number is positive. |
425 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
426 | | template <typename T, bool enable_strict_mode = false> |
427 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
428 | | ParseResult* result); |
429 | | |
430 | | // The input is either zero length or has at least one legal digit. Consumes at most max_len |
431 | | // digits and stops, or stops earlier when the next char is not a digit. |
432 | | template <typename T> |
433 | | static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
434 | | ParseResult* result); |
435 | | |
436 | | // This is considerably faster than glibc's implementation (>100x why???) |
437 | | // No special case handling needs to be done for overflows, the floating point spec |
438 | | // already does it and will cap the values to -inf/inf |
439 | | // To avoid inaccurate conversions this function falls back to strtod for |
440 | | // scientific notation. |
441 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
442 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
443 | | template <typename T> |
444 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
445 | | ParseResult* result); |
446 | | |
447 | | // parses a string for 'true' or 'false', case insensitive |
448 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
449 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
450 | | ParseResult* result); |
451 | | |
452 | | // Returns true if s only contains whitespace. |
453 | 7.09k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
454 | 12.8k | for (int i = 0; i < len; ++i) { |
455 | 12.0k | if (!LIKELY(is_whitespace_ascii(s[i]))) { |
456 | 6.21k | return false; |
457 | 6.21k | } |
458 | 12.0k | } |
459 | 880 | return true; |
460 | 7.09k | } |
461 | | |
462 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
463 | 7.31k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
464 | 7.31k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
465 | 7.31k | } |
466 | | |
467 | 5.34k | static inline bool is_all_digit(const char* __restrict s, int len) { |
468 | 11.1k | for (int i = 0; i < len; ++i) { |
469 | 6.10k | if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { |
470 | 302 | return false; |
471 | 302 | } |
472 | 6.10k | } |
473 | 5.04k | return true; |
474 | 5.34k | } |
475 | | }; // end of class StringParser |
476 | | |
477 | | template <typename T, bool enable_strict_mode> |
478 | 892k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
479 | 892k | if (UNLIKELY(len <= 0)) { |
480 | 4.61k | *result = PARSE_FAILURE; |
481 | 4.61k | return 0; |
482 | 4.61k | } |
483 | | |
484 | 887k | using UnsignedT = MakeUnsignedT<T>; |
485 | 887k | UnsignedT val = 0; |
486 | 887k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
487 | 887k | bool negative = false; |
488 | 887k | int i = 0; |
489 | 887k | switch (*s) { |
490 | 205k | case '-': |
491 | 205k | negative = true; |
492 | 205k | max_val += 1; |
493 | 205k | [[fallthrough]]; |
494 | 212k | case '+': |
495 | 212k | ++i; |
496 | | // only one '+'/'-' char, so could return failure directly |
497 | 212k | if (UNLIKELY(len == 1)) { |
498 | 18 | *result = PARSE_FAILURE; |
499 | 18 | return 0; |
500 | 18 | } |
501 | 887k | } |
502 | | |
503 | | // This is the fast path where the string cannot overflow. |
504 | 887k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { |
505 | 555k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); |
506 | 555k | return static_cast<T>(negative ? -val : val); |
507 | 555k | } |
508 | | |
509 | 332k | const T max_div_10 = max_val / 10; |
510 | 332k | const T max_mod_10 = max_val % 10; |
511 | | |
512 | 332k | int first = i; |
513 | 3.37M | for (; i < len; ++i) { |
514 | 3.23M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
515 | 3.14M | T digit = s[i] - '0'; |
516 | | // This is a tricky check to see if adding this digit will cause an overflow. |
517 | 3.14M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
518 | 96.8k | *result = PARSE_OVERFLOW; |
519 | 96.8k | return negative ? -max_val : max_val; |
520 | 96.8k | } |
521 | 3.04M | val = val * 10 + digit; |
522 | 3.04M | } else { |
523 | 91.7k | if constexpr (enable_strict_mode) { |
524 | 8.16k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
525 | | // Reject the string because the remaining chars are not all whitespace |
526 | 7.56k | *result = PARSE_FAILURE; |
527 | 7.56k | return 0; |
528 | 7.56k | } |
529 | 83.6k | } else { |
530 | | // Save original position where non-digit was found |
531 | 83.6k | int remaining_len = len - i; |
532 | 83.6k | const char* remaining_s = s + i; |
533 | | // Skip trailing whitespaces from the remaining portion |
534 | 83.6k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); |
535 | 83.6k | if ((UNLIKELY(i == first || (remaining_len != 0 && |
536 | 83.6k | !is_float_suffix(remaining_s, remaining_len))))) { |
537 | | // Reject the string because either the first char was not a digit, |
538 | | // or the remaining chars are not all whitespace |
539 | 57.8k | *result = PARSE_FAILURE; |
540 | 57.8k | return 0; |
541 | 57.8k | } |
542 | 83.6k | } |
543 | | // Returning here is slightly faster than breaking the loop. |
544 | 26.3k | *result = PARSE_SUCCESS; |
545 | 91.7k | return static_cast<T>(negative ? -val : val); |
546 | 91.7k | } |
547 | 3.23M | } |
548 | 143k | *result = PARSE_SUCCESS; |
549 | 143k | return static_cast<T>(negative ? -val : val); |
550 | 332k | } _ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 93.2k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 93.2k | if (UNLIKELY(len <= 0)) { | 480 | 88 | *result = PARSE_FAILURE; | 481 | 88 | return 0; | 482 | 88 | } | 483 | | | 484 | 93.1k | using UnsignedT = MakeUnsignedT<T>; | 485 | 93.1k | UnsignedT val = 0; | 486 | 93.1k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 93.1k | bool negative = false; | 488 | 93.1k | int i = 0; | 489 | 93.1k | switch (*s) { | 490 | 7.09k | case '-': | 491 | 7.09k | negative = true; | 492 | 7.09k | max_val += 1; | 493 | 7.09k | [[fallthrough]]; | 494 | 7.65k | case '+': | 495 | 7.65k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 7.65k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 93.1k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 93.1k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 83.8k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 83.8k | return static_cast<T>(negative ? -val : val); | 507 | 83.8k | } | 508 | | | 509 | 9.30k | const T max_div_10 = max_val / 10; | 510 | 9.30k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 9.30k | int first = i; | 513 | 345k | for (; i < len; ++i) { | 514 | 338k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 337k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 337k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.02k | *result = PARSE_OVERFLOW; | 519 | 1.02k | return negative ? 
-max_val : max_val; | 520 | 1.02k | } | 521 | 336k | val = val * 10 + digit; | 522 | 336k | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 1.07k | } else { | 530 | | // Save original position where non-digit was found | 531 | 1.07k | int remaining_len = len - i; | 532 | 1.07k | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 1.07k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 1.07k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 1.07k | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 752 | *result = PARSE_FAILURE; | 540 | 752 | return 0; | 541 | 752 | } | 542 | 1.07k | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 320 | *result = PARSE_SUCCESS; | 545 | 1.07k | return static_cast<T>(negative ? -val : val); | 546 | 1.07k | } | 547 | 338k | } | 548 | 7.21k | *result = PARSE_SUCCESS; | 549 | 7.21k | return static_cast<T>(negative ? -val : val); | 550 | 9.30k | } |
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 249k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 249k | if (UNLIKELY(len <= 0)) { | 480 | 436 | *result = PARSE_FAILURE; | 481 | 436 | return 0; | 482 | 436 | } | 483 | | | 484 | 248k | using UnsignedT = MakeUnsignedT<T>; | 485 | 248k | UnsignedT val = 0; | 486 | 248k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 248k | bool negative = false; | 488 | 248k | int i = 0; | 489 | 248k | switch (*s) { | 490 | 44.9k | case '-': | 491 | 44.9k | negative = true; | 492 | 44.9k | max_val += 1; | 493 | 44.9k | [[fallthrough]]; | 494 | 45.8k | case '+': | 495 | 45.8k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 45.8k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 248k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 248k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 120k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 120k | return static_cast<T>(negative ? -val : val); | 507 | 120k | } | 508 | | | 509 | 128k | const T max_div_10 = max_val / 10; | 510 | 128k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 128k | int first = i; | 513 | 308k | for (; i < len; ++i) { | 514 | 295k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 223k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 223k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 42.4k | *result = PARSE_OVERFLOW; | 519 | 42.4k | return negative ? 
-max_val : max_val; | 520 | 42.4k | } | 521 | 180k | val = val * 10 + digit; | 522 | 180k | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 71.9k | } else { | 530 | | // Save original position where non-digit was found | 531 | 71.9k | int remaining_len = len - i; | 532 | 71.9k | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 71.9k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 71.9k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 71.9k | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 49.0k | *result = PARSE_FAILURE; | 540 | 49.0k | return 0; | 541 | 49.0k | } | 542 | 71.9k | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 22.9k | *result = PARSE_SUCCESS; | 545 | 71.9k | return static_cast<T>(negative ? -val : val); | 546 | 71.9k | } | 547 | 295k | } | 548 | 13.6k | *result = PARSE_SUCCESS; | 549 | 13.6k | return static_cast<T>(negative ? -val : val); | 550 | 128k | } |
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 3.82k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 3.82k | if (UNLIKELY(len <= 0)) { | 480 | 16 | *result = PARSE_FAILURE; | 481 | 16 | return 0; | 482 | 16 | } | 483 | | | 484 | 3.80k | using UnsignedT = MakeUnsignedT<T>; | 485 | 3.80k | UnsignedT val = 0; | 486 | 3.80k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 3.80k | bool negative = false; | 488 | 3.80k | int i = 0; | 489 | 3.80k | switch (*s) { | 490 | 1.26k | case '-': | 491 | 1.26k | negative = true; | 492 | 1.26k | max_val += 1; | 493 | 1.26k | [[fallthrough]]; | 494 | 1.97k | case '+': | 495 | 1.97k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 1.97k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 3.80k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 3.80k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 96 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 96 | return static_cast<T>(negative ? -val : val); | 507 | 96 | } | 508 | | | 509 | 3.71k | const T max_div_10 = max_val / 10; | 510 | 3.71k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 3.71k | int first = i; | 513 | 13.1k | for (; i < len; ++i) { | 514 | 13.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 10.6k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 10.6k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.20k | *result = PARSE_OVERFLOW; | 519 | 1.20k | return negative ? 
-max_val : max_val; | 520 | 1.20k | } | 521 | 9.45k | val = val * 10 + digit; | 522 | 9.45k | } else { | 523 | 2.37k | if constexpr (enable_strict_mode) { | 524 | 2.37k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 2.20k | *result = PARSE_FAILURE; | 527 | 2.20k | return 0; | 528 | 2.20k | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 176 | *result = PARSE_SUCCESS; | 545 | 2.37k | return static_cast<T>(negative ? -val : val); | 546 | 2.37k | } | 547 | 13.0k | } | 548 | 136 | *result = PARSE_SUCCESS; | 549 | 136 | return static_cast<T>(negative ? -val : val); | 550 | 3.71k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 148k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 148k | if (UNLIKELY(len <= 0)) { | 480 | 16 | *result = PARSE_FAILURE; | 481 | 16 | return 0; | 482 | 16 | } | 483 | | | 484 | 148k | using UnsignedT = MakeUnsignedT<T>; | 485 | 148k | UnsignedT val = 0; | 486 | 148k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 148k | bool negative = false; | 488 | 148k | int i = 0; | 489 | 148k | switch (*s) { | 490 | 25.6k | case '-': | 491 | 25.6k | negative = true; | 492 | 25.6k | max_val += 1; | 493 | 25.6k | [[fallthrough]]; | 494 | 26.3k | case '+': | 495 | 26.3k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 26.3k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 148k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 148k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 101k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 101k | return static_cast<T>(negative ? -val : val); | 507 | 101k | } | 508 | | | 509 | 47.2k | const T max_div_10 = max_val / 10; | 510 | 47.2k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 47.2k | int first = i; | 513 | 246k | for (; i < len; ++i) { | 514 | 228k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 224k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 224k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 25.8k | *result = PARSE_OVERFLOW; | 519 | 25.8k | return negative ? 
-max_val : max_val; | 520 | 25.8k | } | 521 | 199k | val = val * 10 + digit; | 522 | 199k | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 3.81k | } else { | 530 | | // Save original position where non-digit was found | 531 | 3.81k | int remaining_len = len - i; | 532 | 3.81k | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 3.81k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 3.81k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 3.81k | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 2.59k | *result = PARSE_FAILURE; | 540 | 2.59k | return 0; | 541 | 2.59k | } | 542 | 3.81k | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 1.22k | *result = PARSE_SUCCESS; | 545 | 3.81k | return static_cast<T>(negative ? -val : val); | 546 | 3.81k | } | 547 | 228k | } | 548 | 17.6k | *result = PARSE_SUCCESS; | 549 | 17.6k | return static_cast<T>(negative ? -val : val); | 550 | 47.2k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 3.76k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 3.76k | if (UNLIKELY(len <= 0)) { | 480 | 16 | *result = PARSE_FAILURE; | 481 | 16 | return 0; | 482 | 16 | } | 483 | | | 484 | 3.74k | using UnsignedT = MakeUnsignedT<T>; | 485 | 3.74k | UnsignedT val = 0; | 486 | 3.74k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 3.74k | bool negative = false; | 488 | 3.74k | int i = 0; | 489 | 3.74k | switch (*s) { | 490 | 1.24k | case '-': | 491 | 1.24k | negative = true; | 492 | 1.24k | max_val += 1; | 493 | 1.24k | [[fallthrough]]; | 494 | 1.94k | case '+': | 495 | 1.94k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 1.94k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 3.74k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 3.74k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 336 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 336 | return static_cast<T>(negative ? -val : val); | 507 | 336 | } | 508 | | | 509 | 3.40k | const T max_div_10 = max_val / 10; | 510 | 3.40k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 3.40k | int first = i; | 513 | 15.7k | for (; i < len; ++i) { | 514 | 15.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 13.4k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 13.4k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.15k | *result = PARSE_OVERFLOW; | 519 | 1.15k | return negative ? 
-max_val : max_val; | 520 | 1.15k | } | 521 | 12.3k | val = val * 10 + digit; | 522 | 12.3k | } else { | 523 | 2.17k | if constexpr (enable_strict_mode) { | 524 | 2.17k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 2.00k | *result = PARSE_FAILURE; | 527 | 2.00k | return 0; | 528 | 2.00k | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 176 | *result = PARSE_SUCCESS; | 545 | 2.17k | return static_cast<T>(negative ? -val : val); | 546 | 2.17k | } | 547 | 15.6k | } | 548 | 80 | *result = PARSE_SUCCESS; | 549 | 80 | return static_cast<T>(negative ? -val : val); | 550 | 3.40k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 210k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 210k | if (UNLIKELY(len <= 0)) { | 480 | 3.95k | *result = PARSE_FAILURE; | 481 | 3.95k | return 0; | 482 | 3.95k | } | 483 | | | 484 | 206k | using UnsignedT = MakeUnsignedT<T>; | 485 | 206k | UnsignedT val = 0; | 486 | 206k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 206k | bool negative = false; | 488 | 206k | int i = 0; | 489 | 206k | switch (*s) { | 490 | 21.3k | case '-': | 491 | 21.3k | negative = true; | 492 | 21.3k | max_val += 1; | 493 | 21.3k | [[fallthrough]]; | 494 | 22.1k | case '+': | 495 | 22.1k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 22.1k | if (UNLIKELY(len == 1)) { | 498 | 18 | *result = PARSE_FAILURE; | 499 | 18 | return 0; | 500 | 18 | } | 501 | 206k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 206k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 179k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 179k | return static_cast<T>(negative ? -val : val); | 507 | 179k | } | 508 | | | 509 | 27.0k | const T max_div_10 = max_val / 10; | 510 | 27.0k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 27.0k | int first = i; | 513 | 257k | for (; i < len; ++i) { | 514 | 245k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 241k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 241k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 11.5k | *result = PARSE_OVERFLOW; | 519 | 11.5k | return negative ? 
-max_val : max_val; | 520 | 11.5k | } | 521 | 230k | val = val * 10 + digit; | 522 | 230k | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 3.76k | } else { | 530 | | // Save original position where non-digit was found | 531 | 3.76k | int remaining_len = len - i; | 532 | 3.76k | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 3.76k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 3.76k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 3.76k | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 2.99k | *result = PARSE_FAILURE; | 540 | 2.99k | return 0; | 541 | 2.99k | } | 542 | 3.76k | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 772 | *result = PARSE_SUCCESS; | 545 | 3.76k | return static_cast<T>(negative ? -val : val); | 546 | 3.76k | } | 547 | 245k | } | 548 | 11.6k | *result = PARSE_SUCCESS; | 549 | 11.6k | return static_cast<T>(negative ? -val : val); | 550 | 27.0k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 3.69k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 3.69k | if (UNLIKELY(len <= 0)) { | 480 | 16 | *result = PARSE_FAILURE; | 481 | 16 | return 0; | 482 | 16 | } | 483 | | | 484 | 3.68k | using UnsignedT = MakeUnsignedT<T>; | 485 | 3.68k | UnsignedT val = 0; | 486 | 3.68k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 3.68k | bool negative = false; | 488 | 3.68k | int i = 0; | 489 | 3.68k | switch (*s) { | 490 | 1.21k | case '-': | 491 | 1.21k | negative = true; | 492 | 1.21k | max_val += 1; | 493 | 1.21k | [[fallthrough]]; | 494 | 1.90k | case '+': | 495 | 1.90k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 1.90k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 3.68k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 3.68k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 922 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 922 | return static_cast<T>(negative ? -val : val); | 507 | 922 | } | 508 | | | 509 | 2.75k | const T max_div_10 = max_val / 10; | 510 | 2.75k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 2.75k | int first = i; | 513 | 21.4k | for (; i < len; ++i) { | 514 | 21.3k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 19.8k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 19.8k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.10k | *result = PARSE_OVERFLOW; | 519 | 1.10k | return negative ? 
-max_val : max_val; | 520 | 1.10k | } | 521 | 18.6k | val = val * 10 + digit; | 522 | 18.6k | } else { | 523 | 1.59k | if constexpr (enable_strict_mode) { | 524 | 1.59k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 1.47k | *result = PARSE_FAILURE; | 527 | 1.47k | return 0; | 528 | 1.47k | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 120 | *result = PARSE_SUCCESS; | 545 | 1.59k | return static_cast<T>(negative ? -val : val); | 546 | 1.59k | } | 547 | 21.3k | } | 548 | 64 | *result = PARSE_SUCCESS; | 549 | 64 | return static_cast<T>(negative ? -val : val); | 550 | 2.75k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 171k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 171k | if (UNLIKELY(len <= 0)) { | 480 | 28 | *result = PARSE_FAILURE; | 481 | 28 | return 0; | 482 | 28 | } | 483 | | | 484 | 170k | using UnsignedT = MakeUnsignedT<T>; | 485 | 170k | UnsignedT val = 0; | 486 | 170k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 170k | bool negative = false; | 488 | 170k | int i = 0; | 489 | 170k | switch (*s) { | 490 | 100k | case '-': | 491 | 100k | negative = true; | 492 | 100k | max_val += 1; | 493 | 100k | [[fallthrough]]; | 494 | 101k | case '+': | 495 | 101k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 101k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 170k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 170k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 64.6k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 64.6k | return static_cast<T>(negative ? -val : val); | 507 | 64.6k | } | 508 | | | 509 | 106k | const T max_div_10 = max_val / 10; | 510 | 106k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 106k | int first = i; | 513 | 2.07M | for (; i < len; ++i) { | 514 | 1.97M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 1.97M | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 1.97M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 10.4k | *result = PARSE_OVERFLOW; | 519 | 10.4k | return negative ? 
-max_val : max_val; | 520 | 10.4k | } | 521 | 1.96M | val = val * 10 + digit; | 522 | 1.96M | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 2.98k | } else { | 530 | | // Save original position where non-digit was found | 531 | 2.98k | int remaining_len = len - i; | 532 | 2.98k | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 2.98k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 2.98k | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 2.98k | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 2.49k | *result = PARSE_FAILURE; | 540 | 2.49k | return 0; | 541 | 2.49k | } | 542 | 2.98k | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 488 | *result = PARSE_SUCCESS; | 545 | 2.98k | return static_cast<T>(negative ? -val : val); | 546 | 2.98k | } | 547 | 1.97M | } | 548 | 92.9k | *result = PARSE_SUCCESS; | 549 | 92.9k | return static_cast<T>(negative ? -val : val); | 550 | 106k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 3.65k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 3.65k | if (UNLIKELY(len <= 0)) { | 480 | 20 | *result = PARSE_FAILURE; | 481 | 20 | return 0; | 482 | 20 | } | 483 | | | 484 | 3.63k | using UnsignedT = MakeUnsignedT<T>; | 485 | 3.63k | UnsignedT val = 0; | 486 | 3.63k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 3.63k | bool negative = false; | 488 | 3.63k | int i = 0; | 489 | 3.63k | switch (*s) { | 490 | 1.19k | case '-': | 491 | 1.19k | negative = true; | 492 | 1.19k | max_val += 1; | 493 | 1.19k | [[fallthrough]]; | 494 | 1.86k | case '+': | 495 | 1.86k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 1.86k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 3.63k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 3.63k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 1.47k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 1.47k | return static_cast<T>(negative ? -val : val); | 507 | 1.47k | } | 508 | | | 509 | 2.16k | const T max_div_10 = max_val / 10; | 510 | 2.16k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 2.16k | int first = i; | 513 | 33.8k | for (; i < len; ++i) { | 514 | 33.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 32.7k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 32.7k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.05k | *result = PARSE_OVERFLOW; | 519 | 1.05k | return negative ? 
-max_val : max_val; | 520 | 1.05k | } | 521 | 31.6k | val = val * 10 + digit; | 522 | 31.6k | } else { | 523 | 1.04k | if constexpr (enable_strict_mode) { | 524 | 1.04k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 982 | *result = PARSE_FAILURE; | 527 | 982 | return 0; | 528 | 982 | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 64 | *result = PARSE_SUCCESS; | 545 | 1.04k | return static_cast<T>(negative ? -val : val); | 546 | 1.04k | } | 547 | 33.7k | } | 548 | 64 | *result = PARSE_SUCCESS; | 549 | 64 | return static_cast<T>(negative ? -val : val); | 550 | 2.16k | } |
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 3.56k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 3.56k | if (UNLIKELY(len <= 0)) { | 480 | 16 | *result = PARSE_FAILURE; | 481 | 16 | return 0; | 482 | 16 | } | 483 | | | 484 | 3.55k | using UnsignedT = MakeUnsignedT<T>; | 485 | 3.55k | UnsignedT val = 0; | 486 | 3.55k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 3.55k | bool negative = false; | 488 | 3.55k | int i = 0; | 489 | 3.55k | switch (*s) { | 490 | 1.16k | case '-': | 491 | 1.16k | negative = true; | 492 | 1.16k | max_val += 1; | 493 | 1.16k | [[fallthrough]]; | 494 | 1.83k | case '+': | 495 | 1.83k | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 1.83k | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 3.55k | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 3.55k | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 1.50k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 1.50k | return static_cast<T>(negative ? -val : val); | 507 | 1.50k | } | 508 | | | 509 | 2.04k | const T max_div_10 = max_val / 10; | 510 | 2.04k | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 2.04k | int first = i; | 513 | 62.6k | for (; i < len; ++i) { | 514 | 62.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 61.5k | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 61.5k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 1.00k | *result = PARSE_OVERFLOW; | 519 | 1.00k | return negative ? 
-max_val : max_val; | 520 | 1.00k | } | 521 | 60.5k | val = val * 10 + digit; | 522 | 60.5k | } else { | 523 | 976 | if constexpr (enable_strict_mode) { | 524 | 976 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 912 | *result = PARSE_FAILURE; | 527 | 912 | return 0; | 528 | 912 | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 64 | *result = PARSE_SUCCESS; | 545 | 976 | return static_cast<T>(negative ? -val : val); | 546 | 976 | } | 547 | 62.5k | } | 548 | 64 | *result = PARSE_SUCCESS; | 549 | 64 | return static_cast<T>(negative ? -val : val); | 550 | 2.04k | } |
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 298 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 298 | if (UNLIKELY(len <= 0)) { | 480 | 4 | *result = PARSE_FAILURE; | 481 | 4 | return 0; | 482 | 4 | } | 483 | | | 484 | 294 | using UnsignedT = MakeUnsignedT<T>; | 485 | 294 | UnsignedT val = 0; | 486 | 294 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 294 | bool negative = false; | 488 | 294 | int i = 0; | 489 | 294 | switch (*s) { | 490 | 0 | case '-': | 491 | 0 | negative = true; | 492 | 0 | max_val += 1; | 493 | 0 | [[fallthrough]]; | 494 | 0 | case '+': | 495 | 0 | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 0 | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 294 | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 294 | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 294 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 294 | return static_cast<T>(negative ? -val : val); | 507 | 294 | } | 508 | | | 509 | 0 | const T max_div_10 = max_val / 10; | 510 | 0 | const T max_mod_10 = max_val % 10; | 511 | |
| 512 | 0 | int first = i; | 513 | 0 | for (; i < len; ++i) { | 514 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 0 | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 0 | *result = PARSE_OVERFLOW; | 519 | 0 | return negative ? -max_val : max_val; | 520 | 0 | } | 521 | 0 | val = val * 10 + digit; | 522 | 0 | } else { | 523 | 0 | if constexpr (enable_strict_mode) { | 524 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | 0 | *result = PARSE_FAILURE; | 527 | 0 | return 0; | 528 | 0 | } | 529 | | } else { | 530 | | // Save original position where non-digit was found | 531 | | int remaining_len = len - i; | 532 | | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | | *result = PARSE_FAILURE; | 540 | | return 0; | 541 | | } | 542 | | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 0 | *result = PARSE_SUCCESS; | 545 | 0 | return static_cast<T>(negative ? -val : val); | 546 | 0 | } | 547 | 0 | } | 548 | 0 | *result = PARSE_SUCCESS; | 549 | 0 | return static_cast<T>(negative ? -val : val); | 550 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 40 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 40 | if (UNLIKELY(len <= 0)) { | 480 | 0 | *result = PARSE_FAILURE; | 481 | 0 | return 0; | 482 | 0 | } | 483 | | | 484 | 40 | using UnsignedT = MakeUnsignedT<T>; | 485 | 40 | UnsignedT val = 0; | 486 | 40 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 40 | bool negative = false; | 488 | 40 | int i = 0; | 489 | 40 | switch (*s) { | 490 | 0 | case '-': | 491 | 0 | negative = true; | 492 | 0 | max_val += 1; | 493 | 0 | [[fallthrough]]; | 494 | 0 | case '+': | 495 | 0 | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 0 | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 40 | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 40 | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 40 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 40 | return static_cast<T>(negative ? -val : val); | 507 | 40 | } | 508 | | | 509 | 0 | const T max_div_10 = max_val / 10; | 510 | 0 | const T max_mod_10 = max_val % 10; | 511 | |
| 512 | 0 | int first = i; | 513 | 0 | for (; i < len; ++i) { | 514 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 0 | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 0 | *result = PARSE_OVERFLOW; | 519 | 0 | return negative ? -max_val : max_val; | 520 | 0 | } | 521 | 0 | val = val * 10 + digit; | 522 | 0 | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 0 | } else { | 530 | | // Save original position where non-digit was found | 531 | 0 | int remaining_len = len - i; | 532 | 0 | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 0 | *result = PARSE_FAILURE; | 540 | 0 | return 0; | 541 | 0 | } | 542 | 0 | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 0 | *result = PARSE_SUCCESS; | 545 | 0 | return static_cast<T>(negative ? -val : val); | 546 | 0 | } | 547 | 0 | } | 548 | 0 | *result = PARSE_SUCCESS; | 549 | 0 | return static_cast<T>(negative ? -val : val); | 550 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 8 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 8 | if (UNLIKELY(len <= 0)) { | 480 | 0 | *result = PARSE_FAILURE; | 481 | 0 | return 0; | 482 | 0 | } | 483 | | | 484 | 8 | using UnsignedT = MakeUnsignedT<T>; | 485 | 8 | UnsignedT val = 0; | 486 | 8 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 8 | bool negative = false; | 488 | 8 | int i = 0; | 489 | 8 | switch (*s) { | 490 | 0 | case '-': | 491 | 0 | negative = true; | 492 | 0 | max_val += 1; | 493 | 0 | [[fallthrough]]; | 494 | 0 | case '+': | 495 | 0 | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 0 | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 8 | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 8 | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 8 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 8 | return static_cast<T>(negative ? -val : val); | 507 | 8 | } | 508 | | | 509 | 0 | const T max_div_10 = max_val / 10; | 510 | 0 | const T max_mod_10 = max_val % 10; | 511 | |
| 512 | 0 | int first = i; | 513 | 0 | for (; i < len; ++i) { | 514 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 0 | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 0 | *result = PARSE_OVERFLOW; | 519 | 0 | return negative ? -max_val : max_val; | 520 | 0 | } | 521 | 0 | val = val * 10 + digit; | 522 | 0 | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 0 | } else { | 530 | | // Save original position where non-digit was found | 531 | 0 | int remaining_len = len - i; | 532 | 0 | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 0 | *result = PARSE_FAILURE; | 540 | 0 | return 0; | 541 | 0 | } | 542 | 0 | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 0 | *result = PARSE_SUCCESS; | 545 | 0 | return static_cast<T>(negative ? -val : val); | 546 | 0 | } | 547 | 0 | } | 548 | 0 | *result = PARSE_SUCCESS; | 549 | 0 | return static_cast<T>(negative ? -val : val); | 550 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 478 | 8 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 479 | 8 | if (UNLIKELY(len <= 0)) { | 480 | 0 | *result = PARSE_FAILURE; | 481 | 0 | return 0; | 482 | 0 | } | 483 | | | 484 | 8 | using UnsignedT = MakeUnsignedT<T>; | 485 | 8 | UnsignedT val = 0; | 486 | 8 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 487 | 8 | bool negative = false; | 488 | 8 | int i = 0; | 489 | 8 | switch (*s) { | 490 | 0 | case '-': | 491 | 0 | negative = true; | 492 | 0 | max_val += 1; | 493 | 0 | [[fallthrough]]; | 494 | 0 | case '+': | 495 | 0 | ++i; | 496 | | // only one '+'/'-' char, so could return failure directly | 497 | 0 | if (UNLIKELY(len == 1)) { | 498 | 0 | *result = PARSE_FAILURE; | 499 | 0 | return 0; | 500 | 0 | } | 501 | 8 | } | 502 | | | 503 | | // This is the fast path where the string cannot overflow. | 504 | 8 | if (LIKELY(len - i < NumberTraits::max_ascii_len<T>())) { | 505 | 0 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 506 | 0 | return static_cast<T>(negative ? -val : val); | 507 | 0 | } | 508 | | | 509 | 8 | const T max_div_10 = max_val / 10; | 510 | 8 | const T max_mod_10 = max_val % 10; | 511 | | | 512 | 8 | int first = i; | 513 | 168 | for (; i < len; ++i) { | 514 | 160 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 515 | 160 | T digit = s[i] - '0'; | 516 | | // This is a tricky check to see if adding this digit will cause an overflow. | 517 | 160 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 518 | 0 | *result = PARSE_OVERFLOW; | 519 | 0 | return negative ? 
-max_val : max_val; | 520 | 0 | } | 521 | 160 | val = val * 10 + digit; | 522 | 160 | } else { | 523 | | if constexpr (enable_strict_mode) { | 524 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 525 | | // Reject the string because the remaining chars are not all whitespace | 526 | | *result = PARSE_FAILURE; | 527 | | return 0; | 528 | | } | 529 | 0 | } else { | 530 | | // Save original position where non-digit was found | 531 | 0 | int remaining_len = len - i; | 532 | 0 | const char* remaining_s = s + i; | 533 | | // Skip trailing whitespaces from the remaining portion | 534 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 535 | 0 | if ((UNLIKELY(i == first || (remaining_len != 0 && | 536 | 0 | !is_float_suffix(remaining_s, remaining_len))))) { | 537 | | // Reject the string because either the first char was not a digit, | 538 | | // or the remaining chars are not all whitespace | 539 | 0 | *result = PARSE_FAILURE; | 540 | 0 | return 0; | 541 | 0 | } | 542 | 0 | } | 543 | | // Returning here is slightly faster than breaking the loop. | 544 | 0 | *result = PARSE_SUCCESS; | 545 | 0 | return static_cast<T>(negative ? -val : val); | 546 | 0 | } | 547 | 160 | } | 548 | 8 | *result = PARSE_SUCCESS; | 549 | 8 | return static_cast<T>(negative ? -val : val); | 550 | 8 | } |
|
551 | | |
552 | | template <typename T> |
553 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
554 | 2.74k | ParseResult* result) { |
555 | 2.74k | if (UNLIKELY(len <= 0)) { |
556 | 0 | *result = PARSE_FAILURE; |
557 | 0 | return 0; |
558 | 0 | } |
559 | | |
560 | 2.74k | T val = 0; |
561 | 2.74k | T max_val = std::numeric_limits<T>::max(); |
562 | 2.74k | int i = 0; |
563 | | |
564 | 2.74k | using signedT = MakeSignedT<T>; |
565 | | // This is the fast path where the string cannot overflow. |
566 | 2.74k | if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) { |
567 | 1.56k | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
568 | 1.56k | return val; |
569 | 1.56k | } |
570 | | |
571 | 1.17k | const T max_div_10 = max_val / 10; |
572 | 1.17k | const T max_mod_10 = max_val % 10; |
573 | | |
574 | 1.17k | int first = i; |
575 | 9.31k | for (; i < len; ++i) { |
576 | 8.62k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
577 | 8.52k | T digit = s[i] - '0'; |
578 | | // This is a tricky check to see if adding this digit will cause an overflow. |
579 | 8.52k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
580 | 392 | *result = PARSE_OVERFLOW; |
581 | 392 | return max_val; |
582 | 392 | } |
583 | 8.13k | val = val * 10 + digit; |
584 | 8.13k | } else { |
585 | 98 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
586 | | // Reject the string because either the first char was not a digit, |
587 | | // or the remaining chars are not all whitespace |
588 | 98 | *result = PARSE_FAILURE; |
589 | 98 | return 0; |
590 | 98 | } |
591 | | // Returning here is slightly faster than breaking the loop. |
592 | 0 | *result = PARSE_SUCCESS; |
593 | 0 | return val; |
594 | 98 | } |
595 | 8.62k | } |
596 | 686 | *result = PARSE_SUCCESS; |
597 | 686 | return val; |
598 | 1.17k | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 554 | 686 | ParseResult* result) { | 555 | 686 | if (UNLIKELY(len <= 0)) { | 556 | 0 | *result = PARSE_FAILURE; | 557 | 0 | return 0; | 558 | 0 | } | 559 | | | 560 | 686 | T val = 0; | 561 | 686 | T max_val = std::numeric_limits<T>::max(); | 562 | 686 | int i = 0; | 563 | | | 564 | 686 | using signedT = MakeSignedT<T>; | 565 | | // This is the fast path where the string cannot overflow. | 566 | 686 | if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) { | 567 | 196 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 568 | 196 | return val; | 569 | 196 | } | 570 | | | 571 | 490 | const T max_div_10 = max_val / 10; | 572 | 490 | const T max_mod_10 = max_val % 10; | 573 | | | 574 | 490 | int first = i; | 575 | 1.56k | for (; i < len; ++i) { | 576 | 1.27k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 577 | 1.17k | T digit = s[i] - '0'; | 578 | | // This is a tricky check to see if adding this digit will cause an overflow. | 579 | 1.17k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 580 | 98 | *result = PARSE_OVERFLOW; | 581 | 98 | return max_val; | 582 | 98 | } | 583 | 1.07k | val = val * 10 + digit; | 584 | 1.07k | } else { | 585 | 98 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 586 | | // Reject the string because either the first char was not a digit, | 587 | | // or the remaining chars are not all whitespace | 588 | 98 | *result = PARSE_FAILURE; | 589 | 98 | return 0; | 590 | 98 | } | 591 | | // Returning here is slightly faster than breaking the loop. | 592 | 0 | *result = PARSE_SUCCESS; | 593 | 0 | return val; | 594 | 98 | } | 595 | 1.27k | } | 596 | 294 | *result = PARSE_SUCCESS; | 597 | 294 | return val; | 598 | 490 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 554 | 686 | ParseResult* result) { | 555 | 686 | if (UNLIKELY(len <= 0)) { | 556 | 0 | *result = PARSE_FAILURE; | 557 | 0 | return 0; | 558 | 0 | } | 559 | | | 560 | 686 | T val = 0; | 561 | 686 | T max_val = std::numeric_limits<T>::max(); | 562 | 686 | int i = 0; | 563 | | | 564 | 686 | using signedT = MakeSignedT<T>; | 565 | | // This is the fast path where the string cannot overflow. | 566 | 686 | if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) { | 567 | 392 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 568 | 392 | return val; | 569 | 392 | } | 570 | | | 571 | 294 | const T max_div_10 = max_val / 10; | 572 | 294 | const T max_mod_10 = max_val % 10; | 573 | | | 574 | 294 | int first = i; | 575 | 1.66k | for (; i < len; ++i) { | 576 | 1.47k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 577 | 1.47k | T digit = s[i] - '0'; | 578 | | // This is a tricky check to see if adding this digit will cause an overflow. | 579 | 1.47k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 580 | 98 | *result = PARSE_OVERFLOW; | 581 | 98 | return max_val; | 582 | 98 | } | 583 | 1.37k | val = val * 10 + digit; | 584 | 1.37k | } else { | 585 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 586 | | // Reject the string because either the first char was not a digit, | 587 | | // or the remaining chars are not all whitespace | 588 | 0 | *result = PARSE_FAILURE; | 589 | 0 | return 0; | 590 | 0 | } | 591 | | // Returning here is slightly faster than breaking the loop. | 592 | 0 | *result = PARSE_SUCCESS; | 593 | 0 | return val; | 594 | 0 | } | 595 | 1.47k | } | 596 | 196 | *result = PARSE_SUCCESS; | 597 | 196 | return val; | 598 | 294 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 554 | 686 | ParseResult* result) { | 555 | 686 | if (UNLIKELY(len <= 0)) { | 556 | 0 | *result = PARSE_FAILURE; | 557 | 0 | return 0; | 558 | 0 | } | 559 | | | 560 | 686 | T val = 0; | 561 | 686 | T max_val = std::numeric_limits<T>::max(); | 562 | 686 | int i = 0; | 563 | | | 564 | 686 | using signedT = MakeSignedT<T>; | 565 | | // This is the fast path where the string cannot overflow. | 566 | 686 | if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) { | 567 | 490 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 568 | 490 | return val; | 569 | 490 | } | 570 | | | 571 | 196 | const T max_div_10 = max_val / 10; | 572 | 196 | const T max_mod_10 = max_val % 10; | 573 | | | 574 | 196 | int first = i; | 575 | 2.05k | for (; i < len; ++i) { | 576 | 1.96k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 577 | 1.96k | T digit = s[i] - '0'; | 578 | | // This is a tricky check to see if adding this digit will cause an overflow. | 579 | 1.96k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 580 | 98 | *result = PARSE_OVERFLOW; | 581 | 98 | return max_val; | 582 | 98 | } | 583 | 1.86k | val = val * 10 + digit; | 584 | 1.86k | } else { | 585 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 586 | | // Reject the string because either the first char was not a digit, | 587 | | // or the remaining chars are not all whitespace | 588 | 0 | *result = PARSE_FAILURE; | 589 | 0 | return 0; | 590 | 0 | } | 591 | | // Returning here is slightly faster than breaking the loop. | 592 | 0 | *result = PARSE_SUCCESS; | 593 | 0 | return val; | 594 | 0 | } | 595 | 1.96k | } | 596 | 98 | *result = PARSE_SUCCESS; | 597 | 98 | return val; | 598 | 196 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 554 | 686 | ParseResult* result) { | 555 | 686 | if (UNLIKELY(len <= 0)) { | 556 | 0 | *result = PARSE_FAILURE; | 557 | 0 | return 0; | 558 | 0 | } | 559 | | | 560 | 686 | T val = 0; | 561 | 686 | T max_val = std::numeric_limits<T>::max(); | 562 | 686 | int i = 0; | 563 | | | 564 | 686 | using signedT = MakeSignedT<T>; | 565 | | // This is the fast path where the string cannot overflow. | 566 | 686 | if (LIKELY(len - i < NumberTraits::max_ascii_len<signedT>())) { | 567 | 490 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 568 | 490 | return val; | 569 | 490 | } | 570 | | | 571 | 196 | const T max_div_10 = max_val / 10; | 572 | 196 | const T max_mod_10 = max_val % 10; | 573 | | | 574 | 196 | int first = i; | 575 | 4.01k | for (; i < len; ++i) { | 576 | 3.92k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 577 | 3.92k | T digit = s[i] - '0'; | 578 | | // This is a tricky check to see if adding this digit will cause an overflow. | 579 | 3.92k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 580 | 98 | *result = PARSE_OVERFLOW; | 581 | 98 | return max_val; | 582 | 98 | } | 583 | 3.82k | val = val * 10 + digit; | 584 | 3.82k | } else { | 585 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 586 | | // Reject the string because either the first char was not a digit, | 587 | | // or the remaining chars are not all whitespace | 588 | 0 | *result = PARSE_FAILURE; | 589 | 0 | return 0; | 590 | 0 | } | 591 | | // Returning here is slightly faster than breaking the loop. | 592 | 0 | *result = PARSE_SUCCESS; | 593 | 0 | return val; | 594 | 0 | } | 595 | 3.92k | } | 596 | 98 | *result = PARSE_SUCCESS; | 597 | 98 | return val; | 598 | 196 | } |
|
599 | | |
600 | | template <typename T> |
601 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
602 | 55.6k | ParseResult* result) { |
603 | 55.6k | using UnsignedT = MakeUnsignedT<T>; |
604 | 55.6k | UnsignedT val = 0; |
605 | 55.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
606 | 55.6k | bool negative = false; |
607 | 55.6k | if (UNLIKELY(len <= 0)) { |
608 | 0 | *result = PARSE_FAILURE; |
609 | 0 | return 0; |
610 | 0 | } |
611 | 55.6k | int i = 0; |
612 | 55.6k | switch (*s) { |
613 | 26.8k | case '-': |
614 | 26.8k | negative = true; |
615 | 26.8k | max_val = StringParser::numeric_limits<T>(false) + 1; |
616 | 26.8k | [[fallthrough]]; |
617 | 27.4k | case '+': |
618 | 27.4k | i = 1; |
619 | 55.6k | } |
620 | | |
621 | 55.6k | const T max_div_base = max_val / base; |
622 | 55.6k | const T max_mod_base = max_val % base; |
623 | | |
624 | 55.6k | int first = i; |
625 | 181k | for (; i < len; ++i) { |
626 | 153k | T digit; |
627 | 153k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
628 | 151k | digit = s[i] - '0'; |
629 | 151k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
630 | 1.27k | digit = (s[i] - 'a' + 10); |
631 | 1.27k | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
632 | 196 | digit = (s[i] - 'A' + 10); |
633 | 294 | } else { |
634 | 294 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
635 | | // Reject the string because either the first char was not an alpha/digit, |
636 | | // or the remaining chars are not all whitespace |
637 | 294 | *result = PARSE_FAILURE; |
638 | 294 | return 0; |
639 | 294 | } |
640 | | // skip trailing whitespace. |
641 | 0 | break; |
642 | 294 | } |
643 | | |
644 | | // Bail, if we encounter a digit that is not available in base. |
645 | 152k | if (digit >= base) { |
646 | 784 | break; |
647 | 784 | } |
648 | | |
649 | | // This is a tricky check to see if adding this digit will cause an overflow. |
650 | 152k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
651 | 25.9k | *result = PARSE_OVERFLOW; |
652 | 25.9k | return static_cast<T>(negative ? -max_val : max_val); |
653 | 25.9k | } |
654 | 126k | val = val * base + digit; |
655 | 126k | } |
656 | 29.4k | *result = PARSE_SUCCESS; |
657 | 29.4k | return static_cast<T>(negative ? -val : val); |
658 | 55.6k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 602 | 52.9k | ParseResult* result) { | 603 | 52.9k | using UnsignedT = MakeUnsignedT<T>; | 604 | 52.9k | UnsignedT val = 0; | 605 | 52.9k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 606 | 52.9k | bool negative = false; | 607 | 52.9k | if (UNLIKELY(len <= 0)) { | 608 | 0 | *result = PARSE_FAILURE; | 609 | 0 | return 0; | 610 | 0 | } | 611 | 52.9k | int i = 0; | 612 | 52.9k | switch (*s) { | 613 | 25.7k | case '-': | 614 | 25.7k | negative = true; | 615 | 25.7k | max_val = StringParser::numeric_limits<T>(false) + 1; | 616 | 25.7k | [[fallthrough]]; | 617 | 25.9k | case '+': | 618 | 25.9k | i = 1; | 619 | 52.9k | } | 620 | | | 621 | 52.9k | const T max_div_base = max_val / base; | 622 | 52.9k | const T max_mod_base = max_val % base; | 623 | | | 624 | 52.9k | int first = i; | 625 | 161k | for (; i < len; ++i) { | 626 | 134k | T digit; | 627 | 134k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 628 | 133k | digit = s[i] - '0'; | 629 | 133k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 630 | 1.07k | digit = (s[i] - 'a' + 10); | 631 | 1.07k | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 632 | 196 | digit = (s[i] - 'A' + 10); | 633 | 294 | } else { | 634 | 294 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 635 | | // Reject the string because either the first char was not an alpha/digit, | 636 | | // or the remaining chars are not all whitespace | 637 | 294 | *result = PARSE_FAILURE; | 638 | 294 | return 0; | 639 | 294 | } | 640 | | // skip trailing whitespace. | 641 | 0 | break; | 642 | 294 | } | 643 | | | 644 | | // Bail, if we encounter a digit that is not available in base. | 645 | 134k | if (digit >= base) { | 646 | 784 | break; | 647 | 784 | } | 648 | | | 649 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 650 | 133k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 651 | 25.3k | *result = PARSE_OVERFLOW; | 652 | 25.3k | return static_cast<T>(negative ? -max_val : max_val); | 653 | 25.3k | } | 654 | 108k | val = val * base + digit; | 655 | 108k | } | 656 | 27.2k | *result = PARSE_SUCCESS; | 657 | 27.2k | return static_cast<T>(negative ? -val : val); | 658 | 52.9k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 602 | 980 | ParseResult* result) { | 603 | 980 | using UnsignedT = MakeUnsignedT<T>; | 604 | 980 | UnsignedT val = 0; | 605 | 980 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 606 | 980 | bool negative = false; | 607 | 980 | if (UNLIKELY(len <= 0)) { | 608 | 0 | *result = PARSE_FAILURE; | 609 | 0 | return 0; | 610 | 0 | } | 611 | 980 | int i = 0; | 612 | 980 | switch (*s) { | 613 | 392 | case '-': | 614 | 392 | negative = true; | 615 | 392 | max_val = StringParser::numeric_limits<T>(false) + 1; | 616 | 392 | [[fallthrough]]; | 617 | 490 | case '+': | 618 | 490 | i = 1; | 619 | 980 | } | 620 | | | 621 | 980 | const T max_div_base = max_val / base; | 622 | 980 | const T max_mod_base = max_val % base; | 623 | | | 624 | 980 | int first = i; | 625 | 4.21k | for (; i < len; ++i) { | 626 | 3.43k | T digit; | 627 | 3.43k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 628 | 3.23k | digit = s[i] - '0'; | 629 | 3.23k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 630 | 196 | digit = (s[i] - 'a' + 10); | 631 | 196 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 632 | 0 | digit = (s[i] - 'A' + 10); | 633 | 0 | } else { | 634 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 635 | | // Reject the string because either the first char was not an alpha/digit, | 636 | | // or the remaining chars are not all whitespace | 637 | 0 | *result = PARSE_FAILURE; | 638 | 0 | return 0; | 639 | 0 | } | 640 | | // skip trailing whitespace. | 641 | 0 | break; | 642 | 0 | } | 643 | | | 644 | | // Bail, if we encounter a digit that is not available in base. | 645 | 3.43k | if (digit >= base) { | 646 | 0 | break; | 647 | 0 | } | 648 | | | 649 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 650 | 3.43k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 651 | 196 | *result = PARSE_OVERFLOW; | 652 | 196 | return static_cast<T>(negative ? -max_val : max_val); | 653 | 196 | } | 654 | 3.23k | val = val * base + digit; | 655 | 3.23k | } | 656 | 784 | *result = PARSE_SUCCESS; | 657 | 784 | return static_cast<T>(negative ? -val : val); | 658 | 980 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 602 | 882 | ParseResult* result) { | 603 | 882 | using UnsignedT = MakeUnsignedT<T>; | 604 | 882 | UnsignedT val = 0; | 605 | 882 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 606 | 882 | bool negative = false; | 607 | 882 | if (UNLIKELY(len <= 0)) { | 608 | 0 | *result = PARSE_FAILURE; | 609 | 0 | return 0; | 610 | 0 | } | 611 | 882 | int i = 0; | 612 | 882 | switch (*s) { | 613 | 294 | case '-': | 614 | 294 | negative = true; | 615 | 294 | max_val = StringParser::numeric_limits<T>(false) + 1; | 616 | 294 | [[fallthrough]]; | 617 | 490 | case '+': | 618 | 490 | i = 1; | 619 | 882 | } | 620 | | | 621 | 882 | const T max_div_base = max_val / base; | 622 | 882 | const T max_mod_base = max_val % base; | 623 | | | 624 | 882 | int first = i; | 625 | 6.07k | for (; i < len; ++i) { | 626 | 5.39k | T digit; | 627 | 5.39k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 628 | 5.39k | digit = s[i] - '0'; | 629 | 5.39k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 630 | 0 | digit = (s[i] - 'a' + 10); | 631 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 632 | 0 | digit = (s[i] - 'A' + 10); | 633 | 0 | } else { | 634 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 635 | | // Reject the string because either the first char was not an alpha/digit, | 636 | | // or the remaining chars are not all whitespace | 637 | 0 | *result = PARSE_FAILURE; | 638 | 0 | return 0; | 639 | 0 | } | 640 | | // skip trailing whitespace. | 641 | 0 | break; | 642 | 0 | } | 643 | | | 644 | | // Bail, if we encounter a digit that is not available in base. | 645 | 5.39k | if (digit >= base) { | 646 | 0 | break; | 647 | 0 | } | 648 | | | 649 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 650 | 5.39k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 651 | 196 | *result = PARSE_OVERFLOW; | 652 | 196 | return static_cast<T>(negative ? -max_val : max_val); | 653 | 196 | } | 654 | 5.19k | val = val * base + digit; | 655 | 5.19k | } | 656 | 686 | *result = PARSE_SUCCESS; | 657 | 686 | return static_cast<T>(negative ? -val : val); | 658 | 882 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 602 | 882 | ParseResult* result) { | 603 | 882 | using UnsignedT = MakeUnsignedT<T>; | 604 | 882 | UnsignedT val = 0; | 605 | 882 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 606 | 882 | bool negative = false; | 607 | 882 | if (UNLIKELY(len <= 0)) { | 608 | 0 | *result = PARSE_FAILURE; | 609 | 0 | return 0; | 610 | 0 | } | 611 | 882 | int i = 0; | 612 | 882 | switch (*s) { | 613 | 392 | case '-': | 614 | 392 | negative = true; | 615 | 392 | max_val = StringParser::numeric_limits<T>(false) + 1; | 616 | 392 | [[fallthrough]]; | 617 | 490 | case '+': | 618 | 490 | i = 1; | 619 | 882 | } | 620 | | | 621 | 882 | const T max_div_base = max_val / base; | 622 | 882 | const T max_mod_base = max_val % base; | 623 | | | 624 | 882 | int first = i; | 625 | 10.1k | for (; i < len; ++i) { | 626 | 9.50k | T digit; | 627 | 9.50k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 628 | 9.50k | digit = s[i] - '0'; | 629 | 9.50k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 630 | 0 | digit = (s[i] - 'a' + 10); | 631 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 632 | 0 | digit = (s[i] - 'A' + 10); | 633 | 0 | } else { | 634 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 635 | | // Reject the string because either the first char was not an alpha/digit, | 636 | | // or the remaining chars are not all whitespace | 637 | 0 | *result = PARSE_FAILURE; | 638 | 0 | return 0; | 639 | 0 | } | 640 | | // skip trailing whitespace. | 641 | 0 | break; | 642 | 0 | } | 643 | | | 644 | | // Bail, if we encounter a digit that is not available in base. | 645 | 9.50k | if (digit >= base) { | 646 | 0 | break; | 647 | 0 | } | 648 | | | 649 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 650 | 9.50k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 651 | 196 | *result = PARSE_OVERFLOW; | 652 | 196 | return static_cast<T>(negative ? -max_val : max_val); | 653 | 196 | } | 654 | 9.31k | val = val * base + digit; | 655 | 9.31k | } | 656 | 686 | *result = PARSE_SUCCESS; | 657 | 686 | return static_cast<T>(negative ? -val : val); | 658 | 882 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 602 | 2 | ParseResult* result) { | 603 | 2 | using UnsignedT = MakeUnsignedT<T>; | 604 | 2 | UnsignedT val = 0; | 605 | 2 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 606 | 2 | bool negative = false; | 607 | 2 | if (UNLIKELY(len <= 0)) { | 608 | 0 | *result = PARSE_FAILURE; | 609 | 0 | return 0; | 610 | 0 | } | 611 | 2 | int i = 0; | 612 | 2 | switch (*s) { | 613 | 0 | case '-': | 614 | 0 | negative = true; | 615 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 616 | 0 | [[fallthrough]]; | 617 | 0 | case '+': | 618 | 0 | i = 1; | 619 | 2 | } | 620 | | | 621 | 2 | const T max_div_base = max_val / base; | 622 | 2 | const T max_mod_base = max_val % base; | 623 | | | 624 | 2 | int first = i; | 625 | 6 | for (; i < len; ++i) { | 626 | 4 | T digit; | 627 | 4 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 628 | 0 | digit = s[i] - '0'; | 629 | 4 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 630 | 4 | digit = (s[i] - 'a' + 10); | 631 | 4 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 632 | 0 | digit = (s[i] - 'A' + 10); | 633 | 0 | } else { | 634 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 635 | | // Reject the string because either the first char was not an alpha/digit, | 636 | | // or the remaining chars are not all whitespace | 637 | 0 | *result = PARSE_FAILURE; | 638 | 0 | return 0; | 639 | 0 | } | 640 | | // skip trailing whitespace. | 641 | 0 | break; | 642 | 0 | } | 643 | | | 644 | | // Bail, if we encounter a digit that is not available in base. | 645 | 4 | if (digit >= base) { | 646 | 0 | break; | 647 | 0 | } | 648 | | | 649 | | // This is a tricky check to see if adding this digit will cause an overflow. | 650 | 4 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 651 | 0 | *result = PARSE_OVERFLOW; | 652 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 653 | 0 | } | 654 | 4 | val = val * base + digit; | 655 | 4 | } | 656 | 2 | *result = PARSE_SUCCESS; | 657 | 2 | return static_cast<T>(negative ? -val : val); | 658 | 2 | } |
|
659 | | |
660 | | template <typename T, bool enable_strict_mode> |
661 | 556k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
662 | 556k | T val = 0; |
663 | 556k | if (UNLIKELY(len == 0)) { |
664 | 0 | *result = PARSE_SUCCESS; |
665 | 0 | return val; |
666 | 0 | } |
667 | | // Factor out the first char for error handling speeds up the loop. |
668 | 556k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
669 | 544k | val = s[0] - '0'; |
670 | 544k | } else { |
671 | 12.6k | *result = PARSE_FAILURE; |
672 | 12.6k | return 0; |
673 | 12.6k | } |
674 | 1.10M | for (int i = 1; i < len; ++i) { |
675 | 567k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
676 | 560k | T digit = s[i] - '0'; |
677 | 560k | val = val * 10 + digit; |
678 | 560k | } else { |
679 | 7.64k | if constexpr (enable_strict_mode) { |
680 | 2.63k | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { |
681 | 2.35k | *result = PARSE_FAILURE; |
682 | 2.35k | return 0; |
683 | 2.35k | } |
684 | 5.01k | } else { |
685 | | // Save original position where non-digit was found |
686 | 5.01k | int remaining_len = len - i; |
687 | 5.01k | const char* remaining_s = s + i; |
688 | | // Skip trailing whitespaces from the remaining portion |
689 | 5.01k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); |
690 | 5.01k | if ((UNLIKELY(remaining_len != 0 && |
691 | 5.01k | !is_float_suffix(remaining_s, remaining_len)))) { |
692 | 1.00k | *result = PARSE_FAILURE; |
693 | 1.00k | return 0; |
694 | 1.00k | } |
695 | 5.01k | } |
696 | 4.28k | *result = PARSE_SUCCESS; |
697 | 7.64k | return val; |
698 | 7.64k | } |
699 | 567k | } |
700 | 536k | *result = PARSE_SUCCESS; |
701 | 536k | return val; |
702 | 544k | } _ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 83.8k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 83.8k | T val = 0; | 663 | 83.8k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 83.8k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 82.0k | val = s[0] - '0'; | 670 | 82.0k | } else { | 671 | 1.82k | *result = PARSE_FAILURE; | 672 | 1.82k | return 0; | 673 | 1.82k | } | 674 | 115k | for (int i = 1; i < len; ++i) { | 675 | 34.1k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 33.3k | T digit = s[i] - '0'; | 677 | 33.3k | val = val * 10 + digit; | 678 | 33.3k | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 756 | } else { | 685 | | // Save original position where non-digit was found | 686 | 756 | int remaining_len = len - i; | 687 | 756 | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 756 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 756 | if ((UNLIKELY(remaining_len != 0 && | 691 | 756 | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 196 | *result = PARSE_FAILURE; | 693 | 196 | return 0; | 694 | 196 | } | 695 | 756 | } | 696 | 560 | *result = PARSE_SUCCESS; | 697 | 756 | return val; | 698 | 756 | } | 699 | 34.1k | } | 700 | 81.2k | *result = PARSE_SUCCESS; | 701 | 81.2k | return val; | 702 | 82.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 121k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 121k | T val = 0; | 663 | 121k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 121k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 119k | val = s[0] - '0'; | 670 | 119k | } else { | 671 | 1.16k | *result = PARSE_FAILURE; | 672 | 1.16k | return 0; | 673 | 1.16k | } | 674 | 170k | for (int i = 1; i < len; ++i) { | 675 | 50.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 50.7k | T digit = s[i] - '0'; | 677 | 50.7k | val = val * 10 + digit; | 678 | 50.7k | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 4 | } else { | 685 | | // Save original position where non-digit was found | 686 | 4 | int remaining_len = len - i; | 687 | 4 | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 4 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 4 | if ((UNLIKELY(remaining_len != 0 && | 691 | 4 | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 4 | *result = PARSE_FAILURE; | 693 | 4 | return 0; | 694 | 4 | } | 695 | 4 | } | 696 | 0 | *result = PARSE_SUCCESS; | 697 | 4 | return val; | 698 | 4 | } | 699 | 50.7k | } | 700 | 119k | *result = PARSE_SUCCESS; | 701 | 119k | return val; | 702 | 119k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 96 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 96 | T val = 0; | 663 | 96 | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 96 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 44 | val = s[0] - '0'; | 670 | 52 | } else { | 671 | 52 | *result = PARSE_FAILURE; | 672 | 52 | return 0; | 673 | 52 | } | 674 | 44 | for (int i = 1; i < len; ++i) { | 675 | 4 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 0 | T digit = s[i] - '0'; | 677 | 0 | val = val * 10 + digit; | 678 | 4 | } else { | 679 | 4 | if constexpr (enable_strict_mode) { | 680 | 4 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | 4 | *result = PARSE_FAILURE; | 682 | 4 | return 0; | 683 | 4 | } | 684 | | } else { | 685 | | // Save original position where non-digit was found | 686 | | int remaining_len = len - i; | 687 | | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | | if ((UNLIKELY(remaining_len != 0 && | 691 | | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | | *result = PARSE_FAILURE; | 693 | | return 0; | 694 | | } | 695 | | } | 696 | 0 | *result = PARSE_SUCCESS; | 697 | 4 | return val; | 698 | 4 | } | 699 | 4 | } | 700 | 40 | *result = PARSE_SUCCESS; | 701 | 40 | return val; | 702 | 44 | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 102k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 102k | T val = 0; | 663 | 102k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 102k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 100k | val = s[0] - '0'; | 670 | 100k | } else { | 671 | 1.83k | *result = PARSE_FAILURE; | 672 | 1.83k | return 0; | 673 | 1.83k | } | 674 | 145k | for (int i = 1; i < len; ++i) { | 675 | 47.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 45.6k | T digit = s[i] - '0'; | 677 | 45.6k | val = val * 10 + digit; | 678 | 45.6k | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 1.91k | } else { | 685 | | // Save original position where non-digit was found | 686 | 1.91k | int remaining_len = len - i; | 687 | 1.91k | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 1.91k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 1.91k | if ((UNLIKELY(remaining_len != 0 && | 691 | 1.91k | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 148 | *result = PARSE_FAILURE; | 693 | 148 | return 0; | 694 | 148 | } | 695 | 1.91k | } | 696 | 1.76k | *result = PARSE_SUCCESS; | 697 | 1.91k | return val; | 698 | 1.91k | } | 699 | 47.5k | } | 700 | 98.2k | *result = PARSE_SUCCESS; | 701 | 98.2k | return val; | 702 | 100k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 336 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 336 | T val = 0; | 663 | 336 | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 336 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 260 | val = s[0] - '0'; | 670 | 260 | } else { | 671 | 76 | *result = PARSE_FAILURE; | 672 | 76 | return 0; | 673 | 76 | } | 674 | 412 | for (int i = 1; i < len; ++i) { | 675 | 316 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 152 | T digit = s[i] - '0'; | 677 | 152 | val = val * 10 + digit; | 678 | 164 | } else { | 679 | 164 | if constexpr (enable_strict_mode) { | 680 | 164 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | 164 | *result = PARSE_FAILURE; | 682 | 164 | return 0; | 683 | 164 | } | 684 | | } else { | 685 | | // Save original position where non-digit was found | 686 | | int remaining_len = len - i; | 687 | | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | | if ((UNLIKELY(remaining_len != 0 && | 691 | | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | | *result = PARSE_FAILURE; | 693 | | return 0; | 694 | | } | 695 | | } | 696 | 0 | *result = PARSE_SUCCESS; | 697 | 164 | return val; | 698 | 164 | } | 699 | 316 | } | 700 | 96 | *result = PARSE_SUCCESS; | 701 | 96 | return val; | 702 | 260 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 180k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 180k | T val = 0; | 663 | 180k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 180k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 175k | val = s[0] - '0'; | 670 | 175k | } else { | 671 | 4.43k | *result = PARSE_FAILURE; | 672 | 4.43k | return 0; | 673 | 4.43k | } | 674 | 531k | for (int i = 1; i < len; ++i) { | 675 | 356k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 355k | T digit = s[i] - '0'; | 677 | 355k | val = val * 10 + digit; | 678 | 355k | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 1.10k | } else { | 685 | | // Save original position where non-digit was found | 686 | 1.10k | int remaining_len = len - i; | 687 | 1.10k | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 1.10k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 1.10k | if ((UNLIKELY(remaining_len != 0 && | 691 | 1.10k | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 410 | *result = PARSE_FAILURE; | 693 | 410 | return 0; | 694 | 410 | } | 695 | 1.10k | } | 696 | 696 | *result = PARSE_SUCCESS; | 697 | 1.10k | return val; | 698 | 1.10k | } | 699 | 356k | } | 700 | 174k | *result = PARSE_SUCCESS; | 701 | 174k | return val; | 702 | 175k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 1.21k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 1.21k | T val = 0; | 663 | 1.21k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 1.21k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 1.04k | val = s[0] - '0'; | 670 | 1.04k | } else { | 671 | 170 | *result = PARSE_FAILURE; | 672 | 170 | return 0; | 673 | 170 | } | 674 | 2.57k | for (int i = 1; i < len; ++i) { | 675 | 2.16k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 1.52k | T digit = s[i] - '0'; | 677 | 1.52k | val = val * 10 + digit; | 678 | 1.52k | } else { | 679 | 640 | if constexpr (enable_strict_mode) { | 680 | 640 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | 584 | *result = PARSE_FAILURE; | 682 | 584 | return 0; | 683 | 584 | } | 684 | | } else { | 685 | | // Save original position where non-digit was found | 686 | | int remaining_len = len - i; | 687 | | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | | if ((UNLIKELY(remaining_len != 0 && | 691 | | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | | *result = PARSE_FAILURE; | 693 | | return 0; | 694 | | } | 695 | | } | 696 | 56 | *result = PARSE_SUCCESS; | 697 | 640 | return val; | 698 | 640 | } | 699 | 2.16k | } | 700 | 406 | *result = PARSE_SUCCESS; | 701 | 406 | return val; | 702 | 1.04k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 65.1k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 65.1k | T val = 0; | 663 | 65.1k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 65.1k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 63.0k | val = s[0] - '0'; | 670 | 63.0k | } else { | 671 | 2.15k | *result = PARSE_FAILURE; | 672 | 2.15k | return 0; | 673 | 2.15k | } | 674 | 132k | for (int i = 1; i < len; ++i) { | 675 | 70.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 69.3k | T digit = s[i] - '0'; | 677 | 69.3k | val = val * 10 + digit; | 678 | 69.3k | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 1.23k | } else { | 685 | | // Save original position where non-digit was found | 686 | 1.23k | int remaining_len = len - i; | 687 | 1.23k | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 1.23k | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 1.23k | if ((UNLIKELY(remaining_len != 0 && | 691 | 1.23k | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 248 | *result = PARSE_FAILURE; | 693 | 248 | return 0; | 694 | 248 | } | 695 | 1.23k | } | 696 | 982 | *result = PARSE_SUCCESS; | 697 | 1.23k | return val; | 698 | 1.23k | } | 699 | 70.5k | } | 700 | 61.7k | *result = PARSE_SUCCESS; | 701 | 61.7k | return val; | 702 | 63.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 1.47k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 1.47k | T val = 0; | 663 | 1.47k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 1.47k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 1.03k | val = s[0] - '0'; | 670 | 1.03k | } else { | 671 | 434 | *result = PARSE_FAILURE; | 672 | 434 | return 0; | 673 | 434 | } | 674 | 3.03k | for (int i = 1; i < len; ++i) { | 675 | 2.91k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 1.99k | T digit = s[i] - '0'; | 677 | 1.99k | val = val * 10 + digit; | 678 | 1.99k | } else { | 679 | 912 | if constexpr (enable_strict_mode) { | 680 | 912 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | 800 | *result = PARSE_FAILURE; | 682 | 800 | return 0; | 683 | 800 | } | 684 | | } else { | 685 | | // Save original position where non-digit was found | 686 | | int remaining_len = len - i; | 687 | | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | | if ((UNLIKELY(remaining_len != 0 && | 691 | | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | | *result = PARSE_FAILURE; | 693 | | return 0; | 694 | | } | 695 | | } | 696 | 112 | *result = PARSE_SUCCESS; | 697 | 912 | return val; | 698 | 912 | } | 699 | 2.91k | } | 700 | 124 | *result = PARSE_SUCCESS; | 701 | 124 | return val; | 702 | 1.03k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 1.50k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 1.50k | T val = 0; | 663 | 1.50k | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 1.50k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 1.02k | val = s[0] - '0'; | 670 | 1.02k | } else { | 671 | 480 | *result = PARSE_FAILURE; | 672 | 480 | return 0; | 673 | 480 | } | 674 | 2.99k | for (int i = 1; i < len; ++i) { | 675 | 2.88k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 1.97k | T digit = s[i] - '0'; | 677 | 1.97k | val = val * 10 + digit; | 678 | 1.97k | } else { | 679 | 912 | if constexpr (enable_strict_mode) { | 680 | 912 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | 800 | *result = PARSE_FAILURE; | 682 | 800 | return 0; | 683 | 800 | } | 684 | | } else { | 685 | | // Save original position where non-digit was found | 686 | | int remaining_len = len - i; | 687 | | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | | if ((UNLIKELY(remaining_len != 0 && | 691 | | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | | *result = PARSE_FAILURE; | 693 | | return 0; | 694 | | } | 695 | | } | 696 | 112 | *result = PARSE_SUCCESS; | 697 | 912 | return val; | 698 | 912 | } | 699 | 2.88k | } | 700 | 112 | *result = PARSE_SUCCESS; | 701 | 112 | return val; | 702 | 1.02k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 661 | 8 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 662 | 8 | T val = 0; | 663 | 8 | if (UNLIKELY(len == 0)) { | 664 | 0 | *result = PARSE_SUCCESS; | 665 | 0 | return val; | 666 | 0 | } | 667 | | // Factor out the first char for error handling speeds up the loop. | 668 | 8 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 669 | 8 | val = s[0] - '0'; | 670 | 8 | } else { | 671 | 0 | *result = PARSE_FAILURE; | 672 | 0 | return 0; | 673 | 0 | } | 674 | 8 | for (int i = 1; i < len; ++i) { | 675 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 676 | 0 | T digit = s[i] - '0'; | 677 | 0 | val = val * 10 + digit; | 678 | 0 | } else { | 679 | | if constexpr (enable_strict_mode) { | 680 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 681 | | *result = PARSE_FAILURE; | 682 | | return 0; | 683 | | } | 684 | 0 | } else { | 685 | | // Save original position where non-digit was found | 686 | 0 | int remaining_len = len - i; | 687 | 0 | const char* remaining_s = s + i; | 688 | | // Skip trailing whitespaces from the remaining portion | 689 | 0 | remaining_s = skip_trailing_whitespaces(remaining_s, remaining_len); | 690 | 0 | if ((UNLIKELY(remaining_len != 0 && | 691 | 0 | !is_float_suffix(remaining_s, remaining_len)))) { | 692 | 0 | *result = PARSE_FAILURE; | 693 | 0 | return 0; | 694 | 0 | } | 695 | 0 | } | 696 | 0 | *result = PARSE_SUCCESS; | 697 | 0 | return val; | 698 | 0 | } | 699 | 0 | } | 700 | 8 | *result = PARSE_SUCCESS; | 701 | 8 | return val; | 702 | 8 | } |
|
703 | | |
704 | | // at least the first char(if any) must be a digit. |
705 | | template <typename T> |
706 | | T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
707 | 272k | ParseResult* result) { |
708 | 272k | T val = 0; |
709 | 272k | if (max_len == 0) [[unlikely]] { |
710 | 270k | *result = PARSE_SUCCESS; |
711 | 270k | return val; |
712 | 270k | } |
713 | | // Factor out the first char for error handling speeds up the loop. |
714 | 2.28k | if (is_numeric_ascii(s[0])) [[likely]] { |
715 | 2.28k | val = s[0] - '0'; |
716 | 2.28k | } else { |
717 | 0 | *result = PARSE_FAILURE; |
718 | 0 | return 0; |
719 | 0 | } |
720 | 10.2k | for (int i = 1; i < max_len; ++i) { |
721 | 7.99k | if (is_numeric_ascii(s[i])) [[likely]] { |
722 | 7.99k | T digit = s[i] - '0'; |
723 | 7.99k | val = val * 10 + digit; |
724 | 7.99k | } else { |
725 | | // 123abc, return 123 |
726 | 0 | *result = PARSE_SUCCESS; |
727 | 0 | return val; |
728 | 0 | } |
729 | 7.99k | } |
730 | 2.28k | *result = PARSE_SUCCESS; |
731 | 2.28k | return val; |
732 | 2.28k | } |
733 | | |
734 | | template <typename T> |
735 | 305k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
736 | 305k | int i = 0; |
737 | | // skip leading spaces |
738 | 305k | for (; i < len; ++i) { |
739 | 305k | if (!is_whitespace_ascii(s[i])) { |
740 | 305k | break; |
741 | 305k | } |
742 | 305k | } |
743 | | |
744 | | // skip back spaces |
745 | 305k | int j = len - 1; |
746 | 305k | for (; j >= i; j--) { |
747 | 305k | if (!is_whitespace_ascii(s[j])) { |
748 | 305k | break; |
749 | 305k | } |
750 | 305k | } |
751 | | |
752 | | // skip leading '+', from_chars can handle '-' |
753 | 305k | if (i < len && s[i] == '+') { |
754 | 14.1k | i++; |
755 | | // ++ or +- are not valid, but the first + is already skipped, |
756 | | // if don't check here, from_chars will succeed. |
757 | | // |
758 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' |
759 | | // which may avoid this extra check here. |
760 | | // e.g.: |
761 | | // fast_float::chars_format format = |
762 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; |
763 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); |
764 | 14.1k | if (i < len && (s[i] == '+' || s[i] == '-')) { |
765 | 40 | *result = PARSE_FAILURE; |
766 | 40 | return 0; |
767 | 40 | } |
768 | 14.1k | } |
769 | 305k | if (UNLIKELY(i > j)) { |
770 | 64 | *result = PARSE_FAILURE; |
771 | 64 | return 0; |
772 | 64 | } |
773 | | |
774 | | // Use double here to not lose precision while accumulating the result |
775 | 305k | double val = 0; |
776 | 305k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
777 | | |
778 | 305k | if (res.ptr == s + j + 1) { |
779 | 296k | *result = PARSE_SUCCESS; |
780 | 296k | return val; |
781 | 296k | } else { |
782 | 9.22k | *result = PARSE_FAILURE; |
783 | 9.22k | } |
784 | 9.22k | return 0; |
785 | 305k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 735 | 175k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 736 | 175k | int i = 0; | 737 | | // skip leading spaces | 738 | 175k | for (; i < len; ++i) { | 739 | 175k | if (!is_whitespace_ascii(s[i])) { | 740 | 175k | break; | 741 | 175k | } | 742 | 175k | } | 743 | | | 744 | | // skip back spaces | 745 | 175k | int j = len - 1; | 746 | 175k | for (; j >= i; j--) { | 747 | 175k | if (!is_whitespace_ascii(s[j])) { | 748 | 175k | break; | 749 | 175k | } | 750 | 175k | } | 751 | | | 752 | | // skip leading '+', from_chars can handle '-' | 753 | 175k | if (i < len && s[i] == '+') { | 754 | 7.08k | i++; | 755 | | // ++ or +- are not valid, but the first + is already skipped, | 756 | | // if don't check here, from_chars will succeed. | 757 | | // | 758 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 759 | | // which may avoid this extra check here. 
| 760 | | // e.g.: | 761 | | // fast_float::chars_format format = | 762 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 763 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 764 | 7.08k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 765 | 20 | *result = PARSE_FAILURE; | 766 | 20 | return 0; | 767 | 20 | } | 768 | 7.08k | } | 769 | 175k | if (UNLIKELY(i > j)) { | 770 | 36 | *result = PARSE_FAILURE; | 771 | 36 | return 0; | 772 | 36 | } | 773 | | | 774 | | // Use double here to not lose precision while accumulating the result | 775 | 175k | double val = 0; | 776 | 175k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 777 | | | 778 | 175k | if (res.ptr == s + j + 1) { | 779 | 170k | *result = PARSE_SUCCESS; | 780 | 170k | return val; | 781 | 170k | } else { | 782 | 4.65k | *result = PARSE_FAILURE; | 783 | 4.65k | } | 784 | 4.65k | return 0; | 785 | 175k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 735 | 130k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 736 | 130k | int i = 0; | 737 | | // skip leading spaces | 738 | 130k | for (; i < len; ++i) { | 739 | 130k | if (!is_whitespace_ascii(s[i])) { | 740 | 130k | break; | 741 | 130k | } | 742 | 130k | } | 743 | | | 744 | | // skip back spaces | 745 | 130k | int j = len - 1; | 746 | 130k | for (; j >= i; j--) { | 747 | 130k | if (!is_whitespace_ascii(s[j])) { | 748 | 130k | break; | 749 | 130k | } | 750 | 130k | } | 751 | | | 752 | | // skip leading '+', from_chars can handle '-' | 753 | 130k | if (i < len && s[i] == '+') { | 754 | 7.08k | i++; | 755 | | // ++ or +- are not valid, but the first + is already skipped, | 756 | | // if don't check here, from_chars will succeed. | 757 | | // | 758 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 759 | | // which may avoid this extra check here. 
| 760 | | // e.g.: | 761 | | // fast_float::chars_format format = | 762 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 763 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 764 | 7.08k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 765 | 20 | *result = PARSE_FAILURE; | 766 | 20 | return 0; | 767 | 20 | } | 768 | 7.08k | } | 769 | 130k | if (UNLIKELY(i > j)) { | 770 | 28 | *result = PARSE_FAILURE; | 771 | 28 | return 0; | 772 | 28 | } | 773 | | | 774 | | // Use double here to not lose precision while accumulating the result | 775 | 130k | double val = 0; | 776 | 130k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 777 | | | 778 | 130k | if (res.ptr == s + j + 1) { | 779 | 125k | *result = PARSE_SUCCESS; | 780 | 125k | return val; | 781 | 125k | } else { | 782 | 4.57k | *result = PARSE_FAILURE; | 783 | 4.57k | } | 784 | 4.57k | return 0; | 785 | 130k | } |
|
786 | | |
787 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
788 | 22.7k | ParseResult* result) { |
789 | 22.7k | *result = PARSE_SUCCESS; |
790 | | |
791 | 22.7k | if (len == 1) { |
792 | 5.32k | if (s[0] == '1' || s[0] == 't' || s[0] == 'T') { |
793 | 666 | return true; |
794 | 666 | } |
795 | 4.65k | if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') { |
796 | 1.86k | return false; |
797 | 1.86k | } |
798 | 2.78k | *result = PARSE_FAILURE; |
799 | 2.78k | return false; |
800 | 4.65k | } |
801 | | |
802 | 17.4k | if (len == 2) { |
803 | 1.95k | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { |
804 | 20 | return true; |
805 | 20 | } |
806 | 1.93k | if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) { |
807 | 18 | return false; |
808 | 18 | } |
809 | 1.93k | } |
810 | | |
811 | 17.3k | if (len == 3) { |
812 | 84 | if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') && |
813 | 84 | (s[2] == 's' || s[2] == 'S')) { |
814 | 20 | return true; |
815 | 20 | } |
816 | 64 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') && |
817 | 64 | (s[2] == 'f' || s[2] == 'F')) { |
818 | 18 | return false; |
819 | 18 | } |
820 | 64 | } |
821 | | |
822 | 17.3k | if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') && |
823 | 17.3k | (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) { |
824 | 6.76k | return true; |
825 | 6.76k | } |
826 | | |
827 | 10.5k | if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') && |
828 | 10.5k | (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') && |
829 | 10.5k | (s[4] == 'e' || s[4] == 'E')) { |
830 | 6.85k | return false; |
831 | 6.85k | } |
832 | | |
833 | | // No valid boolean value found |
834 | 3.74k | *result = PARSE_FAILURE; |
835 | 3.74k | return false; |
836 | 10.5k | } |
837 | | #include "common/compile_check_avoid_end.h" |
838 | | } // end namespace doris |