/root/doris/be/src/util/string_parser.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | #include <sys/types.h> |
27 | | |
28 | | #include <algorithm> |
29 | | #include <cstdlib> |
30 | | // IWYU pragma: no_include <bits/std_abs.h> |
31 | | #include <cmath> // IWYU pragma: keep |
32 | | #include <cstdint> |
33 | | #include <limits> |
34 | | #include <map> |
35 | | #include <string> |
36 | | #include <type_traits> |
37 | | #include <utility> |
38 | | |
39 | | #include "common/compiler_util.h" // IWYU pragma: keep |
40 | | #include "common/status.h" |
41 | | #include "runtime/large_int_value.h" |
42 | | #include "runtime/primitive_type.h" |
43 | | #include "vec/common/int_exp.h" |
44 | | #include "vec/common/string_utils/string_utils.h" |
45 | | #include "vec/core/extended_types.h" |
46 | | #include "vec/data_types/number_traits.h" |
47 | | |
48 | | namespace doris { |
49 | | #include "common/compile_check_avoid_begin.h" |
50 | | namespace vectorized { |
51 | | template <DecimalNativeTypeConcept T> |
52 | | struct Decimal; |
53 | | } // namespace vectorized |
54 | | |
55 | | // These macros rely on a template parameter named `IsStrict` and a variable named `params` being in scope. In strict mode they store the error in `params.status`; otherwise they only return false. |
56 | | #ifndef SET_PARAMS_RET_FALSE_IFN |
57 | | #define SET_PARAMS_RET_FALSE_IFN(stmt, ...) \ |
58 | 6.44k | do { \ |
59 | 6.44k | if (!(stmt)) [[unlikely]] { \ |
60 | 292 | if constexpr (IsStrict) { \ |
61 | 127 | params.status = Status::InvalidArgument(__VA_ARGS__); \ |
62 | 127 | } \ |
63 | 292 | return false; \ |
64 | 292 | } \ |
65 | 6.44k | } while (false) |
66 | | #endif |
67 | | |
68 | | #ifndef SET_PARAMS_RET_FALSE_FROM_EXCEPTION |
69 | | #define SET_PARAMS_RET_FALSE_FROM_EXCEPTION(stmt) \ |
70 | 155 | do { \ |
71 | 155 | try { \ |
72 | 155 | { stmt; } \ |
73 | 155 | } catch (const doris::Exception& e) { \ |
74 | 15 | if constexpr (IsStrict) { \ |
75 | 5 | params.status = e.to_status(); \ |
76 | 5 | } \ |
77 | 15 | return false; \ |
78 | 15 | } \ |
79 | 155 | } while (false) |
80 | | #endif |
81 | | |
82 | | // skip leading and trailing ascii whitespaces, |
83 | | // return the pointer to the first non-whitespace char, |
84 | | // and update the len to the new length, which does not include |
85 | | // leading and trailing whitespaces |
86 | | template <typename T> |
87 | 908k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { |
88 | 1.48M | while (len > 0 && is_whitespace_ascii(*s)) { |
89 | 573k | ++s; |
90 | 573k | --len; |
91 | 573k | } |
92 | | |
93 | 1.47M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { |
94 | 563k | --len; |
95 | 563k | } |
96 | | |
97 | 908k | return s; |
98 | 908k | } _ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_ Line | Count | Source | 87 | 879k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 1.37M | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 497k | ++s; | 90 | 497k | --len; | 91 | 497k | } | 92 | | | 93 | 1.36M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 488k | --len; | 95 | 488k | } | 96 | | | 97 | 879k | return s; | 98 | 879k | } |
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_ Line | Count | Source | 87 | 1.37k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 4.90k | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 3.52k | ++s; | 90 | 3.52k | --len; | 91 | 3.52k | } | 92 | | | 93 | 4.90k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 3.52k | --len; | 95 | 3.52k | } | 96 | | | 97 | 1.37k | return s; | 98 | 1.37k | } |
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_ Line | Count | Source | 87 | 27.8k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 88 | 100k | while (len > 0 && is_whitespace_ascii(*s)) { | 89 | 72.4k | ++s; | 90 | 72.4k | --len; | 91 | 72.4k | } | 92 | | | 93 | 99.8k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 94 | 72.0k | --len; | 95 | 72.0k | } | 96 | | | 97 | 27.8k | return s; | 98 | 27.8k | } |
|
99 | | |
100 | | template <bool (*Pred)(char)> |
101 | 784 | bool range_suite(const char* s, const char* end) { |
102 | 784 | return std::ranges::all_of(s, end, Pred); |
103 | 784 | } _ZN5doris11range_suiteIXadL_Z16is_numeric_asciicEEEEbPKcS2_ Line | Count | Source | 101 | 708 | bool range_suite(const char* s, const char* end) { | 102 | 708 | return std::ranges::all_of(s, end, Pred); | 103 | 708 | } |
_ZN5doris11range_suiteIXadL_Z19is_whitespace_asciicEEEEbPKcS2_ Line | Count | Source | 101 | 76 | bool range_suite(const char* s, const char* end) { | 102 | 76 | return std::ranges::all_of(s, end, Pred); | 103 | 76 | } |
|
104 | | |
105 | | inline auto is_digit_range = range_suite<is_numeric_ascii>; |
106 | | inline auto is_space_range = range_suite<is_whitespace_ascii>; |
107 | | |
// Returns true when position `s + offset` lies strictly before `end`, i.e. at least
// `offset + 1` characters remain in [s, end). Returns false otherwise, including
// when the range is empty.
// Combining in_bound with range_suite is fine; it does not lead to duplicated calculation.
inline bool in_bound(const char* s, const char* end, size_t offset) {
    // Compare the offset with the remaining length rather than forming `s + offset`:
    // computing a pointer further than one past the end of the buffer is undefined
    // behavior, which a large `offset` would otherwise trigger.
    if (s >= end || offset >= static_cast<size_t>(end - s)) [[unlikely]] {
        return false;
    }
    return true;
}
115 | | |
116 | | // LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more. |
117 | | // if need result, use StringRef{origin_s, s} outside |
118 | | template <int LEN, bool (*Pred)(char)> |
119 | 2.03k | bool skip_qualified_char(const char*& s, const char* end) { |
120 | 2.03k | if constexpr (LEN == 0) { |
121 | | // Consume any length of characters that match the predicate. |
122 | 2.26k | while (s != end && Pred(*s)) { |
123 | 1.34k | ++s; |
124 | 1.34k | } |
125 | 1.07k | } else if constexpr (LEN > 0) { |
126 | | // Consume exactly LEN characters that match the predicate. |
127 | 1.99k | for (int i = 0; i < LEN; ++i, ++s) { |
128 | 1.07k | if (s == end || !Pred(*s)) [[unlikely]] { |
129 | 157 | return false; |
130 | 157 | } |
131 | 1.07k | } |
132 | 1.07k | } else { // LEN < 0 |
133 | | // Consume at least -LEN characters that match the predicate. |
134 | 48 | int count = 0; |
135 | 314 | while (s != end && Pred(*s)) { |
136 | 266 | ++s; |
137 | 266 | ++count; |
138 | 266 | } |
139 | 48 | if (count < -LEN) [[unlikely]] { |
140 | 0 | return false; |
141 | 0 | } |
142 | 48 | } |
143 | 967 | return true; |
144 | 2.03k | } _ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 119 | 363 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | 363 | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | 395 | while (s != end && Pred(*s)) { | 123 | 32 | ++s; | 124 | 32 | } | 125 | | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | | for (int i = 0; i < LEN; ++i, ++s) { | 128 | | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | | return false; | 130 | | } | 131 | | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 363 | return true; | 144 | 363 | } |
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEEbRPKcS2_ Line | Count | Source | 119 | 552 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | 552 | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | 1.86k | while (s != end && Pred(*s)) { | 123 | 1.31k | ++s; | 124 | 1.31k | } | 125 | | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | | for (int i = 0; i < LEN; ++i, ++s) { | 128 | | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | | return false; | 130 | | } | 131 | | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 552 | return true; | 144 | 552 | } |
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEEbRPKcS2_ Line | Count | Source | 119 | 48 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | | while (s != end && Pred(*s)) { | 123 | | ++s; | 124 | | } | 125 | | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | | for (int i = 0; i < LEN; ++i, ++s) { | 128 | | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | | return false; | 130 | | } | 131 | | } | 132 | 48 | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | 48 | int count = 0; | 135 | 314 | while (s != end && Pred(*s)) { | 136 | 266 | ++s; | 137 | 266 | ++count; | 138 | 266 | } | 139 | 48 | if (count < -LEN) [[unlikely]] { | 140 | 0 | return false; | 141 | 0 | } | 142 | 48 | } | 143 | 48 | return true; | 144 | 48 | } |
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEEbRPKcS2_ _ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEEbRPKcS2_ Line | Count | Source | 119 | 282 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | | while (s != end && Pred(*s)) { | 123 | | ++s; | 124 | | } | 125 | 282 | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | 540 | for (int i = 0; i < LEN; ++i, ++s) { | 128 | 282 | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | 24 | return false; | 130 | 24 | } | 131 | 282 | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 258 | return true; | 144 | 282 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEEbRPKcS2_ Line | Count | Source | 119 | 225 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | | while (s != end && Pred(*s)) { | 123 | | ++s; | 124 | | } | 125 | 225 | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | 412 | for (int i = 0; i < LEN; ++i, ++s) { | 128 | 225 | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | 38 | return false; | 130 | 38 | } | 131 | 225 | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 187 | return true; | 144 | 225 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_6is_barEcEEEEbRPKcS2_ Line | Count | Source | 119 | 407 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | | while (s != end && Pred(*s)) { | 123 | | ++s; | 124 | | } | 125 | 407 | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | 743 | for (int i = 0; i < LEN; ++i, ++s) { | 128 | 407 | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | 71 | return false; | 130 | 71 | } | 131 | 407 | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 336 | return true; | 144 | 407 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEEbRPKcS2_ Line | Count | Source | 119 | 162 | bool skip_qualified_char(const char*& s, const char* end) { | 120 | | if constexpr (LEN == 0) { | 121 | | // Consume any length of characters that match the predicate. | 122 | | while (s != end && Pred(*s)) { | 123 | | ++s; | 124 | | } | 125 | 162 | } else if constexpr (LEN > 0) { | 126 | | // Consume exactly LEN characters that match the predicate. | 127 | 300 | for (int i = 0; i < LEN; ++i, ++s) { | 128 | 162 | if (s == end || !Pred(*s)) [[unlikely]] { | 129 | 24 | return false; | 130 | 24 | } | 131 | 162 | } | 132 | | } else { // LEN < 0 | 133 | | // Consume at least -LEN characters that match the predicate. | 134 | | int count = 0; | 135 | | while (s != end && Pred(*s)) { | 136 | | ++s; | 137 | | ++count; | 138 | | } | 139 | | if (count < -LEN) [[unlikely]] { | 140 | | return false; | 141 | | } | 142 | | } | 143 | 138 | return true; | 144 | 162 | } |
|
145 | | |
146 | | inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>; |
147 | | inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>; |
148 | | inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>; |
149 | | inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>; |
150 | | inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>; |
151 | | |
// True for exactly three characters: space, 'T' and ':'.
inline bool is_delimiter(char c) {
    switch (c) {
    case ' ':
    case 'T':
    case ':':
        return true;
    default:
        return false;
    }
}
155 | | inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>; |
156 | | |
// True only for the '-' character.
inline bool is_bar(char c) {
    constexpr char kBar = '-';
    return c == kBar;
}
160 | | inline auto consume_one_bar = skip_qualified_char<1, is_bar>; |
161 | | |
// True only for the ':' character.
inline bool is_colon(char c) {
    constexpr char kColon = ':';
    return c == kColon;
}
165 | | inline auto consume_one_colon = skip_qualified_char<1, is_colon>; |
166 | | |
167 | | // only consume a string of digit, not include sign. |
168 | | // when has MAX_LEN > 0, do greedy match but at most MAX_LEN. |
169 | | // LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits. |
170 | | template <typename T, int LEN = 0, int MAX_LEN = -1> |
171 | 15 | bool consume_digit(const char*& s, const char* end, T& out) { |
172 | 15 | static_assert(LEN >= 0); |
173 | | if constexpr (MAX_LEN > 0) { |
174 | | out = 0; |
175 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { |
176 | | if (s == end || !is_numeric_ascii(*s)) { |
177 | | if (i < LEN) [[unlikely]] { |
178 | | return false; |
179 | | } |
180 | | break; // stop consuming if we have consumed enough digits. |
181 | | } |
182 | | out = out * 10 + (*s - '0'); |
183 | | } |
184 | | } else if constexpr (LEN == 0) { |
185 | | // Consume any length of digits. |
186 | | out = 0; |
187 | | while (s != end && is_numeric_ascii(*s)) { |
188 | | out = out * 10 + (*s - '0'); |
189 | | ++s; |
190 | | } |
191 | 15 | } else if constexpr (LEN > 0) { |
192 | | // Consume exactly LEN digits. |
193 | 15 | out = 0; |
194 | 60 | for (int i = 0; i < LEN; ++i, ++s) { |
195 | 45 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
196 | 0 | return false; |
197 | 0 | } |
198 | 45 | out = out * 10 + (*s - '0'); |
199 | 45 | } |
200 | 15 | } |
201 | 15 | return true; |
202 | 15 | } _ZN5doris13consume_digitIjLi4ELin1EEEbRPKcS2_RT_ Line | Count | Source | 171 | 10 | bool consume_digit(const char*& s, const char* end, T& out) { | 172 | 10 | static_assert(LEN >= 0); | 173 | | if constexpr (MAX_LEN > 0) { | 174 | | out = 0; | 175 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 176 | | if (s == end || !is_numeric_ascii(*s)) { | 177 | | if (i < LEN) [[unlikely]] { | 178 | | return false; | 179 | | } | 180 | | break; // stop consuming if we have consumed enough digits. | 181 | | } | 182 | | out = out * 10 + (*s - '0'); | 183 | | } | 184 | | } else if constexpr (LEN == 0) { | 185 | | // Consume any length of digits. | 186 | | out = 0; | 187 | | while (s != end && is_numeric_ascii(*s)) { | 188 | | out = out * 10 + (*s - '0'); | 189 | | ++s; | 190 | | } | 191 | 10 | } else if constexpr (LEN > 0) { | 192 | | // Consume exactly LEN digits. | 193 | 10 | out = 0; | 194 | 50 | for (int i = 0; i < LEN; ++i, ++s) { | 195 | 40 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 196 | 0 | return false; | 197 | 0 | } | 198 | 40 | out = out * 10 + (*s - '0'); | 199 | 40 | } | 200 | 10 | } | 201 | 10 | return true; | 202 | 10 | } |
_ZN5doris13consume_digitIjLi1ELin1EEEbRPKcS2_RT_ Line | Count | Source | 171 | 5 | bool consume_digit(const char*& s, const char* end, T& out) { | 172 | 5 | static_assert(LEN >= 0); | 173 | | if constexpr (MAX_LEN > 0) { | 174 | | out = 0; | 175 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 176 | | if (s == end || !is_numeric_ascii(*s)) { | 177 | | if (i < LEN) [[unlikely]] { | 178 | | return false; | 179 | | } | 180 | | break; // stop consuming if we have consumed enough digits. | 181 | | } | 182 | | out = out * 10 + (*s - '0'); | 183 | | } | 184 | | } else if constexpr (LEN == 0) { | 185 | | // Consume any length of digits. | 186 | | out = 0; | 187 | | while (s != end && is_numeric_ascii(*s)) { | 188 | | out = out * 10 + (*s - '0'); | 189 | | ++s; | 190 | | } | 191 | 5 | } else if constexpr (LEN > 0) { | 192 | | // Consume exactly LEN digits. | 193 | 5 | out = 0; | 194 | 10 | for (int i = 0; i < LEN; ++i, ++s) { | 195 | 5 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 196 | 0 | return false; | 197 | 0 | } | 198 | 5 | out = out * 10 + (*s - '0'); | 199 | 5 | } | 200 | 5 | } | 201 | 5 | return true; | 202 | 5 | } |
|
203 | | |
204 | | // specialized version for 2 digits, which is used very often in date/time parsing. |
205 | | template <> |
206 | 1.03k | inline bool consume_digit<uint32_t, 2, -1>(const char*& s, const char* end, uint32_t& out) { |
207 | 1.03k | out = 0; |
208 | 1.03k | if (s == end || s + 1 == end || !is_numeric_ascii(*s) || !is_numeric_ascii(*(s + 1))) |
209 | 46 | [[unlikely]] { |
210 | 46 | return false; |
211 | 46 | } |
212 | 986 | out = (s[0] - '0') * 10 + (s[1] - '0'); |
213 | 986 | s += 2; // consume 2 digits |
214 | 986 | return true; |
215 | 1.03k | } |
216 | | |
217 | | // specialized version for 1 or 2 digits, which is used very often in date/time parsing. |
218 | | template <> |
219 | 1.15k | inline bool consume_digit<uint32_t, 1, 2>(const char*& s, const char* end, uint32_t& out) { |
220 | 1.15k | out = 0; |
221 | 1.15k | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
222 | 32 | return false; |
223 | 1.12k | } else if (s + 1 != end && is_numeric_ascii(*(s + 1))) { |
224 | | // consume 2 digits |
225 | 1.00k | out = (*s - '0') * 10 + (*(s + 1) - '0'); |
226 | 1.00k | s += 2; |
227 | 1.00k | } else { |
228 | | // consume 1 digit |
229 | 121 | out = *s - '0'; |
230 | 121 | ++s; |
231 | 121 | } |
232 | 1.12k | return true; |
233 | 1.15k | } |
234 | | |
235 | | template <bool (*Pred)(char)> |
236 | 48 | uint32_t count_valid_length(const char* s, const char* end) { |
237 | 48 | DCHECK(s <= end) << "s: " << s << ", end: " << end; |
238 | 48 | uint32_t count = 0; |
239 | 149 | while (s != end && Pred(*s)) { |
240 | 101 | ++count; |
241 | 101 | ++s; |
242 | 101 | } |
243 | 48 | return count; |
244 | 48 | } |
245 | | |
246 | | inline auto count_digits = count_valid_length<is_numeric_ascii>; |
247 | | |
248 | 36 | inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { |
249 | 36 | std::string result(6, '0'); |
250 | 36 | result[0] = sign; |
251 | 36 | result[1] = '0' + (hour_offset / 10); |
252 | 36 | result[2] = '0' + (hour_offset % 10); |
253 | 36 | result[3] = ':'; |
254 | 36 | result[4] = '0' + (minute_offset / 10); |
255 | 36 | result[5] = '0' + (minute_offset % 10); |
256 | 36 | DCHECK_EQ(result.size(), 6); |
257 | 36 | return result; |
258 | 36 | } |
259 | | |
260 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
261 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
262 | | // |
263 | | // Strings with leading and trailing whitespaces are accepted. |
264 | | // Branching is heavily optimized for the non-whitespace successful case. |
265 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
266 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
267 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
268 | | // |
269 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
270 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
271 | | // and inf/-inf for float types. |
272 | | // |
273 | | // Things we tried that did not work: |
274 | | // - lookup table for converting character to digit |
275 | | // Improvements (TODO): |
276 | | // - Validate input using _simd_compare_ranges |
277 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
278 | | class StringParser { |
279 | | public: |
280 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
281 | | |
282 | | template <typename T> |
283 | 399k | static T numeric_limits(bool negative) { |
284 | 399k | if constexpr (std::is_same_v<T, __int128>) { |
285 | 46.8k | return negative ? MIN_INT128 : MAX_INT128; |
286 | 352k | } else { |
287 | 352k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
288 | 352k | } |
289 | 399k | } _ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 283 | 46.8k | static T numeric_limits(bool negative) { | 284 | 46.8k | if constexpr (std::is_same_v<T, __int128>) { | 285 | 46.8k | return negative ? MIN_INT128 : MAX_INT128; | 286 | | } else { | 287 | | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | | } | 289 | 46.8k | } |
_ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 283 | 136k | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 136k | } else { | 287 | 136k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 136k | } | 289 | 136k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 283 | 69.0k | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 69.0k | } else { | 287 | 69.0k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 69.0k | } | 289 | 69.0k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 283 | 62.8k | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 62.8k | } else { | 287 | 62.8k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 62.8k | } | 289 | 62.8k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 283 | 83.7k | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 83.7k | } else { | 287 | 83.7k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 83.7k | } | 289 | 83.7k | } |
_ZN5doris12StringParser14numeric_limitsIjEET_b Line | Count | Source | 283 | 145 | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 145 | } else { | 287 | 145 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 145 | } | 289 | 145 | } |
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Line | Count | Source | 283 | 4 | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 4 | } else { | 287 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 4 | } | 289 | 4 | } |
_ZN5doris12StringParser14numeric_limitsIoEET_b Line | Count | Source | 283 | 4 | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 4 | } else { | 287 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 4 | } | 289 | 4 | } |
_ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 283 | 21 | static T numeric_limits(bool negative) { | 284 | | if constexpr (std::is_same_v<T, __int128>) { | 285 | | return negative ? MIN_INT128 : MAX_INT128; | 286 | 21 | } else { | 287 | 21 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 288 | 21 | } | 289 | 21 | } |
|
290 | | |
291 | | template <typename T> |
292 | 943k | static T get_scale_multiplier(int scale) { |
293 | 943k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
294 | 943k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
295 | 943k | "You can only instantiate as int32_t, int64_t, __int128."); |
296 | 943k | if constexpr (std::is_same_v<T, int32_t>) { |
297 | 133k | return common::exp10_i32(scale); |
298 | 183k | } else if constexpr (std::is_same_v<T, int64_t>) { |
299 | 183k | return common::exp10_i64(scale); |
300 | 239k | } else if constexpr (std::is_same_v<T, __int128>) { |
301 | 239k | return common::exp10_i128(scale); |
302 | 386k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
303 | 386k | return common::exp10_i256(scale); |
304 | 386k | } |
305 | 943k | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 292 | 133k | static T get_scale_multiplier(int scale) { | 293 | 133k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 294 | 133k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 295 | 133k | "You can only instantiate as int32_t, int64_t, __int128."); | 296 | 133k | if constexpr (std::is_same_v<T, int32_t>) { | 297 | 133k | return common::exp10_i32(scale); | 298 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 299 | | return common::exp10_i64(scale); | 300 | | } else if constexpr (std::is_same_v<T, __int128>) { | 301 | | return common::exp10_i128(scale); | 302 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 303 | | return common::exp10_i256(scale); | 304 | | } | 305 | 133k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 292 | 183k | static T get_scale_multiplier(int scale) { | 293 | 183k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 294 | 183k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 295 | 183k | "You can only instantiate as int32_t, int64_t, __int128."); | 296 | | if constexpr (std::is_same_v<T, int32_t>) { | 297 | | return common::exp10_i32(scale); | 298 | 183k | } else if constexpr (std::is_same_v<T, int64_t>) { | 299 | 183k | return common::exp10_i64(scale); | 300 | | } else if constexpr (std::is_same_v<T, __int128>) { | 301 | | return common::exp10_i128(scale); | 302 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 303 | | return common::exp10_i256(scale); | 304 | | } | 305 | 183k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 292 | 239k | static T get_scale_multiplier(int scale) { | 293 | 239k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 294 | 239k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 295 | 239k | "You can only instantiate as int32_t, int64_t, __int128."); | 296 | | if constexpr (std::is_same_v<T, int32_t>) { | 297 | | return common::exp10_i32(scale); | 298 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 299 | | return common::exp10_i64(scale); | 300 | 239k | } else if constexpr (std::is_same_v<T, __int128>) { | 301 | 239k | return common::exp10_i128(scale); | 302 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 303 | | return common::exp10_i256(scale); | 304 | | } | 305 | 239k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 292 | 386k | static T get_scale_multiplier(int scale) { | 293 | 386k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 294 | 386k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 295 | 386k | "You can only instantiate as int32_t, int64_t, __int128."); | 296 | | if constexpr (std::is_same_v<T, int32_t>) { | 297 | | return common::exp10_i32(scale); | 298 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 299 | | return common::exp10_i64(scale); | 300 | | } else if constexpr (std::is_same_v<T, __int128>) { | 301 | | return common::exp10_i128(scale); | 302 | 386k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 303 | 386k | return common::exp10_i256(scale); | 304 | 386k | } | 305 | 386k | } |
|
306 | | |
307 | | // This is considerably faster than glibc's implementation (25x). |
308 | | // Assumes s represents a decimal number. |
309 | | template <typename T, bool enable_strict_mode = false> |
310 | 359k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
311 | 359k | s = skip_ascii_whitespaces(s, len); |
312 | 359k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); |
313 | 359k | } _ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 45.9k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 45.9k | s = skip_ascii_whitespaces(s, len); | 312 | 45.9k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 45.9k | } |
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 96.5k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 96.5k | s = skip_ascii_whitespaces(s, len); | 312 | 96.5k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 96.5k | } |
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 67.4k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 67.4k | s = skip_ascii_whitespaces(s, len); | 312 | 67.4k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 67.4k | } |
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 62.3k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 62.3k | s = skip_ascii_whitespaces(s, len); | 312 | 62.3k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 62.3k | } |
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 82.2k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 82.2k | s = skip_ascii_whitespaces(s, len); | 312 | 82.2k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 82.2k | } |
_ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 1.00k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 1.00k | s = skip_ascii_whitespaces(s, len); | 312 | 1.00k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 1.00k | } |
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 984 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 984 | s = skip_ascii_whitespaces(s, len); | 312 | 984 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 984 | } |
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 968 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 968 | s = skip_ascii_whitespaces(s, len); | 312 | 968 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 968 | } |
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 954 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 954 | s = skip_ascii_whitespaces(s, len); | 312 | 954 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 954 | } |
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 936 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 936 | s = skip_ascii_whitespaces(s, len); | 312 | 936 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 936 | } |
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 4 | s = skip_ascii_whitespaces(s, len); | 312 | 4 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 4 | } |
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 4 | s = skip_ascii_whitespaces(s, len); | 312 | 4 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 4 | } |
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 310 | 20 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 311 | 20 | s = skip_ascii_whitespaces(s, len); | 312 | 20 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 313 | 20 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE |
314 | | |
315 | | // This is considerably faster than glibc's implementation. |
316 | | // In the case of overflow, the max/min value for the data type will be returned. |
317 | | // Assumes s represents a decimal number. |
318 | | template <typename T> |
319 | 1.37k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
320 | 1.37k | s = skip_ascii_whitespaces(s, len); |
321 | 1.37k | return string_to_unsigned_int_internal<T>(s, len, result); |
322 | 1.37k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 319 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 320 | 343 | s = skip_ascii_whitespaces(s, len); | 321 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 322 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 319 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 320 | 343 | s = skip_ascii_whitespaces(s, len); | 321 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 322 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 319 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 320 | 343 | s = skip_ascii_whitespaces(s, len); | 321 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 322 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 319 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 320 | 343 | s = skip_ascii_whitespaces(s, len); | 321 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 322 | 343 | } |
|
323 | | |
324 | | // Convert a string s representing a number in given base into a decimal number. |
325 | | template <typename T> |
326 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
327 | 27.8k | ParseResult* result) { |
328 | 27.8k | s = skip_ascii_whitespaces(s, len); |
329 | 27.8k | return string_to_int_internal<T>(s, len, base, result); |
330 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 327 | 26.4k | ParseResult* result) { | 328 | 26.4k | s = skip_ascii_whitespaces(s, len); | 329 | 26.4k | return string_to_int_internal<T>(s, len, base, result); | 330 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 327 | 490 | ParseResult* result) { | 328 | 490 | s = skip_ascii_whitespaces(s, len); | 329 | 490 | return string_to_int_internal<T>(s, len, base, result); | 330 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 327 | 441 | ParseResult* result) { | 328 | 441 | s = skip_ascii_whitespaces(s, len); | 329 | 441 | return string_to_int_internal<T>(s, len, base, result); | 330 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 327 | 441 | ParseResult* result) { | 328 | 441 | s = skip_ascii_whitespaces(s, len); | 329 | 441 | return string_to_int_internal<T>(s, len, base, result); | 330 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 327 | 1 | ParseResult* result) { | 328 | 1 | s = skip_ascii_whitespaces(s, len); | 329 | 1 | return string_to_int_internal<T>(s, len, base, result); | 330 | 1 | } |
|
331 | | |
332 | | template <typename T> |
333 | 154k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
334 | 154k | s = skip_ascii_whitespaces(s, len); |
335 | 154k | return string_to_float_internal<T>(s, len, result); |
336 | 154k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 88.5k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 88.5k | s = skip_ascii_whitespaces(s, len); | 335 | 88.5k | return string_to_float_internal<T>(s, len, result); | 336 | 88.5k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 333 | 65.9k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 334 | 65.9k | s = skip_ascii_whitespaces(s, len); | 335 | 65.9k | return string_to_float_internal<T>(s, len, result); | 336 | 65.9k | } |
|
337 | | |
338 | | // Parses a string for 'true' or 'false', case insensitive. |
339 | 11.3k | static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) { |
340 | 11.3k | s = skip_ascii_whitespaces(s, len); |
341 | 11.3k | return string_to_bool_internal(s, len, result); |
342 | 11.3k | } |
343 | | |
344 | | /* Declaration only (no body here). Returns the NativeType of the C++ type mapped to primitive type P. NOTE(review): presumably parses s[0, len) as a decimal using type_precision/type_scale and reports the outcome via *result -- confirm against the definition. */ template <PrimitiveType P> |
345 | | static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal( |
346 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
347 | | ParseResult* result); |
348 | | |
349 | | template <typename T> |
350 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
351 | | const T key_value_separator, |
352 | | std::map<std::string, std::string>* result) { |
353 | | int key_pos = 0; |
354 | | int key_end; |
355 | | int val_pos; |
356 | | int val_end; |
357 | | |
358 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
359 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
360 | | std::string::npos) { |
361 | | break; |
362 | | } |
363 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
364 | | val_end = base.size(); |
365 | | } |
366 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
367 | | base.substr(val_pos, val_end - val_pos))); |
368 | | key_pos = val_end; |
369 | | if (key_pos != std::string::npos) { |
370 | | ++key_pos; |
371 | | } |
372 | | } |
373 | | |
374 | | return Status::OK(); |
375 | | } |
376 | | |
377 | | // Parses a decimal integer with an optional leading '+' or '-'; considerably faster than glibc's implementation. |
378 | | // In the case of overflow, *result is set to PARSE_OVERFLOW and the max/min value for the data type is returned. |
379 | | // Assumes s represents a decimal number; an empty input or a lone sign character yields PARSE_FAILURE and 0. |
380 | | // Returns PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
381 | | template <typename T, bool enable_strict_mode = false> |
382 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
383 | | |
384 | | // Declaration only. Upstream note: considerably faster than glibc's implementation. |
385 | | // In the case of overflow, the max/min value for the data type will be returned. |
386 | | // Assumes s represents a decimal number. |
387 | | // Returns PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
388 | | template <typename T> |
389 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
390 | | ParseResult* result); |
391 | | |
392 | | // Declaration only. Converts a string s representing a number in the given base into a value of type T. |
393 | | // Returns PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
394 | | template <typename T> |
395 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
396 | | ParseResult* result); |
397 | | |
398 | | // Declaration only. Converts an ASCII string to an integer of type T, assuming it cannot overflow |
399 | | // and the number is positive (string_to_int_internal strips the sign before calling this). |
400 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
401 | | template <typename T, bool enable_strict_mode = false> |
402 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
403 | | ParseResult* result); |
404 | | |
405 | | // Declaration only. The input is either zero-length or starts with at least one legal digit. Parsing consumes at most |
406 | | // max_len digits and stops earlier when the next char is not a digit. |
407 | | template <typename T> |
408 | | static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
409 | | ParseResult* result); |
410 | | |
411 | | // Declaration only. Upstream note: considerably faster than glibc's implementation (reportedly >100x; the reason is not documented). |
412 | | // No special-case handling is needed for overflows: the floating point spec |
413 | | // already handles them and caps the values to -inf/inf. |
414 | | // To avoid inaccurate conversions this function falls back to strtod for |
415 | | // scientific notation. NOTE(review): this header includes <fast_float/fast_float.h>, so the strtod remarks may be stale -- confirm against the definition. |
416 | | // Returns PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
417 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
418 | | template <typename T> |
419 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
420 | | ParseResult* result); |
421 | | |
422 | | // Declaration only. Parses a string for 'true' or 'false', case insensitive. |
423 | | // Returns PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
424 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
425 | | ParseResult* result); |
426 | | |
427 | | // Returns true if s only contains whitespace. |
428 | 5.46k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
429 | 5.91k | for (int i = 0; i < len; ++i) { |
430 | 5.91k | if (!LIKELY(is_whitespace_ascii(s[i]))) { |
431 | 5.46k | return false; |
432 | 5.46k | } |
433 | 5.91k | } |
434 | 0 | return true; |
435 | 5.46k | } |
436 | | |
437 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
438 | 3.39k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
439 | 3.39k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
440 | 3.39k | } |
441 | | |
// Reports whether each of the first len characters of s is an ASCII decimal
// digit ('0'..'9'). With a non-positive len there is nothing to inspect and
// the answer is true.
static inline bool is_all_digit(const char* __restrict s, int len) {
    if (len <= 0) {
        return true;
    }
    return std::all_of(s, s + len, [](char c) { return c >= '0' && c <= '9'; });
}
450 | | }; // end of class StringParser |
451 | | |
452 | | template <typename T, bool enable_strict_mode> |
453 | 359k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
454 | 359k | if (UNLIKELY(len <= 0)) { |
455 | 1.28k | *result = PARSE_FAILURE; |
456 | 1.28k | return 0; |
457 | 1.28k | } |
458 | | |
459 | 358k | using UnsignedT = MakeUnsignedT<T>; |
460 | 358k | UnsignedT val = 0; |
461 | 358k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
462 | 358k | bool negative = false; |
463 | 358k | int i = 0; |
464 | 358k | switch (*s) { |
465 | 94.0k | case '-': |
466 | 94.0k | negative = true; |
467 | 94.0k | max_val += 1; |
468 | 94.0k | [[fallthrough]]; |
469 | 96.7k | case '+': |
470 | 96.7k | ++i; |
471 | | // only one '+'/'-' char, so could return failure directly |
472 | 96.7k | if (UNLIKELY(len == 1)) { |
473 | 3 | *result = PARSE_FAILURE; |
474 | 3 | return 0; |
475 | 3 | } |
476 | 358k | } |
477 | | |
478 | | // This is the fast path where the string cannot overflow. |
479 | 358k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
480 | 247k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); |
481 | 247k | return static_cast<T>(negative ? -val : val); |
482 | 247k | } |
483 | | |
484 | 110k | const T max_div_10 = max_val / 10; |
485 | 110k | const T max_mod_10 = max_val % 10; |
486 | | |
487 | 110k | int first = i; |
488 | 1.49M | for (; i < len; ++i) { |
489 | 1.42M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
490 | 1.42M | T digit = s[i] - '0'; |
491 | | // This is a tricky check to see if adding this digit will cause an overflow. |
492 | 1.42M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
493 | 30.9k | *result = PARSE_OVERFLOW; |
494 | 30.9k | return negative ? -max_val : max_val; |
495 | 30.9k | } |
496 | 1.38M | val = val * 10 + digit; |
497 | 1.38M | } else { |
498 | 3.72k | if constexpr (enable_strict_mode) { |
499 | 1.10k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
500 | | // Reject the string because the remaining chars are not all whitespace |
501 | 1.10k | *result = PARSE_FAILURE; |
502 | 1.10k | return 0; |
503 | 1.10k | } |
504 | 2.62k | } else { |
505 | 2.62k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && |
506 | 2.62k | !is_float_suffix(s + i, len - i))))) { |
507 | | // Reject the string because either the first char was not a digit, |
508 | | // or the remaining chars are not all whitespace |
509 | 1.61k | *result = PARSE_FAILURE; |
510 | 1.61k | return 0; |
511 | 1.61k | } |
512 | 2.62k | } |
513 | | // Returning here is slightly faster than breaking the loop. |
514 | 1.00k | *result = PARSE_SUCCESS; |
515 | 3.72k | return static_cast<T>(negative ? -val : val); |
516 | 3.72k | } |
517 | 1.42M | } |
518 | 75.9k | *result = PARSE_SUCCESS; |
519 | 75.9k | return static_cast<T>(negative ? -val : val); |
520 | 110k | } _ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 45.9k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 45.9k | if (UNLIKELY(len <= 0)) { | 455 | 25 | *result = PARSE_FAILURE; | 456 | 25 | return 0; | 457 | 25 | } | 458 | | | 459 | 45.9k | using UnsignedT = MakeUnsignedT<T>; | 460 | 45.9k | UnsignedT val = 0; | 461 | 45.9k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 45.9k | bool negative = false; | 463 | 45.9k | int i = 0; | 464 | 45.9k | switch (*s) { | 465 | 3.49k | case '-': | 466 | 3.49k | negative = true; | 467 | 3.49k | max_val += 1; | 468 | 3.49k | [[fallthrough]]; | 469 | 3.72k | case '+': | 470 | 3.72k | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 3.72k | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 45.9k | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 45.9k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 41.7k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 41.7k | return static_cast<T>(negative ? -val : val); | 482 | 41.7k | } | 483 | | | 484 | 4.20k | const T max_div_10 = max_val / 10; | 485 | 4.20k | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 4.20k | int first = i; | 488 | 167k | for (; i < len; ++i) { | 489 | 163k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 163k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 163k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 340 | *result = PARSE_OVERFLOW; | 494 | 340 | return negative ? 
-max_val : max_val; | 495 | 340 | } | 496 | 163k | val = val * 10 + digit; | 497 | 163k | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 184 | } else { | 505 | 184 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 184 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 56 | *result = PARSE_FAILURE; | 510 | 56 | return 0; | 511 | 56 | } | 512 | 184 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 128 | *result = PARSE_SUCCESS; | 515 | 184 | return static_cast<T>(negative ? -val : val); | 516 | 184 | } | 517 | 163k | } | 518 | 3.68k | *result = PARSE_SUCCESS; | 519 | 3.68k | return static_cast<T>(negative ? -val : val); | 520 | 4.20k | } |
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 96.5k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 96.5k | if (UNLIKELY(len <= 0)) { | 455 | 209 | *result = PARSE_FAILURE; | 456 | 209 | return 0; | 457 | 209 | } | 458 | | | 459 | 96.3k | using UnsignedT = MakeUnsignedT<T>; | 460 | 96.3k | UnsignedT val = 0; | 461 | 96.3k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 96.3k | bool negative = false; | 463 | 96.3k | int i = 0; | 464 | 96.3k | switch (*s) { | 465 | 20.0k | case '-': | 466 | 20.0k | negative = true; | 467 | 20.0k | max_val += 1; | 468 | 20.0k | [[fallthrough]]; | 469 | 20.3k | case '+': | 470 | 20.3k | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 20.3k | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 96.3k | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 96.3k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 69.3k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 69.3k | return static_cast<T>(negative ? -val : val); | 482 | 69.3k | } | 483 | | | 484 | 27.0k | const T max_div_10 = max_val / 10; | 485 | 27.0k | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 27.0k | int first = i; | 488 | 91.0k | for (; i < len; ++i) { | 489 | 81.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 80.2k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 80.2k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 16.2k | *result = PARSE_OVERFLOW; | 494 | 16.2k | return negative ? 
-max_val : max_val; | 495 | 16.2k | } | 496 | 63.9k | val = val * 10 + digit; | 497 | 63.9k | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 1.29k | } else { | 505 | 1.29k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 1.29k | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 940 | *result = PARSE_FAILURE; | 510 | 940 | return 0; | 511 | 940 | } | 512 | 1.29k | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 352 | *result = PARSE_SUCCESS; | 515 | 1.29k | return static_cast<T>(negative ? -val : val); | 516 | 1.29k | } | 517 | 81.4k | } | 518 | 9.52k | *result = PARSE_SUCCESS; | 519 | 9.52k | return static_cast<T>(negative ? -val : val); | 520 | 27.0k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 67.4k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 67.4k | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 67.4k | using UnsignedT = MakeUnsignedT<T>; | 460 | 67.4k | UnsignedT val = 0; | 461 | 67.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 67.4k | bool negative = false; | 463 | 67.4k | int i = 0; | 464 | 67.4k | switch (*s) { | 465 | 10.5k | case '-': | 466 | 10.5k | negative = true; | 467 | 10.5k | max_val += 1; | 468 | 10.5k | [[fallthrough]]; | 469 | 10.8k | case '+': | 470 | 10.8k | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 10.8k | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 67.4k | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 67.4k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 51.2k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 51.2k | return static_cast<T>(negative ? -val : val); | 482 | 51.2k | } | 483 | | | 484 | 16.2k | const T max_div_10 = max_val / 10; | 485 | 16.2k | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 16.2k | int first = i; | 488 | 90.8k | for (; i < len; ++i) { | 489 | 81.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 81.3k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 81.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 6.71k | *result = PARSE_OVERFLOW; | 494 | 6.71k | return negative ? 
-max_val : max_val; | 495 | 6.71k | } | 496 | 74.6k | val = val * 10 + digit; | 497 | 74.6k | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 405 | } else { | 505 | 405 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 405 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 133 | *result = PARSE_FAILURE; | 510 | 133 | return 0; | 511 | 133 | } | 512 | 405 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 272 | *result = PARSE_SUCCESS; | 515 | 405 | return static_cast<T>(negative ? -val : val); | 516 | 405 | } | 517 | 81.7k | } | 518 | 9.09k | *result = PARSE_SUCCESS; | 519 | 9.09k | return static_cast<T>(negative ? -val : val); | 520 | 16.2k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 62.3k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 62.3k | if (UNLIKELY(len <= 0)) { | 455 | 999 | *result = PARSE_FAILURE; | 456 | 999 | return 0; | 457 | 999 | } | 458 | | | 459 | 61.3k | using UnsignedT = MakeUnsignedT<T>; | 460 | 61.3k | UnsignedT val = 0; | 461 | 61.3k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 61.3k | bool negative = false; | 463 | 61.3k | int i = 0; | 464 | 61.3k | switch (*s) { | 465 | 8.54k | case '-': | 466 | 8.54k | negative = true; | 467 | 8.54k | max_val += 1; | 468 | 8.54k | [[fallthrough]]; | 469 | 8.88k | case '+': | 470 | 8.88k | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 8.88k | if (UNLIKELY(len == 1)) { | 473 | 3 | *result = PARSE_FAILURE; | 474 | 3 | return 0; | 475 | 3 | } | 476 | 61.3k | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 61.3k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 51.6k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 51.6k | return static_cast<T>(negative ? -val : val); | 482 | 51.6k | } | 483 | | | 484 | 9.64k | const T max_div_10 = max_val / 10; | 485 | 9.64k | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 9.64k | int first = i; | 488 | 99.9k | for (; i < len; ++i) { | 489 | 93.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 93.4k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 93.4k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 3.08k | *result = PARSE_OVERFLOW; | 494 | 3.08k | return negative ? 
-max_val : max_val; | 495 | 3.08k | } | 496 | 90.3k | val = val * 10 + digit; | 497 | 90.3k | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 543 | } else { | 505 | 543 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 543 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 415 | *result = PARSE_FAILURE; | 510 | 415 | return 0; | 511 | 415 | } | 512 | 543 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 128 | *result = PARSE_SUCCESS; | 515 | 543 | return static_cast<T>(negative ? -val : val); | 516 | 543 | } | 517 | 93.9k | } | 518 | 6.02k | *result = PARSE_SUCCESS; | 519 | 6.02k | return static_cast<T>(negative ? -val : val); | 520 | 9.64k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 82.2k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 82.2k | if (UNLIKELY(len <= 0)) { | 455 | 11 | *result = PARSE_FAILURE; | 456 | 11 | return 0; | 457 | 11 | } | 458 | | | 459 | 82.2k | using UnsignedT = MakeUnsignedT<T>; | 460 | 82.2k | UnsignedT val = 0; | 461 | 82.2k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 82.2k | bool negative = false; | 463 | 82.2k | int i = 0; | 464 | 82.2k | switch (*s) { | 465 | 49.2k | case '-': | 466 | 49.2k | negative = true; | 467 | 49.2k | max_val += 1; | 468 | 49.2k | [[fallthrough]]; | 469 | 49.5k | case '+': | 470 | 49.5k | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 49.5k | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 82.2k | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 82.2k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 31.9k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 31.9k | return static_cast<T>(negative ? -val : val); | 482 | 31.9k | } | 483 | | | 484 | 50.2k | const T max_div_10 = max_val / 10; | 485 | 50.2k | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 50.2k | int first = i; | 488 | 1.00M | for (; i < len; ++i) { | 489 | 954k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 953k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 953k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 2.78k | *result = PARSE_OVERFLOW; | 494 | 2.78k | return negative ? 
-max_val : max_val; | 495 | 2.78k | } | 496 | 951k | val = val * 10 + digit; | 497 | 951k | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 199 | } else { | 505 | 199 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 199 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 71 | *result = PARSE_FAILURE; | 510 | 71 | return 0; | 511 | 71 | } | 512 | 199 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 128 | *result = PARSE_SUCCESS; | 515 | 199 | return static_cast<T>(negative ? -val : val); | 516 | 199 | } | 517 | 954k | } | 518 | 47.2k | *result = PARSE_SUCCESS; | 519 | 47.2k | return static_cast<T>(negative ? -val : val); | 520 | 50.2k | } |
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 147 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 147 | if (UNLIKELY(len <= 0)) { | 455 | 2 | *result = PARSE_FAILURE; | 456 | 2 | return 0; | 457 | 2 | } | 458 | | | 459 | 145 | using UnsignedT = MakeUnsignedT<T>; | 460 | 145 | UnsignedT val = 0; | 461 | 145 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 145 | bool negative = false; | 463 | 145 | int i = 0; | 464 | 145 | switch (*s) { | 465 | 0 | case '-': | 466 | 0 | negative = true; | 467 | 0 | max_val += 1; | 468 | 0 | [[fallthrough]]; | 469 | 0 | case '+': | 470 | 0 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 0 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 145 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 145 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 145 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 145 | return static_cast<T>(negative ? -val : val); | 482 | 145 | } | 483 | | | 484 | 0 | const T max_div_10 = max_val / 10; | 485 | 0 | const T max_mod_10 = max_val % 10; | 486 | |
| 487 | 0 | int first = i; | 488 | 0 | for (; i < len; ++i) { | 489 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 0 | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 0 | *result = PARSE_OVERFLOW; | 494 | 0 | return negative ? -max_val : max_val; | 495 | 0 | } | 496 | 0 | val = val * 10 + digit; | 497 | 0 | } else { | 498 | 0 | if constexpr (enable_strict_mode) { | 499 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 0 | *result = PARSE_FAILURE; | 502 | 0 | return 0; | 503 | 0 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 0 | return static_cast<T>(negative ? -val : val); | 516 | 0 | } | 517 | 0 | } | 518 | 0 | *result = PARSE_SUCCESS; | 519 | 0 | return static_cast<T>(negative ? -val : val); | 520 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 1.00k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 1.00k | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 993 | using UnsignedT = MakeUnsignedT<T>; | 460 | 993 | UnsignedT val = 0; | 461 | 993 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 993 | bool negative = false; | 463 | 993 | int i = 0; | 464 | 993 | switch (*s) { | 465 | 446 | case '-': | 466 | 446 | negative = true; | 467 | 446 | max_val += 1; | 468 | 446 | [[fallthrough]]; | 469 | 697 | case '+': | 470 | 697 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 697 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 993 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 993 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 51 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 51 | return static_cast<T>(negative ? -val : val); | 482 | 51 | } | 483 | | | 484 | 942 | const T max_div_10 = max_val / 10; | 485 | 942 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 942 | int first = i; | 488 | 4.25k | for (; i < len; ++i) { | 489 | 4.12k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 3.71k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 3.71k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 400 | *result = PARSE_OVERFLOW; | 494 | 400 | return negative ? 
-max_val : max_val; | 495 | 400 | } | 496 | 3.31k | val = val * 10 + digit; | 497 | 3.31k | } else { | 498 | 406 | if constexpr (enable_strict_mode) { | 499 | 406 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 406 | *result = PARSE_FAILURE; | 502 | 406 | return 0; | 503 | 406 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 406 | return static_cast<T>(negative ? -val : val); | 516 | 406 | } | 517 | 4.12k | } | 518 | 136 | *result = PARSE_SUCCESS; | 519 | 136 | return static_cast<T>(negative ? -val : val); | 520 | 942 | } |
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 984 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 984 | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 977 | using UnsignedT = MakeUnsignedT<T>; | 460 | 977 | UnsignedT val = 0; | 461 | 977 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 977 | bool negative = false; | 463 | 977 | int i = 0; | 464 | 977 | switch (*s) { | 465 | 438 | case '-': | 466 | 438 | negative = true; | 467 | 438 | max_val += 1; | 468 | 438 | [[fallthrough]]; | 469 | 685 | case '+': | 470 | 685 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 685 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 977 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 977 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 203 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 203 | return static_cast<T>(negative ? -val : val); | 482 | 203 | } | 483 | | | 484 | 774 | const T max_div_10 = max_val / 10; | 485 | 774 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 774 | int first = i; | 488 | 4.92k | for (; i < len; ++i) { | 489 | 4.84k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 4.53k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 4.53k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 384 | *result = PARSE_OVERFLOW; | 494 | 384 | return negative ? 
-max_val : max_val; | 495 | 384 | } | 496 | 4.14k | val = val * 10 + digit; | 497 | 4.14k | } else { | 498 | 310 | if constexpr (enable_strict_mode) { | 499 | 310 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 310 | *result = PARSE_FAILURE; | 502 | 310 | return 0; | 503 | 310 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 310 | return static_cast<T>(negative ? -val : val); | 516 | 310 | } | 517 | 4.84k | } | 518 | 80 | *result = PARSE_SUCCESS; | 519 | 80 | return static_cast<T>(negative ? -val : val); | 520 | 774 | } |
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 968 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 968 | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 961 | using UnsignedT = MakeUnsignedT<T>; | 460 | 961 | UnsignedT val = 0; | 461 | 961 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 961 | bool negative = false; | 463 | 961 | int i = 0; | 464 | 961 | switch (*s) { | 465 | 430 | case '-': | 466 | 430 | negative = true; | 467 | 430 | max_val += 1; | 468 | 430 | [[fallthrough]]; | 469 | 673 | case '+': | 470 | 673 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 673 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 961 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 961 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 399 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 399 | return static_cast<T>(negative ? -val : val); | 482 | 399 | } | 483 | | | 484 | 562 | const T max_div_10 = max_val / 10; | 485 | 562 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 562 | int first = i; | 488 | 6.65k | for (; i < len; ++i) { | 489 | 6.58k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 6.45k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 6.45k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 368 | *result = PARSE_OVERFLOW; | 494 | 368 | return negative ? 
-max_val : max_val; | 495 | 368 | } | 496 | 6.08k | val = val * 10 + digit; | 497 | 6.08k | } else { | 498 | 130 | if constexpr (enable_strict_mode) { | 499 | 130 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 130 | *result = PARSE_FAILURE; | 502 | 130 | return 0; | 503 | 130 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 130 | return static_cast<T>(negative ? -val : val); | 516 | 130 | } | 517 | 6.58k | } | 518 | 64 | *result = PARSE_SUCCESS; | 519 | 64 | return static_cast<T>(negative ? -val : val); | 520 | 562 | } |
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 954 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 954 | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 947 | using UnsignedT = MakeUnsignedT<T>; | 460 | 947 | UnsignedT val = 0; | 461 | 947 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 947 | bool negative = false; | 463 | 947 | int i = 0; | 464 | 947 | switch (*s) { | 465 | 422 | case '-': | 466 | 422 | negative = true; | 467 | 422 | max_val += 1; | 468 | 422 | [[fallthrough]]; | 469 | 661 | case '+': | 470 | 661 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 661 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 947 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 947 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 402 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 402 | return static_cast<T>(negative ? -val : val); | 482 | 402 | } | 483 | | | 484 | 545 | const T max_div_10 = max_val / 10; | 485 | 545 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 545 | int first = i; | 488 | 11.5k | for (; i < len; ++i) { | 489 | 11.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 11.3k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 11.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 352 | *result = PARSE_OVERFLOW; | 494 | 352 | return negative ? 
-max_val : max_val; | 495 | 352 | } | 496 | 10.9k | val = val * 10 + digit; | 497 | 10.9k | } else { | 498 | 129 | if constexpr (enable_strict_mode) { | 499 | 129 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 129 | *result = PARSE_FAILURE; | 502 | 129 | return 0; | 503 | 129 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 129 | return static_cast<T>(negative ? -val : val); | 516 | 129 | } | 517 | 11.4k | } | 518 | 64 | *result = PARSE_SUCCESS; | 519 | 64 | return static_cast<T>(negative ? -val : val); | 520 | 545 | } |
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 936 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 936 | if (UNLIKELY(len <= 0)) { | 455 | 7 | *result = PARSE_FAILURE; | 456 | 7 | return 0; | 457 | 7 | } | 458 | | | 459 | 929 | using UnsignedT = MakeUnsignedT<T>; | 460 | 929 | UnsignedT val = 0; | 461 | 929 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 929 | bool negative = false; | 463 | 929 | int i = 0; | 464 | 929 | switch (*s) { | 465 | 414 | case '-': | 466 | 414 | negative = true; | 467 | 414 | max_val += 1; | 468 | 414 | [[fallthrough]]; | 469 | 649 | case '+': | 470 | 649 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 649 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 929 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 929 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 401 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 401 | return static_cast<T>(negative ? -val : val); | 482 | 401 | } | 483 | | | 484 | 528 | const T max_div_10 = max_val / 10; | 485 | 528 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 528 | int first = i; | 488 | 21.5k | for (; i < len; ++i) { | 489 | 21.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 21.3k | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 21.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 336 | *result = PARSE_OVERFLOW; | 494 | 336 | return negative ? 
-max_val : max_val; | 495 | 336 | } | 496 | 21.0k | val = val * 10 + digit; | 497 | 21.0k | } else { | 498 | 128 | if constexpr (enable_strict_mode) { | 499 | 128 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | 128 | *result = PARSE_FAILURE; | 502 | 128 | return 0; | 503 | 128 | } | 504 | | } else { | 505 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | | *result = PARSE_FAILURE; | 510 | | return 0; | 511 | | } | 512 | | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 128 | return static_cast<T>(negative ? -val : val); | 516 | 128 | } | 517 | 21.5k | } | 518 | 64 | *result = PARSE_SUCCESS; | 519 | 64 | return static_cast<T>(negative ? -val : val); | 520 | 528 | } |
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 4 | if (UNLIKELY(len <= 0)) { | 455 | 0 | *result = PARSE_FAILURE; | 456 | 0 | return 0; | 457 | 0 | } | 458 | | | 459 | 4 | using UnsignedT = MakeUnsignedT<T>; | 460 | 4 | UnsignedT val = 0; | 461 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 4 | bool negative = false; | 463 | 4 | int i = 0; | 464 | 4 | switch (*s) { | 465 | 0 | case '-': | 466 | 0 | negative = true; | 467 | 0 | max_val += 1; | 468 | 0 | [[fallthrough]]; | 469 | 0 | case '+': | 470 | 0 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 0 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 4 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 4 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 4 | return static_cast<T>(negative ? -val : val); | 482 | 4 | } | 483 | | | 484 | 0 | const T max_div_10 = max_val / 10; | 485 | 0 | const T max_mod_10 = max_val % 10; | 486 | |
| 487 | 0 | int first = i; | 488 | 0 | for (; i < len; ++i) { | 489 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 0 | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 0 | *result = PARSE_OVERFLOW; | 494 | 0 | return negative ? -max_val : max_val; | 495 | 0 | } | 496 | 0 | val = val * 10 + digit; | 497 | 0 | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 0 | } else { | 505 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 0 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 0 | *result = PARSE_FAILURE; | 510 | 0 | return 0; | 511 | 0 | } | 512 | 0 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 0 | return static_cast<T>(negative ? -val : val); | 516 | 0 | } | 517 | 0 | } | 518 | 0 | *result = PARSE_SUCCESS; | 519 | 0 | return static_cast<T>(negative ? -val : val); | 520 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 4 | if (UNLIKELY(len <= 0)) { | 455 | 0 | *result = PARSE_FAILURE; | 456 | 0 | return 0; | 457 | 0 | } | 458 | | | 459 | 4 | using UnsignedT = MakeUnsignedT<T>; | 460 | 4 | UnsignedT val = 0; | 461 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 4 | bool negative = false; | 463 | 4 | int i = 0; | 464 | 4 | switch (*s) { | 465 | 0 | case '-': | 466 | 0 | negative = true; | 467 | 0 | max_val += 1; | 468 | 0 | [[fallthrough]]; | 469 | 0 | case '+': | 470 | 0 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 0 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 4 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 0 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 0 | return static_cast<T>(negative ? -val : val); | 482 | 0 | } | 483 | | | 484 | 4 | const T max_div_10 = max_val / 10; | 485 | 4 | const T max_mod_10 = max_val % 10; | 486 | | | 487 | 4 | int first = i; | 488 | 84 | for (; i < len; ++i) { | 489 | 80 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 80 | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 80 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 0 | *result = PARSE_OVERFLOW; | 494 | 0 | return negative ? 
-max_val : max_val; | 495 | 0 | } | 496 | 80 | val = val * 10 + digit; | 497 | 80 | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 0 | } else { | 505 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 0 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 0 | *result = PARSE_FAILURE; | 510 | 0 | return 0; | 511 | 0 | } | 512 | 0 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 0 | return static_cast<T>(negative ? -val : val); | 516 | 0 | } | 517 | 80 | } | 518 | 4 | *result = PARSE_SUCCESS; | 519 | 4 | return static_cast<T>(negative ? -val : val); | 520 | 4 | } |
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 453 | 20 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 454 | 20 | if (UNLIKELY(len <= 0)) { | 455 | 0 | *result = PARSE_FAILURE; | 456 | 0 | return 0; | 457 | 0 | } | 458 | | | 459 | 20 | using UnsignedT = MakeUnsignedT<T>; | 460 | 20 | UnsignedT val = 0; | 461 | 20 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 462 | 20 | bool negative = false; | 463 | 20 | int i = 0; | 464 | 20 | switch (*s) { | 465 | 0 | case '-': | 466 | 0 | negative = true; | 467 | 0 | max_val += 1; | 468 | 0 | [[fallthrough]]; | 469 | 0 | case '+': | 470 | 0 | ++i; | 471 | | // only one '+'/'-' char, so could return failure directly | 472 | 0 | if (UNLIKELY(len == 1)) { | 473 | 0 | *result = PARSE_FAILURE; | 474 | 0 | return 0; | 475 | 0 | } | 476 | 20 | } | 477 | | | 478 | | // This is the fast path where the string cannot overflow. | 479 | 20 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 480 | 20 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 481 | 20 | return static_cast<T>(negative ? -val : val); | 482 | 20 | } | 483 | | | 484 | 0 | const T max_div_10 = max_val / 10; | 485 | 0 | const T max_mod_10 = max_val % 10; | 486 | |
| 487 | 0 | int first = i; | 488 | 0 | for (; i < len; ++i) { | 489 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 490 | 0 | T digit = s[i] - '0'; | 491 | | // This is a tricky check to see if adding this digit will cause an overflow. | 492 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 493 | 0 | *result = PARSE_OVERFLOW; | 494 | 0 | return negative ? -max_val : max_val; | 495 | 0 | } | 496 | 0 | val = val * 10 + digit; | 497 | 0 | } else { | 498 | | if constexpr (enable_strict_mode) { | 499 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 500 | | // Reject the string because the remaining chars are not all whitespace | 501 | | *result = PARSE_FAILURE; | 502 | | return 0; | 503 | | } | 504 | 0 | } else { | 505 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 506 | 0 | !is_float_suffix(s + i, len - i))))) { | 507 | | // Reject the string because either the first char was not a digit, | 508 | | // or the remaining chars are not all whitespace | 509 | 0 | *result = PARSE_FAILURE; | 510 | 0 | return 0; | 511 | 0 | } | 512 | 0 | } | 513 | | // Returning here is slightly faster than breaking the loop. | 514 | 0 | *result = PARSE_SUCCESS; | 515 | 0 | return static_cast<T>(negative ? -val : val); | 516 | 0 | } | 517 | 0 | } | 518 | 0 | *result = PARSE_SUCCESS; | 519 | 0 | return static_cast<T>(negative ? -val : val); | 520 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE |
521 | | |
522 | | template <typename T> |
523 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
524 | 1.37k | ParseResult* result) { |
525 | 1.37k | if (UNLIKELY(len <= 0)) { |
526 | 0 | *result = PARSE_FAILURE; |
527 | 0 | return 0; |
528 | 0 | } |
529 | | |
530 | 1.37k | T val = 0; |
531 | 1.37k | T max_val = std::numeric_limits<T>::max(); |
532 | 1.37k | int i = 0; |
533 | | |
534 | 1.37k | using signedT = MakeSignedT<T>; |
535 | | // This is the fast path where the string cannot overflow. |
536 | 1.37k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
537 | 784 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
538 | 784 | return val; |
539 | 784 | } |
540 | | |
541 | 588 | const T max_div_10 = max_val / 10; |
542 | 588 | const T max_mod_10 = max_val % 10; |
543 | | |
544 | 588 | int first = i; |
545 | 4.65k | for (; i < len; ++i) { |
546 | 4.31k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
547 | 4.26k | T digit = s[i] - '0'; |
548 | | // This is a tricky check to see if adding this digit will cause an overflow. |
549 | 4.26k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
550 | 196 | *result = PARSE_OVERFLOW; |
551 | 196 | return max_val; |
552 | 196 | } |
553 | 4.06k | val = val * 10 + digit; |
554 | 4.06k | } else { |
555 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
556 | | // Reject the string because either the first char was not a digit, |
557 | | // or the remaining chars are not all whitespace |
558 | 49 | *result = PARSE_FAILURE; |
559 | 49 | return 0; |
560 | 49 | } |
561 | | // Returning here is slightly faster than breaking the loop. |
562 | 0 | *result = PARSE_SUCCESS; |
563 | 0 | return val; |
564 | 49 | } |
565 | 4.31k | } |
566 | 343 | *result = PARSE_SUCCESS; |
567 | 343 | return val; |
568 | 588 | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 524 | 343 | ParseResult* result) { | 525 | 343 | if (UNLIKELY(len <= 0)) { | 526 | 0 | *result = PARSE_FAILURE; | 527 | 0 | return 0; | 528 | 0 | } | 529 | | | 530 | 343 | T val = 0; | 531 | 343 | T max_val = std::numeric_limits<T>::max(); | 532 | 343 | int i = 0; | 533 | | | 534 | 343 | using signedT = MakeSignedT<T>; | 535 | | // This is the fast path where the string cannot overflow. | 536 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 537 | 98 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 538 | 98 | return val; | 539 | 98 | } | 540 | | | 541 | 245 | const T max_div_10 = max_val / 10; | 542 | 245 | const T max_mod_10 = max_val % 10; | 543 | | | 544 | 245 | int first = i; | 545 | 784 | for (; i < len; ++i) { | 546 | 637 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 547 | 588 | T digit = s[i] - '0'; | 548 | | // This is a tricky check to see if adding this digit will cause an overflow. | 549 | 588 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 550 | 49 | *result = PARSE_OVERFLOW; | 551 | 49 | return max_val; | 552 | 49 | } | 553 | 539 | val = val * 10 + digit; | 554 | 539 | } else { | 555 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 556 | | // Reject the string because either the first char was not a digit, | 557 | | // or the remaining chars are not all whitespace | 558 | 49 | *result = PARSE_FAILURE; | 559 | 49 | return 0; | 560 | 49 | } | 561 | | // Returning here is slightly faster than breaking the loop. | 562 | 0 | *result = PARSE_SUCCESS; | 563 | 0 | return val; | 564 | 49 | } | 565 | 637 | } | 566 | 147 | *result = PARSE_SUCCESS; | 567 | 147 | return val; | 568 | 245 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 524 | 343 | ParseResult* result) { | 525 | 343 | if (UNLIKELY(len <= 0)) { | 526 | 0 | *result = PARSE_FAILURE; | 527 | 0 | return 0; | 528 | 0 | } | 529 | | | 530 | 343 | T val = 0; | 531 | 343 | T max_val = std::numeric_limits<T>::max(); | 532 | 343 | int i = 0; | 533 | | | 534 | 343 | using signedT = MakeSignedT<T>; | 535 | | // This is the fast path where the string cannot overflow. | 536 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 537 | 196 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 538 | 196 | return val; | 539 | 196 | } | 540 | | | 541 | 147 | const T max_div_10 = max_val / 10; | 542 | 147 | const T max_mod_10 = max_val % 10; | 543 | | | 544 | 147 | int first = i; | 545 | 833 | for (; i < len; ++i) { | 546 | 735 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 547 | 735 | T digit = s[i] - '0'; | 548 | | // This is a tricky check to see if adding this digit will cause an overflow. | 549 | 735 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 550 | 49 | *result = PARSE_OVERFLOW; | 551 | 49 | return max_val; | 552 | 49 | } | 553 | 686 | val = val * 10 + digit; | 554 | 686 | } else { | 555 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 556 | | // Reject the string because either the first char was not a digit, | 557 | | // or the remaining chars are not all whitespace | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | // Returning here is slightly faster than breaking the loop. | 562 | 0 | *result = PARSE_SUCCESS; | 563 | 0 | return val; | 564 | 0 | } | 565 | 735 | } | 566 | 98 | *result = PARSE_SUCCESS; | 567 | 98 | return val; | 568 | 147 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 524 | 343 | ParseResult* result) { | 525 | 343 | if (UNLIKELY(len <= 0)) { | 526 | 0 | *result = PARSE_FAILURE; | 527 | 0 | return 0; | 528 | 0 | } | 529 | | | 530 | 343 | T val = 0; | 531 | 343 | T max_val = std::numeric_limits<T>::max(); | 532 | 343 | int i = 0; | 533 | | | 534 | 343 | using signedT = MakeSignedT<T>; | 535 | | // This is the fast path where the string cannot overflow. | 536 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 537 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 538 | 245 | return val; | 539 | 245 | } | 540 | | | 541 | 98 | const T max_div_10 = max_val / 10; | 542 | 98 | const T max_mod_10 = max_val % 10; | 543 | | | 544 | 98 | int first = i; | 545 | 1.02k | for (; i < len; ++i) { | 546 | 980 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 547 | 980 | T digit = s[i] - '0'; | 548 | | // This is a tricky check to see if adding this digit will cause an overflow. | 549 | 980 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 550 | 49 | *result = PARSE_OVERFLOW; | 551 | 49 | return max_val; | 552 | 49 | } | 553 | 931 | val = val * 10 + digit; | 554 | 931 | } else { | 555 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 556 | | // Reject the string because either the first char was not a digit, | 557 | | // or the remaining chars are not all whitespace | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | // Returning here is slightly faster than breaking the loop. | 562 | 0 | *result = PARSE_SUCCESS; | 563 | 0 | return val; | 564 | 0 | } | 565 | 980 | } | 566 | 49 | *result = PARSE_SUCCESS; | 567 | 49 | return val; | 568 | 98 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 524 | 343 | ParseResult* result) { | 525 | 343 | if (UNLIKELY(len <= 0)) { | 526 | 0 | *result = PARSE_FAILURE; | 527 | 0 | return 0; | 528 | 0 | } | 529 | | | 530 | 343 | T val = 0; | 531 | 343 | T max_val = std::numeric_limits<T>::max(); | 532 | 343 | int i = 0; | 533 | | | 534 | 343 | using signedT = MakeSignedT<T>; | 535 | | // This is the fast path where the string cannot overflow. | 536 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 537 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 538 | 245 | return val; | 539 | 245 | } | 540 | | | 541 | 98 | const T max_div_10 = max_val / 10; | 542 | 98 | const T max_mod_10 = max_val % 10; | 543 | | | 544 | 98 | int first = i; | 545 | 2.00k | for (; i < len; ++i) { | 546 | 1.96k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 547 | 1.96k | T digit = s[i] - '0'; | 548 | | // This is a tricky check to see if adding this digit will cause an overflow. | 549 | 1.96k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 550 | 49 | *result = PARSE_OVERFLOW; | 551 | 49 | return max_val; | 552 | 49 | } | 553 | 1.91k | val = val * 10 + digit; | 554 | 1.91k | } else { | 555 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 556 | | // Reject the string because either the first char was not a digit, | 557 | | // or the remaining chars are not all whitespace | 558 | 0 | *result = PARSE_FAILURE; | 559 | 0 | return 0; | 560 | 0 | } | 561 | | // Returning here is slightly faster than breaking the loop. | 562 | 0 | *result = PARSE_SUCCESS; | 563 | 0 | return val; | 564 | 0 | } | 565 | 1.96k | } | 566 | 49 | *result = PARSE_SUCCESS; | 567 | 49 | return val; | 568 | 98 | } |
|
569 | | |
570 | | template <typename T> |
571 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
572 | 27.8k | ParseResult* result) { |
573 | 27.8k | using UnsignedT = MakeUnsignedT<T>; |
574 | 27.8k | UnsignedT val = 0; |
575 | 27.8k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
576 | 27.8k | bool negative = false; |
577 | 27.8k | if (UNLIKELY(len <= 0)) { |
578 | 0 | *result = PARSE_FAILURE; |
579 | 0 | return 0; |
580 | 0 | } |
581 | 27.8k | int i = 0; |
582 | 27.8k | switch (*s) { |
583 | 13.4k | case '-': |
584 | 13.4k | negative = true; |
585 | 13.4k | max_val = StringParser::numeric_limits<T>(false) + 1; |
586 | 13.4k | [[fallthrough]]; |
587 | 13.7k | case '+': |
588 | 13.7k | i = 1; |
589 | 27.8k | } |
590 | | |
591 | 27.8k | const T max_div_base = max_val / base; |
592 | 27.8k | const T max_mod_base = max_val % base; |
593 | | |
594 | 27.8k | int first = i; |
595 | 90.9k | for (; i < len; ++i) { |
596 | 76.6k | T digit; |
597 | 76.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
598 | 75.7k | digit = s[i] - '0'; |
599 | 75.7k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
600 | 639 | digit = (s[i] - 'a' + 10); |
601 | 639 | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
602 | 98 | digit = (s[i] - 'A' + 10); |
603 | 147 | } else { |
604 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
605 | | // Reject the string because either the first char was not an alpha/digit, |
606 | | // or the remaining chars are not all whitespace |
607 | 147 | *result = PARSE_FAILURE; |
608 | 147 | return 0; |
609 | 147 | } |
610 | | // skip trailing whitespace. |
611 | 0 | break; |
612 | 147 | } |
613 | | |
614 | | // Bail, if we encounter a digit that is not available in base. |
615 | 76.4k | if (digit >= base) { |
616 | 392 | break; |
617 | 392 | } |
618 | | |
619 | | // This is a tricky check to see if adding this digit will cause an overflow. |
620 | 76.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
621 | 12.9k | *result = PARSE_OVERFLOW; |
622 | 12.9k | return static_cast<T>(negative ? -max_val : max_val); |
623 | 12.9k | } |
624 | 63.1k | val = val * base + digit; |
625 | 63.1k | } |
626 | 14.7k | *result = PARSE_SUCCESS; |
627 | 14.7k | return static_cast<T>(negative ? -val : val); |
628 | 27.8k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 572 | 26.4k | ParseResult* result) { | 573 | 26.4k | using UnsignedT = MakeUnsignedT<T>; | 574 | 26.4k | UnsignedT val = 0; | 575 | 26.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 576 | 26.4k | bool negative = false; | 577 | 26.4k | if (UNLIKELY(len <= 0)) { | 578 | 0 | *result = PARSE_FAILURE; | 579 | 0 | return 0; | 580 | 0 | } | 581 | 26.4k | int i = 0; | 582 | 26.4k | switch (*s) { | 583 | 12.8k | case '-': | 584 | 12.8k | negative = true; | 585 | 12.8k | max_val = StringParser::numeric_limits<T>(false) + 1; | 586 | 12.8k | [[fallthrough]]; | 587 | 12.9k | case '+': | 588 | 12.9k | i = 1; | 589 | 26.4k | } | 590 | | | 591 | 26.4k | const T max_div_base = max_val / base; | 592 | 26.4k | const T max_mod_base = max_val % base; | 593 | | | 594 | 26.4k | int first = i; | 595 | 80.7k | for (; i < len; ++i) { | 596 | 67.4k | T digit; | 597 | 67.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 598 | 66.6k | digit = s[i] - '0'; | 599 | 66.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 600 | 539 | digit = (s[i] - 'a' + 10); | 601 | 539 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 602 | 98 | digit = (s[i] - 'A' + 10); | 603 | 147 | } else { | 604 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 605 | | // Reject the string because either the first char was not an alpha/digit, | 606 | | // or the remaining chars are not all whitespace | 607 | 147 | *result = PARSE_FAILURE; | 608 | 147 | return 0; | 609 | 147 | } | 610 | | // skip trailing whitespace. | 611 | 0 | break; | 612 | 147 | } | 613 | | | 614 | | // Bail, if we encounter a digit that is not available in base. | 615 | 67.3k | if (digit >= base) { | 616 | 392 | break; | 617 | 392 | } | 618 | | | 619 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 620 | 66.9k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 621 | 12.6k | *result = PARSE_OVERFLOW; | 622 | 12.6k | return static_cast<T>(negative ? -max_val : max_val); | 623 | 12.6k | } | 624 | 54.2k | val = val * base + digit; | 625 | 54.2k | } | 626 | 13.6k | *result = PARSE_SUCCESS; | 627 | 13.6k | return static_cast<T>(negative ? -val : val); | 628 | 26.4k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 572 | 490 | ParseResult* result) { | 573 | 490 | using UnsignedT = MakeUnsignedT<T>; | 574 | 490 | UnsignedT val = 0; | 575 | 490 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 576 | 490 | bool negative = false; | 577 | 490 | if (UNLIKELY(len <= 0)) { | 578 | 0 | *result = PARSE_FAILURE; | 579 | 0 | return 0; | 580 | 0 | } | 581 | 490 | int i = 0; | 582 | 490 | switch (*s) { | 583 | 196 | case '-': | 584 | 196 | negative = true; | 585 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 586 | 196 | [[fallthrough]]; | 587 | 245 | case '+': | 588 | 245 | i = 1; | 589 | 490 | } | 590 | | | 591 | 490 | const T max_div_base = max_val / base; | 592 | 490 | const T max_mod_base = max_val % base; | 593 | | | 594 | 490 | int first = i; | 595 | 2.10k | for (; i < len; ++i) { | 596 | 1.71k | T digit; | 597 | 1.71k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 598 | 1.61k | digit = s[i] - '0'; | 599 | 1.61k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 600 | 98 | digit = (s[i] - 'a' + 10); | 601 | 98 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 602 | 0 | digit = (s[i] - 'A' + 10); | 603 | 0 | } else { | 604 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 605 | | // Reject the string because either the first char was not an alpha/digit, | 606 | | // or the remaining chars are not all whitespace | 607 | 0 | *result = PARSE_FAILURE; | 608 | 0 | return 0; | 609 | 0 | } | 610 | | // skip trailing whitespace. | 611 | 0 | break; | 612 | 0 | } | 613 | | | 614 | | // Bail, if we encounter a digit that is not available in base. | 615 | 1.71k | if (digit >= base) { | 616 | 0 | break; | 617 | 0 | } | 618 | | | 619 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 620 | 1.71k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 621 | 98 | *result = PARSE_OVERFLOW; | 622 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 623 | 98 | } | 624 | 1.61k | val = val * base + digit; | 625 | 1.61k | } | 626 | 392 | *result = PARSE_SUCCESS; | 627 | 392 | return static_cast<T>(negative ? -val : val); | 628 | 490 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 572 | 441 | ParseResult* result) { | 573 | 441 | using UnsignedT = MakeUnsignedT<T>; | 574 | 441 | UnsignedT val = 0; | 575 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 576 | 441 | bool negative = false; | 577 | 441 | if (UNLIKELY(len <= 0)) { | 578 | 0 | *result = PARSE_FAILURE; | 579 | 0 | return 0; | 580 | 0 | } | 581 | 441 | int i = 0; | 582 | 441 | switch (*s) { | 583 | 147 | case '-': | 584 | 147 | negative = true; | 585 | 147 | max_val = StringParser::numeric_limits<T>(false) + 1; | 586 | 147 | [[fallthrough]]; | 587 | 245 | case '+': | 588 | 245 | i = 1; | 589 | 441 | } | 590 | | | 591 | 441 | const T max_div_base = max_val / base; | 592 | 441 | const T max_mod_base = max_val % base; | 593 | | | 594 | 441 | int first = i; | 595 | 3.03k | for (; i < len; ++i) { | 596 | 2.69k | T digit; | 597 | 2.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 598 | 2.69k | digit = s[i] - '0'; | 599 | 2.69k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 600 | 0 | digit = (s[i] - 'a' + 10); | 601 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 602 | 0 | digit = (s[i] - 'A' + 10); | 603 | 0 | } else { | 604 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 605 | | // Reject the string because either the first char was not an alpha/digit, | 606 | | // or the remaining chars are not all whitespace | 607 | 0 | *result = PARSE_FAILURE; | 608 | 0 | return 0; | 609 | 0 | } | 610 | | // skip trailing whitespace. | 611 | 0 | break; | 612 | 0 | } | 613 | | | 614 | | // Bail, if we encounter a digit that is not available in base. | 615 | 2.69k | if (digit >= base) { | 616 | 0 | break; | 617 | 0 | } | 618 | | | 619 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 620 | 2.69k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 621 | 98 | *result = PARSE_OVERFLOW; | 622 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 623 | 98 | } | 624 | 2.59k | val = val * base + digit; | 625 | 2.59k | } | 626 | 343 | *result = PARSE_SUCCESS; | 627 | 343 | return static_cast<T>(negative ? -val : val); | 628 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 572 | 441 | ParseResult* result) { | 573 | 441 | using UnsignedT = MakeUnsignedT<T>; | 574 | 441 | UnsignedT val = 0; | 575 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 576 | 441 | bool negative = false; | 577 | 441 | if (UNLIKELY(len <= 0)) { | 578 | 0 | *result = PARSE_FAILURE; | 579 | 0 | return 0; | 580 | 0 | } | 581 | 441 | int i = 0; | 582 | 441 | switch (*s) { | 583 | 196 | case '-': | 584 | 196 | negative = true; | 585 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 586 | 196 | [[fallthrough]]; | 587 | 245 | case '+': | 588 | 245 | i = 1; | 589 | 441 | } | 590 | | | 591 | 441 | const T max_div_base = max_val / base; | 592 | 441 | const T max_mod_base = max_val % base; | 593 | | | 594 | 441 | int first = i; | 595 | 5.09k | for (; i < len; ++i) { | 596 | 4.75k | T digit; | 597 | 4.75k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 598 | 4.75k | digit = s[i] - '0'; | 599 | 4.75k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 600 | 0 | digit = (s[i] - 'a' + 10); | 601 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 602 | 0 | digit = (s[i] - 'A' + 10); | 603 | 0 | } else { | 604 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 605 | | // Reject the string because either the first char was not an alpha/digit, | 606 | | // or the remaining chars are not all whitespace | 607 | 0 | *result = PARSE_FAILURE; | 608 | 0 | return 0; | 609 | 0 | } | 610 | | // skip trailing whitespace. | 611 | 0 | break; | 612 | 0 | } | 613 | | | 614 | | // Bail, if we encounter a digit that is not available in base. | 615 | 4.75k | if (digit >= base) { | 616 | 0 | break; | 617 | 0 | } | 618 | | | 619 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 620 | 4.75k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 621 | 98 | *result = PARSE_OVERFLOW; | 622 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 623 | 98 | } | 624 | 4.65k | val = val * base + digit; | 625 | 4.65k | } | 626 | 343 | *result = PARSE_SUCCESS; | 627 | 343 | return static_cast<T>(negative ? -val : val); | 628 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 572 | 1 | ParseResult* result) { | 573 | 1 | using UnsignedT = MakeUnsignedT<T>; | 574 | 1 | UnsignedT val = 0; | 575 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 576 | 1 | bool negative = false; | 577 | 1 | if (UNLIKELY(len <= 0)) { | 578 | 0 | *result = PARSE_FAILURE; | 579 | 0 | return 0; | 580 | 0 | } | 581 | 1 | int i = 0; | 582 | 1 | switch (*s) { | 583 | 0 | case '-': | 584 | 0 | negative = true; | 585 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 586 | 0 | [[fallthrough]]; | 587 | 0 | case '+': | 588 | 0 | i = 1; | 589 | 1 | } | 590 | | | 591 | 1 | const T max_div_base = max_val / base; | 592 | 1 | const T max_mod_base = max_val % base; | 593 | | | 594 | 1 | int first = i; | 595 | 3 | for (; i < len; ++i) { | 596 | 2 | T digit; | 597 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 598 | 0 | digit = s[i] - '0'; | 599 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 600 | 2 | digit = (s[i] - 'a' + 10); | 601 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 602 | 0 | digit = (s[i] - 'A' + 10); | 603 | 0 | } else { | 604 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 605 | | // Reject the string because either the first char was not an alpha/digit, | 606 | | // or the remaining chars are not all whitespace | 607 | 0 | *result = PARSE_FAILURE; | 608 | 0 | return 0; | 609 | 0 | } | 610 | | // skip trailing whitespace. | 611 | 0 | break; | 612 | 0 | } | 613 | | | 614 | | // Bail, if we encounter a digit that is not available in base. | 615 | 2 | if (digit >= base) { | 616 | 0 | break; | 617 | 0 | } | 618 | | | 619 | | // This is a tricky check to see if adding this digit will cause an overflow. | 620 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 621 | 0 | *result = PARSE_OVERFLOW; | 622 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 623 | 0 | } | 624 | 2 | val = val * base + digit; | 625 | 2 | } | 626 | 1 | *result = PARSE_SUCCESS; | 627 | 1 | return static_cast<T>(negative ? -val : val); | 628 | 1 | } |
|
629 | | |
630 | | template <typename T, bool enable_strict_mode> |
631 | 248k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
632 | 248k | T val = 0; |
633 | 248k | if (UNLIKELY(len == 0)) { |
634 | 0 | *result = PARSE_SUCCESS; |
635 | 0 | return val; |
636 | 0 | } |
637 | | // Factor out the first char for error handling speeds up the loop. |
638 | 248k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
639 | 246k | val = s[0] - '0'; |
640 | 246k | } else { |
641 | 2.30k | *result = PARSE_FAILURE; |
642 | 2.30k | return 0; |
643 | 2.30k | } |
644 | 393k | for (int i = 1; i < len; ++i) { |
645 | 149k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
646 | 147k | T digit = s[i] - '0'; |
647 | 147k | val = val * 10 + digit; |
648 | 147k | } else { |
649 | 2.71k | if constexpr (enable_strict_mode) { |
650 | 860 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { |
651 | 860 | *result = PARSE_FAILURE; |
652 | 860 | return 0; |
653 | 860 | } |
654 | 1.85k | } else { |
655 | 1.85k | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && |
656 | 1.85k | !is_float_suffix(s + i, len - i)))) { |
657 | 329 | *result = PARSE_FAILURE; |
658 | 329 | return 0; |
659 | 329 | } |
660 | 1.85k | } |
661 | 1.52k | *result = PARSE_SUCCESS; |
662 | 2.71k | return val; |
663 | 2.71k | } |
664 | 149k | } |
665 | 243k | *result = PARSE_SUCCESS; |
666 | 243k | return val; |
667 | 246k | } _ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 41.7k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 41.7k | T val = 0; | 633 | 41.7k | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 41.7k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 41.5k | val = s[0] - '0'; | 640 | 41.5k | } else { | 641 | 185 | *result = PARSE_FAILURE; | 642 | 185 | return 0; | 643 | 185 | } | 644 | 58.3k | for (int i = 1; i < len; ++i) { | 645 | 17.1k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 16.8k | T digit = s[i] - '0'; | 647 | 16.8k | val = val * 10 + digit; | 648 | 16.8k | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 289 | } else { | 655 | 289 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 289 | !is_float_suffix(s + i, len - i)))) { | 657 | 65 | *result = PARSE_FAILURE; | 658 | 65 | return 0; | 659 | 65 | } | 660 | 289 | } | 661 | 224 | *result = PARSE_SUCCESS; | 662 | 289 | return val; | 663 | 289 | } | 664 | 17.1k | } | 665 | 41.2k | *result = PARSE_SUCCESS; | 666 | 41.2k | return val; | 667 | 41.5k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 69.4k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 69.4k | T val = 0; | 633 | 69.4k | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 69.4k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 69.2k | val = s[0] - '0'; | 640 | 69.2k | } else { | 641 | 125 | *result = PARSE_FAILURE; | 642 | 125 | return 0; | 643 | 125 | } | 644 | 102k | for (int i = 1; i < len; ++i) { | 645 | 33.3k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 33.3k | T digit = s[i] - '0'; | 647 | 33.3k | val = val * 10 + digit; | 648 | 33.3k | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 1 | } else { | 655 | 1 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 1 | !is_float_suffix(s + i, len - i)))) { | 657 | 1 | *result = PARSE_FAILURE; | 658 | 1 | return 0; | 659 | 1 | } | 660 | 1 | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 1 | return val; | 663 | 1 | } | 664 | 33.3k | } | 665 | 69.2k | *result = PARSE_SUCCESS; | 666 | 69.2k | return val; | 667 | 69.2k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 51.4k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 51.4k | T val = 0; | 633 | 51.4k | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 51.4k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 50.9k | val = s[0] - '0'; | 640 | 50.9k | } else { | 641 | 494 | *result = PARSE_FAILURE; | 642 | 494 | return 0; | 643 | 494 | } | 644 | 74.2k | for (int i = 1; i < len; ++i) { | 645 | 24.2k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 23.3k | T digit = s[i] - '0'; | 647 | 23.3k | val = val * 10 + digit; | 648 | 23.3k | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 913 | } else { | 655 | 913 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 913 | !is_float_suffix(s + i, len - i)))) { | 657 | 63 | *result = PARSE_FAILURE; | 658 | 63 | return 0; | 659 | 63 | } | 660 | 913 | } | 661 | 850 | *result = PARSE_SUCCESS; | 662 | 913 | return val; | 663 | 913 | } | 664 | 24.2k | } | 665 | 50.0k | *result = PARSE_SUCCESS; | 666 | 50.0k | return val; | 667 | 50.9k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 51.9k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 51.9k | T val = 0; | 633 | 51.9k | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 51.9k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 50.9k | val = s[0] - '0'; | 640 | 50.9k | } else { | 641 | 1.00k | *result = PARSE_FAILURE; | 642 | 1.00k | return 0; | 643 | 1.00k | } | 644 | 86.9k | for (int i = 1; i < len; ++i) { | 645 | 36.3k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 36.0k | T digit = s[i] - '0'; | 647 | 36.0k | val = val * 10 + digit; | 648 | 36.0k | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 340 | } else { | 655 | 340 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 340 | !is_float_suffix(s + i, len - i)))) { | 657 | 116 | *result = PARSE_FAILURE; | 658 | 116 | return 0; | 659 | 116 | } | 660 | 340 | } | 661 | 224 | *result = PARSE_SUCCESS; | 662 | 340 | return val; | 663 | 340 | } | 664 | 36.3k | } | 665 | 50.5k | *result = PARSE_SUCCESS; | 666 | 50.5k | return val; | 667 | 50.9k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 32.2k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 32.2k | T val = 0; | 633 | 32.2k | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 32.2k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 31.8k | val = s[0] - '0'; | 640 | 31.8k | } else { | 641 | 376 | *result = PARSE_FAILURE; | 642 | 376 | return 0; | 643 | 376 | } | 644 | 66.9k | for (int i = 1; i < len; ++i) { | 645 | 35.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 35.1k | T digit = s[i] - '0'; | 647 | 35.1k | val = val * 10 + digit; | 648 | 35.1k | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 309 | } else { | 655 | 309 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 309 | !is_float_suffix(s + i, len - i)))) { | 657 | 84 | *result = PARSE_FAILURE; | 658 | 84 | return 0; | 659 | 84 | } | 660 | 309 | } | 661 | 225 | *result = PARSE_SUCCESS; | 662 | 309 | return val; | 663 | 309 | } | 664 | 35.4k | } | 665 | 31.5k | *result = PARSE_SUCCESS; | 666 | 31.5k | return val; | 667 | 31.8k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 544 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 544 | T val = 0; | 633 | 544 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 544 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 513 | val = s[0] - '0'; | 640 | 513 | } else { | 641 | 31 | *result = PARSE_FAILURE; | 642 | 31 | return 0; | 643 | 31 | } | 644 | 1.46k | for (int i = 1; i < len; ++i) { | 645 | 1.20k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 949 | T digit = s[i] - '0'; | 647 | 949 | val = val * 10 + digit; | 648 | 949 | } else { | 649 | 256 | if constexpr (enable_strict_mode) { | 650 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | 256 | *result = PARSE_FAILURE; | 652 | 256 | return 0; | 653 | 256 | } | 654 | | } else { | 655 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | | !is_float_suffix(s + i, len - i)))) { | 657 | | *result = PARSE_FAILURE; | 658 | | return 0; | 659 | | } | 660 | | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 256 | return val; | 663 | 256 | } | 664 | 1.20k | } | 665 | 257 | *result = PARSE_SUCCESS; | 666 | 257 | return val; | 667 | 513 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 51 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 51 | T val = 0; | 633 | 51 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 51 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 41 | val = s[0] - '0'; | 640 | 41 | } else { | 641 | 10 | *result = PARSE_FAILURE; | 642 | 10 | return 0; | 643 | 10 | } | 644 | 41 | for (int i = 1; i < len; ++i) { | 645 | 1 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 0 | T digit = s[i] - '0'; | 647 | 0 | val = val * 10 + digit; | 648 | 1 | } else { | 649 | 1 | if constexpr (enable_strict_mode) { | 650 | 1 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | 1 | *result = PARSE_FAILURE; | 652 | 1 | return 0; | 653 | 1 | } | 654 | | } else { | 655 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | | !is_float_suffix(s + i, len - i)))) { | 657 | | *result = PARSE_FAILURE; | 658 | | return 0; | 659 | | } | 660 | | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 1 | return val; | 663 | 1 | } | 664 | 1 | } | 665 | 40 | *result = PARSE_SUCCESS; | 666 | 40 | return val; | 667 | 41 | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 203 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 203 | T val = 0; | 633 | 203 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 203 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 187 | val = s[0] - '0'; | 640 | 187 | } else { | 641 | 16 | *result = PARSE_FAILURE; | 642 | 16 | return 0; | 643 | 16 | } | 644 | 339 | for (int i = 1; i < len; ++i) { | 645 | 243 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 152 | T digit = s[i] - '0'; | 647 | 152 | val = val * 10 + digit; | 648 | 152 | } else { | 649 | 91 | if constexpr (enable_strict_mode) { | 650 | 91 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | 91 | *result = PARSE_FAILURE; | 652 | 91 | return 0; | 653 | 91 | } | 654 | | } else { | 655 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | | !is_float_suffix(s + i, len - i)))) { | 657 | | *result = PARSE_FAILURE; | 658 | | return 0; | 659 | | } | 660 | | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 91 | return val; | 663 | 91 | } | 664 | 243 | } | 665 | 96 | *result = PARSE_SUCCESS; | 666 | 96 | return val; | 667 | 187 | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 402 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 402 | T val = 0; | 633 | 402 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 402 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 369 | val = s[0] - '0'; | 640 | 369 | } else { | 641 | 33 | *result = PARSE_FAILURE; | 642 | 33 | return 0; | 643 | 33 | } | 644 | 1.10k | for (int i = 1; i < len; ++i) { | 645 | 990 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 734 | T digit = s[i] - '0'; | 647 | 734 | val = val * 10 + digit; | 648 | 734 | } else { | 649 | 256 | if constexpr (enable_strict_mode) { | 650 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | 256 | *result = PARSE_FAILURE; | 652 | 256 | return 0; | 653 | 256 | } | 654 | | } else { | 655 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | | !is_float_suffix(s + i, len - i)))) { | 657 | | *result = PARSE_FAILURE; | 658 | | return 0; | 659 | | } | 660 | | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 256 | return val; | 663 | 256 | } | 664 | 990 | } | 665 | 113 | *result = PARSE_SUCCESS; | 666 | 113 | return val; | 667 | 369 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 401 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 401 | T val = 0; | 633 | 401 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 401 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 368 | val = s[0] - '0'; | 640 | 368 | } else { | 641 | 33 | *result = PARSE_FAILURE; | 642 | 33 | return 0; | 643 | 33 | } | 644 | 1.09k | for (int i = 1; i < len; ++i) { | 645 | 981 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 725 | T digit = s[i] - '0'; | 647 | 725 | val = val * 10 + digit; | 648 | 725 | } else { | 649 | 256 | if constexpr (enable_strict_mode) { | 650 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | 256 | *result = PARSE_FAILURE; | 652 | 256 | return 0; | 653 | 256 | } | 654 | | } else { | 655 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | | !is_float_suffix(s + i, len - i)))) { | 657 | | *result = PARSE_FAILURE; | 658 | | return 0; | 659 | | } | 660 | | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 256 | return val; | 663 | 256 | } | 664 | 981 | } | 665 | 112 | *result = PARSE_SUCCESS; | 666 | 112 | return val; | 667 | 368 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 631 | 4 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 632 | 4 | T val = 0; | 633 | 4 | if (UNLIKELY(len == 0)) { | 634 | 0 | *result = PARSE_SUCCESS; | 635 | 0 | return val; | 636 | 0 | } | 637 | | // Factor out the first char for error handling speeds up the loop. | 638 | 4 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 639 | 4 | val = s[0] - '0'; | 640 | 4 | } else { | 641 | 0 | *result = PARSE_FAILURE; | 642 | 0 | return 0; | 643 | 0 | } | 644 | 4 | for (int i = 1; i < len; ++i) { | 645 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 646 | 0 | T digit = s[i] - '0'; | 647 | 0 | val = val * 10 + digit; | 648 | 0 | } else { | 649 | | if constexpr (enable_strict_mode) { | 650 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 651 | | *result = PARSE_FAILURE; | 652 | | return 0; | 653 | | } | 654 | 0 | } else { | 655 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 656 | 0 | !is_float_suffix(s + i, len - i)))) { | 657 | 0 | *result = PARSE_FAILURE; | 658 | 0 | return 0; | 659 | 0 | } | 660 | 0 | } | 661 | 0 | *result = PARSE_SUCCESS; | 662 | 0 | return val; | 663 | 0 | } | 664 | 0 | } | 665 | 4 | *result = PARSE_SUCCESS; | 666 | 4 | return val; | 667 | 4 | } |
|
668 | | |
669 | | // at least the first char(if any) must be a digit. |
670 | | template <typename T> |
671 | | T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
672 | 100 | ParseResult* result) { |
673 | 100 | T val = 0; |
674 | 100 | if (max_len == 0) [[unlikely]] { |
675 | 10 | *result = PARSE_SUCCESS; |
676 | 10 | return val; |
677 | 10 | } |
678 | | // Factor out the first char for error handling speeds up the loop. |
679 | 90 | if (is_numeric_ascii(s[0])) [[likely]] { |
680 | 90 | val = s[0] - '0'; |
681 | 90 | } else { |
682 | 0 | *result = PARSE_FAILURE; |
683 | 0 | return 0; |
684 | 0 | } |
685 | 400 | for (int i = 1; i < max_len; ++i) { |
686 | 310 | if (is_numeric_ascii(s[i])) [[likely]] { |
687 | 310 | T digit = s[i] - '0'; |
688 | 310 | val = val * 10 + digit; |
689 | 310 | } else { |
690 | | // 123abc, return 123 |
691 | 0 | *result = PARSE_SUCCESS; |
692 | 0 | return val; |
693 | 0 | } |
694 | 310 | } |
695 | 90 | *result = PARSE_SUCCESS; |
696 | 90 | return val; |
697 | 90 | } |
698 | | |
699 | | template <typename T> |
700 | 154k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
701 | 154k | int i = 0; |
702 | | // skip leading spaces |
703 | 154k | for (; i < len; ++i) { |
704 | 154k | if (!is_whitespace_ascii(s[i])) { |
705 | 154k | break; |
706 | 154k | } |
707 | 154k | } |
708 | | |
709 | | // skip back spaces |
710 | 154k | int j = len - 1; |
711 | 154k | for (; j >= i; j--) { |
712 | 154k | if (!is_whitespace_ascii(s[j])) { |
713 | 154k | break; |
714 | 154k | } |
715 | 154k | } |
716 | | |
717 | | // skip leading '+', from_chars can handle '-' |
718 | 154k | if (i < len && s[i] == '+') { |
719 | 7.08k | i++; |
720 | | // ++ or +- are not valid, but the first + is already skipped, |
721 | | // if don't check here, from_chars will succeed. |
722 | | // |
723 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' |
724 | | // which may avoid this extra check here. |
725 | | // e.g.: |
726 | | // fast_float::chars_format format = |
727 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; |
728 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); |
729 | 7.08k | if (i < len && (s[i] == '+' || s[i] == '-')) { |
730 | 20 | *result = PARSE_FAILURE; |
731 | 20 | return 0; |
732 | 20 | } |
733 | 7.08k | } |
734 | 154k | if (UNLIKELY(i > j)) { |
735 | 32 | *result = PARSE_FAILURE; |
736 | 32 | return 0; |
737 | 32 | } |
738 | | |
739 | | // Use double here to not lose precision while accumulating the result |
740 | 154k | double val = 0; |
741 | 154k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
742 | | |
743 | 154k | if (res.ptr == s + j + 1) { |
744 | 149k | *result = PARSE_SUCCESS; |
745 | 149k | return val; |
746 | 149k | } else { |
747 | 4.72k | *result = PARSE_FAILURE; |
748 | 4.72k | } |
749 | 4.72k | return 0; |
750 | 154k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 700 | 88.5k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 701 | 88.5k | int i = 0; | 702 | | // skip leading spaces | 703 | 88.5k | for (; i < len; ++i) { | 704 | 88.5k | if (!is_whitespace_ascii(s[i])) { | 705 | 88.5k | break; | 706 | 88.5k | } | 707 | 88.5k | } | 708 | | | 709 | | // skip back spaces | 710 | 88.5k | int j = len - 1; | 711 | 88.5k | for (; j >= i; j--) { | 712 | 88.5k | if (!is_whitespace_ascii(s[j])) { | 713 | 88.5k | break; | 714 | 88.5k | } | 715 | 88.5k | } | 716 | | | 717 | | // skip leading '+', from_chars can handle '-' | 718 | 88.5k | if (i < len && s[i] == '+') { | 719 | 3.54k | i++; | 720 | | // ++ or +- are not valid, but the first + is already skipped, | 721 | | // if don't check here, from_chars will succeed. | 722 | | // | 723 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 724 | | // which may avoid this extra check here. 
| 725 | | // e.g.: | 726 | | // fast_float::chars_format format = | 727 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 728 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 729 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 730 | 10 | *result = PARSE_FAILURE; | 731 | 10 | return 0; | 732 | 10 | } | 733 | 3.54k | } | 734 | 88.5k | if (UNLIKELY(i > j)) { | 735 | 18 | *result = PARSE_FAILURE; | 736 | 18 | return 0; | 737 | 18 | } | 738 | | | 739 | | // Use double here to not lose precision while accumulating the result | 740 | 88.5k | double val = 0; | 741 | 88.5k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 742 | | | 743 | 88.5k | if (res.ptr == s + j + 1) { | 744 | 86.1k | *result = PARSE_SUCCESS; | 745 | 86.1k | return val; | 746 | 86.1k | } else { | 747 | 2.38k | *result = PARSE_FAILURE; | 748 | 2.38k | } | 749 | 2.38k | return 0; | 750 | 88.5k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 700 | 65.9k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 701 | 65.9k | int i = 0; | 702 | | // skip leading spaces | 703 | 65.9k | for (; i < len; ++i) { | 704 | 65.9k | if (!is_whitespace_ascii(s[i])) { | 705 | 65.9k | break; | 706 | 65.9k | } | 707 | 65.9k | } | 708 | | | 709 | | // skip back spaces | 710 | 65.9k | int j = len - 1; | 711 | 65.9k | for (; j >= i; j--) { | 712 | 65.9k | if (!is_whitespace_ascii(s[j])) { | 713 | 65.9k | break; | 714 | 65.9k | } | 715 | 65.9k | } | 716 | | | 717 | | // skip leading '+', from_chars can handle '-' | 718 | 65.9k | if (i < len && s[i] == '+') { | 719 | 3.54k | i++; | 720 | | // ++ or +- are not valid, but the first + is already skipped, | 721 | | // if don't check here, from_chars will succeed. | 722 | | // | 723 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 724 | | // which may avoid this extra check here. 
| 725 | | // e.g.: | 726 | | // fast_float::chars_format format = | 727 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 728 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 729 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 730 | 10 | *result = PARSE_FAILURE; | 731 | 10 | return 0; | 732 | 10 | } | 733 | 3.54k | } | 734 | 65.9k | if (UNLIKELY(i > j)) { | 735 | 14 | *result = PARSE_FAILURE; | 736 | 14 | return 0; | 737 | 14 | } | 738 | | | 739 | | // Use double here to not lose precision while accumulating the result | 740 | 65.9k | double val = 0; | 741 | 65.9k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 742 | | | 743 | 65.9k | if (res.ptr == s + j + 1) { | 744 | 63.6k | *result = PARSE_SUCCESS; | 745 | 63.6k | return val; | 746 | 63.6k | } else { | 747 | 2.34k | *result = PARSE_FAILURE; | 748 | 2.34k | } | 749 | 2.34k | return 0; | 750 | 65.9k | } |
|
751 | | |
752 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
753 | 11.3k | ParseResult* result) { |
754 | 11.3k | *result = PARSE_SUCCESS; |
755 | | |
756 | 11.3k | if (len == 1) { |
757 | 2.66k | if (s[0] == '1' || s[0] == 't' || s[0] == 'T') { |
758 | 333 | return true; |
759 | 333 | } |
760 | 2.32k | if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') { |
761 | 934 | return false; |
762 | 934 | } |
763 | 1.39k | *result = PARSE_FAILURE; |
764 | 1.39k | return false; |
765 | 2.32k | } |
766 | | |
767 | 8.71k | if (len == 2) { |
768 | 975 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { |
769 | 10 | return true; |
770 | 10 | } |
771 | 965 | if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) { |
772 | 9 | return false; |
773 | 9 | } |
774 | 965 | } |
775 | | |
776 | 8.69k | if (len == 3) { |
777 | 42 | if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') && |
778 | 42 | (s[2] == 's' || s[2] == 'S')) { |
779 | 10 | return true; |
780 | 10 | } |
781 | 32 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') && |
782 | 32 | (s[2] == 'f' || s[2] == 'F')) { |
783 | 9 | return false; |
784 | 9 | } |
785 | 32 | } |
786 | | |
787 | 8.67k | if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') && |
788 | 8.67k | (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) { |
789 | 3.38k | return true; |
790 | 3.38k | } |
791 | | |
792 | 5.29k | if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') && |
793 | 5.29k | (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') && |
794 | 5.29k | (s[4] == 'e' || s[4] == 'E')) { |
795 | 3.42k | return false; |
796 | 3.42k | } |
797 | | |
798 | | // No valid boolean value found |
799 | 1.87k | *result = PARSE_FAILURE; |
800 | 1.87k | return false; |
801 | 5.29k | } |
802 | | #include "common/compile_check_avoid_end.h" |
803 | | } // end namespace doris |