/root/doris/be/src/util/string_parser.hpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | |
27 | | #include <algorithm> |
28 | | #include <cstdlib> |
29 | | // IWYU pragma: no_include <bits/std_abs.h> |
30 | | #include <cmath> // IWYU pragma: keep |
31 | | #include <cstdint> |
32 | | #include <limits> |
33 | | #include <map> |
34 | | #include <string> |
35 | | #include <type_traits> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/compiler_util.h" // IWYU pragma: keep |
39 | | #include "common/status.h" |
40 | | #include "runtime/large_int_value.h" |
41 | | #include "runtime/primitive_type.h" |
42 | | #include "vec/common/int_exp.h" |
43 | | #include "vec/common/string_utils/string_utils.h" |
44 | | #include "vec/core/extended_types.h" |
45 | | #include "vec/data_types/number_traits.h" |
46 | | |
47 | | namespace doris { |
48 | | namespace vectorized { |
49 | | template <DecimalNativeTypeConcept T> |
50 | | struct Decimal; |
51 | | } // namespace vectorized |
52 | | |
// Early-return guard: if `stmt` evaluates to false, leave the enclosing function with
// Status::InvalidArgument constructed from the remaining macro arguments.
// The do/while(false) wrapper makes the expansion behave as one statement.
#define RETURN_INVALID_ARG_IF_NOT(stmt, ...)             \
    do {                                                 \
        if (UNLIKELY(!(stmt)))                           \
            return Status::InvalidArgument(__VA_ARGS__); \
    } while (false)
59 | | |
60 | | // skip leading and trailing ascii whitespaces, |
61 | | // return the pointer to the first non-whitespace char, |
62 | | // and update the len to the new length, which does not include |
63 | | // leading and trailing whitespaces |
64 | | template <typename T> |
65 | 901k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { |
66 | 1.47M | while (len > 0 && is_whitespace_ascii(*s)) { |
67 | 573k | ++s; |
68 | 573k | --len; |
69 | 573k | } |
70 | | |
71 | 1.46M | while (len > 0 && is_whitespace_ascii(s[len - 1])) { |
72 | 563k | --len; |
73 | 563k | } |
74 | | |
75 | 901k | return s; |
76 | 901k | } _ZN5doris22skip_ascii_whitespacesImEEPKcS2_RT_ Line | Count | Source | 65 | 520k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 66 | 688k | while (len > 0 && is_whitespace_ascii(*s)) { | 67 | 168k | ++s; | 68 | 168k | --len; | 69 | 168k | } | 70 | | | 71 | 681k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 72 | 161k | --len; | 73 | 161k | } | 74 | | | 75 | 520k | return s; | 76 | 520k | } |
_ZN5doris22skip_ascii_whitespacesIiEEPKcS2_RT_ Line | Count | Source | 65 | 353k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 66 | 686k | while (len > 0 && is_whitespace_ascii(*s)) { | 67 | 333k | ++s; | 68 | 333k | --len; | 69 | 333k | } | 70 | | | 71 | 684k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 72 | 330k | --len; | 73 | 330k | } | 74 | | | 75 | 353k | return s; | 76 | 353k | } |
_ZN5doris22skip_ascii_whitespacesIlEEPKcS2_RT_ Line | Count | Source | 65 | 27.8k | inline const char* skip_ascii_whitespaces(const char* s, T& len) { | 66 | 100k | while (len > 0 && is_whitespace_ascii(*s)) { | 67 | 72.4k | ++s; | 68 | 72.4k | --len; | 69 | 72.4k | } | 70 | | | 71 | 99.8k | while (len > 0 && is_whitespace_ascii(s[len - 1])) { | 72 | 72.0k | --len; | 73 | 72.0k | } | 74 | | | 75 | 27.8k | return s; | 76 | 27.8k | } |
|
77 | | |
78 | | template <bool (*Pred)(char)> |
79 | 259 | bool range_suite(const char* s, const char* end) { |
80 | 259 | return std::ranges::all_of(s, end, Pred); |
81 | 259 | } |
82 | | |
83 | | inline auto is_digit_range = range_suite<is_numeric_ascii>; |
84 | | |
85 | 552 | inline Status assert_within_bound(const char* s, const char* end, size_t offset) { |
86 | 552 | if (s + offset >= end) [[unlikely]] { |
87 | 44 | return Status::InvalidArgument( |
88 | 44 | "StringParser: failed because we need at least {} but only got '{}'", offset, |
89 | 44 | std::string {s, end}); |
90 | 44 | } |
91 | 508 | return Status::OK(); |
92 | 552 | } |
93 | | |
94 | | // LEN = 0 means any length(include zero). LEN = 1 means only one character. so on. LEN = -x means x or more. |
95 | | // if need result, use StringRef{origin_s, s} outside |
96 | | template <int LEN, bool (*Pred)(char)> |
97 | 1.33k | Status skip_qualified_char(const char*& s, const char* end) { |
98 | 1.33k | if constexpr (LEN == 0) { |
99 | | // Consume any length of characters that match the predicate. |
100 | 1.75k | while (s != end && Pred(*s)) { |
101 | 950 | ++s; |
102 | 950 | } |
103 | 807 | } else if constexpr (LEN > 0) { |
104 | | // Consume exactly LEN characters that match the predicate. |
105 | 928 | for (int i = 0; i < LEN; ++i, ++s) { |
106 | 497 | if (s == end || !Pred(*s)) [[unlikely]] { |
107 | 66 | return Status::InvalidArgument( |
108 | 66 | "StringParser: failed to consume {} characters, got '{}'", LEN - i, |
109 | 66 | std::string {s, end}); |
110 | 66 | } |
111 | 497 | } |
112 | 497 | } else { // LEN < 0 |
113 | | // Consume at least -LEN characters that match the predicate. |
114 | 28 | int count = 0; |
115 | 170 | while (s != end && Pred(*s)) { |
116 | 142 | ++s; |
117 | 142 | ++count; |
118 | 142 | } |
119 | 28 | if (count < -LEN) [[unlikely]] { |
120 | 0 | return Status::InvalidArgument( |
121 | 0 | "StringParser: failed to consume at least {} characters, got '{}'", |
122 | 0 | -LEN - count, std::string {s, end}); |
123 | 0 | } |
124 | 28 | } |
125 | 459 | return Status::OK(); |
126 | 1.33k | } _ZN5doris19skip_qualified_charILi0EXadL_Z19is_whitespace_asciicEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 406 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | 406 | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | 421 | while (s != end && Pred(*s)) { | 101 | 15 | ++s; | 102 | 15 | } | 103 | | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | | for (int i = 0; i < LEN; ++i, ++s) { | 106 | | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | | return Status::InvalidArgument( | 108 | | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | | std::string {s, end}); | 110 | | } | 111 | | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 406 | return Status::OK(); | 126 | 406 | } |
_ZN5doris19skip_qualified_charILi0EXadL_Z16is_numeric_asciicEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 401 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | 401 | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | 1.33k | while (s != end && Pred(*s)) { | 101 | 935 | ++s; | 102 | 935 | } | 103 | | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | | for (int i = 0; i < LEN; ++i, ++s) { | 106 | | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | | return Status::InvalidArgument( | 108 | | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | | std::string {s, end}); | 110 | | } | 111 | | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 401 | return Status::OK(); | 126 | 401 | } |
_ZN5doris19skip_qualified_charILin1EXadL_Z23is_not_whitespace_asciicEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 28 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | | while (s != end && Pred(*s)) { | 101 | | ++s; | 102 | | } | 103 | | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | | for (int i = 0; i < LEN; ++i, ++s) { | 106 | | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | | return Status::InvalidArgument( | 108 | | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | | std::string {s, end}); | 110 | | } | 111 | | } | 112 | 28 | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | 28 | int count = 0; | 115 | 170 | while (s != end && Pred(*s)) { | 116 | 142 | ++s; | 117 | 142 | ++count; | 118 | 142 | } | 119 | 28 | if (count < -LEN) [[unlikely]] { | 120 | 0 | return Status::InvalidArgument( | 121 | 0 | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | 0 | -LEN - count, std::string {s, end}); | 123 | 0 | } | 124 | 28 | } | 125 | 28 | return Status::OK(); | 126 | 28 | } |
Unexecuted instantiation: _ZN5doris19skip_qualified_charILi1EXadL_Z14is_slash_asciicEEEENS_6StatusERPKcS3_ _ZN5doris19skip_qualified_charILi1EXadL_Z12is_non_alnumcEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 176 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | | while (s != end && Pred(*s)) { | 101 | | ++s; | 102 | | } | 103 | 176 | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | 324 | for (int i = 0; i < LEN; ++i, ++s) { | 106 | 176 | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | 28 | return Status::InvalidArgument( | 108 | 28 | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | 28 | std::string {s, end}); | 110 | 28 | } | 111 | 176 | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 148 | return Status::OK(); | 126 | 176 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_12is_delimiterEcEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 111 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | | while (s != end && Pred(*s)) { | 101 | | ++s; | 102 | | } | 103 | 111 | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | 212 | for (int i = 0; i < LEN; ++i, ++s) { | 106 | 111 | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | 10 | return Status::InvalidArgument( | 108 | 10 | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | 10 | std::string {s, end}); | 110 | 10 | } | 111 | 111 | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 101 | return Status::OK(); | 126 | 111 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_6is_barEcEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 136 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | | while (s != end && Pred(*s)) { | 101 | | ++s; | 102 | | } | 103 | 136 | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | 256 | for (int i = 0; i < LEN; ++i, ++s) { | 106 | 136 | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | 16 | return Status::InvalidArgument( | 108 | 16 | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | 16 | std::string {s, end}); | 110 | 16 | } | 111 | 136 | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 120 | return Status::OK(); | 126 | 136 | } |
_ZN5doris19skip_qualified_charILi1EXadL_ZNS_8is_colonEcEEEENS_6StatusERPKcS3_ Line | Count | Source | 97 | 74 | Status skip_qualified_char(const char*& s, const char* end) { | 98 | | if constexpr (LEN == 0) { | 99 | | // Consume any length of characters that match the predicate. | 100 | | while (s != end && Pred(*s)) { | 101 | | ++s; | 102 | | } | 103 | 74 | } else if constexpr (LEN > 0) { | 104 | | // Consume exactly LEN characters that match the predicate. | 105 | 136 | for (int i = 0; i < LEN; ++i, ++s) { | 106 | 74 | if (s == end || !Pred(*s)) [[unlikely]] { | 107 | 12 | return Status::InvalidArgument( | 108 | 12 | "StringParser: failed to consume {} characters, got '{}'", LEN - i, | 109 | 12 | std::string {s, end}); | 110 | 12 | } | 111 | 74 | } | 112 | | } else { // LEN < 0 | 113 | | // Consume at least -LEN characters that match the predicate. | 114 | | int count = 0; | 115 | | while (s != end && Pred(*s)) { | 116 | | ++s; | 117 | | ++count; | 118 | | } | 119 | | if (count < -LEN) [[unlikely]] { | 120 | | return Status::InvalidArgument( | 121 | | "StringParser: failed to consume at least {} characters, got '{}'", | 122 | | -LEN - count, std::string {s, end}); | 123 | | } | 124 | | } | 125 | 62 | return Status::OK(); | 126 | 74 | } |
|
127 | | |
128 | | inline auto skip_any_whitespace = skip_qualified_char<0, is_whitespace_ascii>; |
129 | | inline auto skip_any_digit = skip_qualified_char<0, is_numeric_ascii>; |
130 | | inline auto skip_tz_name_part = skip_qualified_char<-1, is_not_whitespace_ascii>; |
131 | | inline auto skip_one_slash = skip_qualified_char<1, is_slash_ascii>; |
132 | | inline auto skip_one_non_alnum = skip_qualified_char<1, is_non_alnum>; |
133 | | |
// True for the two characters accepted as a delimiter here: a space or an upper-case 'T'.
inline bool is_delimiter(char c) {
    switch (c) {
    case ' ':
    case 'T':
        return true;
    default:
        return false;
    }
}
137 | | inline auto consume_one_delimiter = skip_qualified_char<1, is_delimiter>; |
138 | | |
// True only for the ASCII hyphen-minus character '-'.
inline bool is_bar(char c) {
    const bool is_hyphen = ('-' == c);
    return is_hyphen;
}
142 | | inline auto consume_one_bar = skip_qualified_char<1, is_bar>; |
143 | | |
// True only for the ASCII colon character ':'.
inline bool is_colon(char c) {
    const bool matches_colon = (':' == c);
    return matches_colon;
}
147 | | inline auto consume_one_colon = skip_qualified_char<1, is_colon>; |
148 | | |
149 | | // only consume a string of digit, not include sign. |
150 | | // when has MAX_LEN > 0, do greedy match but at most MAX_LEN. |
151 | | // LEN = 0 means any length, otherwise(must > 0) it means exactly LEN digits. |
152 | | template <typename T, int LEN = 0, int MAX_LEN = -1> |
153 | 1.12k | Status consume_digit(const char*& s, const char* end, T& out) { |
154 | 1.12k | static_assert(LEN >= 0); |
155 | 1.12k | if constexpr (MAX_LEN > 0) { |
156 | 653 | out = 0; |
157 | 1.89k | for (int i = 0; i < MAX_LEN; ++i, ++s) { |
158 | 1.30k | if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] { |
159 | 69 | if (i < LEN) [[unlikely]] { |
160 | 0 | return Status::InvalidArgument( |
161 | 0 | "StringParser: got \"{}\" before get at least {} digit", |
162 | 0 | std::string {s, end}, LEN - i); |
163 | 0 | } |
164 | 69 | break; // stop consuming if we have consumed enough digits. |
165 | 69 | } |
166 | 1.23k | out = out * 10 + (*s - '0'); |
167 | 1.23k | } |
168 | | } else if constexpr (LEN == 0) { |
169 | | // Consume any length of digits. |
170 | | out = 0; |
171 | | while (s != end && is_numeric_ascii(*s)) { |
172 | | out = out * 10 + (*s - '0'); |
173 | | ++s; |
174 | | } |
175 | 475 | } else if constexpr (LEN > 0) { |
176 | | // Consume exactly LEN digits. |
177 | 475 | out = 0; |
178 | 1.37k | for (int i = 0; i < LEN; ++i, ++s) { |
179 | 927 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { |
180 | 29 | return Status::InvalidArgument( |
181 | 29 | "StringParser: failed to consume {} digits, got '{}'", LEN - i, |
182 | 29 | std::string {s, end}); |
183 | 29 | } |
184 | 898 | out = out * 10 + (*s - '0'); |
185 | 898 | } |
186 | 475 | } |
187 | 1.09k | return Status::OK(); |
188 | 1.12k | } _ZN5doris13consume_digitIjLi2ELin1EEENS_6StatusERPKcS3_RT_ Line | Count | Source | 153 | 469 | Status consume_digit(const char*& s, const char* end, T& out) { | 154 | 469 | static_assert(LEN >= 0); | 155 | | if constexpr (MAX_LEN > 0) { | 156 | | out = 0; | 157 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 158 | | if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] { | 159 | | if (i < LEN) [[unlikely]] { | 160 | | return Status::InvalidArgument( | 161 | | "StringParser: got \"{}\" before get at least {} digit", | 162 | | std::string {s, end}, LEN - i); | 163 | | } | 164 | | break; // stop consuming if we have consumed enough digits. | 165 | | } | 166 | | out = out * 10 + (*s - '0'); | 167 | | } | 168 | | } else if constexpr (LEN == 0) { | 169 | | // Consume any length of digits. | 170 | | out = 0; | 171 | | while (s != end && is_numeric_ascii(*s)) { | 172 | | out = out * 10 + (*s - '0'); | 173 | | ++s; | 174 | | } | 175 | 469 | } else if constexpr (LEN > 0) { | 176 | | // Consume exactly LEN digits. | 177 | 469 | out = 0; | 178 | 1.35k | for (int i = 0; i < LEN; ++i, ++s) { | 179 | 915 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 180 | 29 | return Status::InvalidArgument( | 181 | 29 | "StringParser: failed to consume {} digits, got '{}'", LEN - i, | 182 | 29 | std::string {s, end}); | 183 | 29 | } | 184 | 886 | out = out * 10 + (*s - '0'); | 185 | 886 | } | 186 | 469 | } | 187 | 440 | return Status::OK(); | 188 | 469 | } |
_ZN5doris13consume_digitIjLi1ELi2EEENS_6StatusERPKcS3_RT_ Line | Count | Source | 153 | 653 | Status consume_digit(const char*& s, const char* end, T& out) { | 154 | 653 | static_assert(LEN >= 0); | 155 | 653 | if constexpr (MAX_LEN > 0) { | 156 | 653 | out = 0; | 157 | 1.89k | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 158 | 1.30k | if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] { | 159 | 69 | if (i < LEN) [[unlikely]] { | 160 | 0 | return Status::InvalidArgument( | 161 | 0 | "StringParser: got \"{}\" before get at least {} digit", | 162 | 0 | std::string {s, end}, LEN - i); | 163 | 0 | } | 164 | 69 | break; // stop consuming if we have consumed enough digits. | 165 | 69 | } | 166 | 1.23k | out = out * 10 + (*s - '0'); | 167 | 1.23k | } | 168 | | } else if constexpr (LEN == 0) { | 169 | | // Consume any length of digits. | 170 | | out = 0; | 171 | | while (s != end && is_numeric_ascii(*s)) { | 172 | | out = out * 10 + (*s - '0'); | 173 | | ++s; | 174 | | } | 175 | | } else if constexpr (LEN > 0) { | 176 | | // Consume exactly LEN digits. | 177 | | out = 0; | 178 | | for (int i = 0; i < LEN; ++i, ++s) { | 179 | | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 180 | | return Status::InvalidArgument( | 181 | | "StringParser: failed to consume {} digits, got '{}'", LEN - i, | 182 | | std::string {s, end}); | 183 | | } | 184 | | out = out * 10 + (*s - '0'); | 185 | | } | 186 | | } | 187 | 653 | return Status::OK(); | 188 | 653 | } |
_ZN5doris13consume_digitIjLi1ELin1EEENS_6StatusERPKcS3_RT_ Line | Count | Source | 153 | 4 | Status consume_digit(const char*& s, const char* end, T& out) { | 154 | 4 | static_assert(LEN >= 0); | 155 | | if constexpr (MAX_LEN > 0) { | 156 | | out = 0; | 157 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 158 | | if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] { | 159 | | if (i < LEN) [[unlikely]] { | 160 | | return Status::InvalidArgument( | 161 | | "StringParser: got \"{}\" before get at least {} digit", | 162 | | std::string {s, end}, LEN - i); | 163 | | } | 164 | | break; // stop consuming if we have consumed enough digits. | 165 | | } | 166 | | out = out * 10 + (*s - '0'); | 167 | | } | 168 | | } else if constexpr (LEN == 0) { | 169 | | // Consume any length of digits. | 170 | | out = 0; | 171 | | while (s != end && is_numeric_ascii(*s)) { | 172 | | out = out * 10 + (*s - '0'); | 173 | | ++s; | 174 | | } | 175 | 4 | } else if constexpr (LEN > 0) { | 176 | | // Consume exactly LEN digits. | 177 | 4 | out = 0; | 178 | 8 | for (int i = 0; i < LEN; ++i, ++s) { | 179 | 4 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 180 | 0 | return Status::InvalidArgument( | 181 | 0 | "StringParser: failed to consume {} digits, got '{}'", LEN - i, | 182 | 0 | std::string {s, end}); | 183 | 0 | } | 184 | 4 | out = out * 10 + (*s - '0'); | 185 | 4 | } | 186 | 4 | } | 187 | 4 | return Status::OK(); | 188 | 4 | } |
_ZN5doris13consume_digitIjLi4ELin1EEENS_6StatusERPKcS3_RT_ Line | Count | Source | 153 | 2 | Status consume_digit(const char*& s, const char* end, T& out) { | 154 | 2 | static_assert(LEN >= 0); | 155 | | if constexpr (MAX_LEN > 0) { | 156 | | out = 0; | 157 | | for (int i = 0; i < MAX_LEN; ++i, ++s) { | 158 | | if ((s == end || !is_numeric_ascii(*s))) [[unlikely]] { | 159 | | if (i < LEN) [[unlikely]] { | 160 | | return Status::InvalidArgument( | 161 | | "StringParser: got \"{}\" before get at least {} digit", | 162 | | std::string {s, end}, LEN - i); | 163 | | } | 164 | | break; // stop consuming if we have consumed enough digits. | 165 | | } | 166 | | out = out * 10 + (*s - '0'); | 167 | | } | 168 | | } else if constexpr (LEN == 0) { | 169 | | // Consume any length of digits. | 170 | | out = 0; | 171 | | while (s != end && is_numeric_ascii(*s)) { | 172 | | out = out * 10 + (*s - '0'); | 173 | | ++s; | 174 | | } | 175 | 2 | } else if constexpr (LEN > 0) { | 176 | | // Consume exactly LEN digits. | 177 | 2 | out = 0; | 178 | 10 | for (int i = 0; i < LEN; ++i, ++s) { | 179 | 8 | if (s == end || !is_numeric_ascii(*s)) [[unlikely]] { | 180 | 0 | return Status::InvalidArgument( | 181 | 0 | "StringParser: failed to consume {} digits, got '{}'", LEN - i, | 182 | 0 | std::string {s, end}); | 183 | 0 | } | 184 | 8 | out = out * 10 + (*s - '0'); | 185 | 8 | } | 186 | 2 | } | 187 | 2 | return Status::OK(); | 188 | 2 | } |
|
189 | | |
190 | | template <bool (*Pred)(char)> |
191 | 28 | uint32_t count_valid_length(const char* s, const char* end) { |
192 | 28 | DCHECK(s <= end) << "s: " << s << ", end: " << end; |
193 | 28 | uint32_t count = 0; |
194 | 86 | while (s != end && Pred(*s)) { |
195 | 58 | ++count; |
196 | 58 | ++s; |
197 | 58 | } |
198 | 28 | return count; |
199 | 28 | } |
200 | | |
201 | | inline auto count_digits = count_valid_length<is_numeric_ascii>; |
202 | | |
203 | 20 | inline PURE std::string combine_tz_offset(char sign, uint32_t hour_offset, uint32_t minute_offset) { |
204 | 20 | std::string result(6, '0'); |
205 | 20 | result[0] = sign; |
206 | 20 | result[1] = '0' + (hour_offset / 10); |
207 | 20 | result[2] = '0' + (hour_offset % 10); |
208 | 20 | result[3] = ':'; |
209 | 20 | result[4] = '0' + (minute_offset / 10); |
210 | 20 | result[5] = '0' + (minute_offset % 10); |
211 | 20 | DCHECK_EQ(result.size(), 6); |
212 | 20 | return result; |
213 | 20 | } |
214 | | |
215 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
216 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
217 | | // |
218 | | // Strings with leading and trailing whitespaces are accepted. |
219 | | // Branching is heavily optimized for the non-whitespace successful case. |
220 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
221 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
222 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
223 | | // |
224 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
225 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
226 | | // and inf/-inf for float types. |
227 | | // |
228 | | // Things we tried that did not work: |
229 | | // - lookup table for converting character to digit |
230 | | // Improvements (TODO): |
231 | | // - Validate input using _simd_compare_ranges |
232 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
233 | | class StringParser { |
234 | | public: |
235 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
236 | | |
237 | | template <typename T> |
238 | 395k | static T numeric_limits(bool negative) { |
239 | 395k | if constexpr (std::is_same_v<T, __int128>) { |
240 | 46.3k | return negative ? MIN_INT128 : MAX_INT128; |
241 | 349k | } else { |
242 | 349k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
243 | 349k | } |
244 | 395k | } _ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 238 | 46.3k | static T numeric_limits(bool negative) { | 239 | 46.3k | if constexpr (std::is_same_v<T, __int128>) { | 240 | 46.3k | return negative ? MIN_INT128 : MAX_INT128; | 241 | | } else { | 242 | | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | | } | 244 | 46.3k | } |
_ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 238 | 135k | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 135k | } else { | 242 | 135k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 135k | } | 244 | 135k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 238 | 68.3k | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 68.3k | } else { | 242 | 68.3k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 68.3k | } | 244 | 68.3k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 238 | 62.1k | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 62.1k | } else { | 242 | 62.1k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 62.1k | } | 244 | 62.1k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 238 | 82.8k | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 82.8k | } else { | 242 | 82.8k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 82.8k | } | 244 | 82.8k | } |
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Line | Count | Source | 238 | 4 | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 4 | } else { | 242 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 4 | } | 244 | 4 | } |
_ZN5doris12StringParser14numeric_limitsIoEET_b Line | Count | Source | 238 | 4 | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 4 | } else { | 242 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 4 | } | 244 | 4 | } |
_ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 238 | 21 | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 21 | } else { | 242 | 21 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 21 | } | 244 | 21 | } |
_ZN5doris12StringParser14numeric_limitsIjEET_b Line | Count | Source | 238 | 132 | static T numeric_limits(bool negative) { | 239 | | if constexpr (std::is_same_v<T, __int128>) { | 240 | | return negative ? MIN_INT128 : MAX_INT128; | 241 | 132 | } else { | 242 | 132 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 243 | 132 | } | 244 | 132 | } |
|
245 | | |
246 | | template <typename T> |
247 | 943k | static T get_scale_multiplier(int scale) { |
248 | 943k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
249 | 943k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
250 | 943k | "You can only instantiate as int32_t, int64_t, __int128."); |
251 | 943k | if constexpr (std::is_same_v<T, int32_t>) { |
252 | 133k | return common::exp10_i32(scale); |
253 | 183k | } else if constexpr (std::is_same_v<T, int64_t>) { |
254 | 183k | return common::exp10_i64(scale); |
255 | 239k | } else if constexpr (std::is_same_v<T, __int128>) { |
256 | 239k | return common::exp10_i128(scale); |
257 | 387k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
258 | 387k | return common::exp10_i256(scale); |
259 | 387k | } |
260 | 943k | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 247 | 133k | static T get_scale_multiplier(int scale) { | 248 | 133k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 249 | 133k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 250 | 133k | "You can only instantiate as int32_t, int64_t, __int128."); | 251 | 133k | if constexpr (std::is_same_v<T, int32_t>) { | 252 | 133k | return common::exp10_i32(scale); | 253 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 254 | | return common::exp10_i64(scale); | 255 | | } else if constexpr (std::is_same_v<T, __int128>) { | 256 | | return common::exp10_i128(scale); | 257 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 258 | | return common::exp10_i256(scale); | 259 | | } | 260 | 133k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 247 | 183k | static T get_scale_multiplier(int scale) { | 248 | 183k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 249 | 183k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 250 | 183k | "You can only instantiate as int32_t, int64_t, __int128."); | 251 | | if constexpr (std::is_same_v<T, int32_t>) { | 252 | | return common::exp10_i32(scale); | 253 | 183k | } else if constexpr (std::is_same_v<T, int64_t>) { | 254 | 183k | return common::exp10_i64(scale); | 255 | | } else if constexpr (std::is_same_v<T, __int128>) { | 256 | | return common::exp10_i128(scale); | 257 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 258 | | return common::exp10_i256(scale); | 259 | | } | 260 | 183k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 247 | 239k | static T get_scale_multiplier(int scale) { | 248 | 239k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 249 | 239k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 250 | 239k | "You can only instantiate as int32_t, int64_t, __int128."); | 251 | | if constexpr (std::is_same_v<T, int32_t>) { | 252 | | return common::exp10_i32(scale); | 253 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 254 | | return common::exp10_i64(scale); | 255 | 239k | } else if constexpr (std::is_same_v<T, __int128>) { | 256 | 239k | return common::exp10_i128(scale); | 257 | | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 258 | | return common::exp10_i256(scale); | 259 | | } | 260 | 239k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 247 | 387k | static T get_scale_multiplier(int scale) { | 248 | 387k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 249 | 387k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 250 | 387k | "You can only instantiate as int32_t, int64_t, __int128."); | 251 | | if constexpr (std::is_same_v<T, int32_t>) { | 252 | | return common::exp10_i32(scale); | 253 | | } else if constexpr (std::is_same_v<T, int64_t>) { | 254 | | return common::exp10_i64(scale); | 255 | | } else if constexpr (std::is_same_v<T, __int128>) { | 256 | | return common::exp10_i128(scale); | 257 | 387k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 258 | 387k | return common::exp10_i256(scale); | 259 | 387k | } | 260 | 387k | } |
|
261 | | |
262 | | // This is considerably faster than glibc's implementation (25x). |
263 | | // Assumes s represents a decimal number. |
264 | | template <typename T, bool enable_strict_mode = false> |
265 | 355k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
266 | 355k | s = skip_ascii_whitespaces(s, len); |
267 | 355k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); |
268 | 355k | } _ZN5doris12StringParser13string_to_intInLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 45.4k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 45.4k | s = skip_ascii_whitespaces(s, len); | 267 | 45.4k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 45.4k | } |
_ZN5doris12StringParser13string_to_intIaLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 95.7k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 95.7k | s = skip_ascii_whitespaces(s, len); | 267 | 95.7k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 95.7k | } |
_ZN5doris12StringParser13string_to_intIsLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 66.6k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 66.6k | s = skip_ascii_whitespaces(s, len); | 267 | 66.6k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 66.6k | } |
_ZN5doris12StringParser13string_to_intIiLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 61.6k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 61.6k | s = skip_ascii_whitespaces(s, len); | 267 | 61.6k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 61.6k | } |
_ZN5doris12StringParser13string_to_intIlLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 81.2k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 81.2k | s = skip_ascii_whitespaces(s, len); | 267 | 81.2k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 81.2k | } |
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEELb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 4 | s = skip_ascii_whitespaces(s, len); | 267 | 4 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 4 | } |
_ZN5doris12StringParser13string_to_intIoLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 4 | s = skip_ascii_whitespaces(s, len); | 267 | 4 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 4 | } |
_ZN5doris12StringParser13string_to_intImLb0EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 20 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 20 | s = skip_ascii_whitespaces(s, len); | 267 | 20 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 20 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjLb0EEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intIaLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 1.00k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 1.00k | s = skip_ascii_whitespaces(s, len); | 267 | 1.00k | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 1.00k | } |
_ZN5doris12StringParser13string_to_intIsLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 984 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 984 | s = skip_ascii_whitespaces(s, len); | 267 | 984 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 984 | } |
_ZN5doris12StringParser13string_to_intIiLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 968 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 968 | s = skip_ascii_whitespaces(s, len); | 267 | 968 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 968 | } |
_ZN5doris12StringParser13string_to_intIlLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 952 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 952 | s = skip_ascii_whitespaces(s, len); | 267 | 952 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 952 | } |
_ZN5doris12StringParser13string_to_intInLb1EEET_PKcmPNS0_11ParseResultE Line | Count | Source | 265 | 936 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 266 | 936 | s = skip_ascii_whitespaces(s, len); | 267 | 936 | return string_to_int_internal<T, enable_strict_mode>(s, len, result); | 268 | 936 | } |
|
269 | | |
270 | | // This is considerably faster than glibc's implementation. |
271 | | // In the case of overflow, the max/min value for the data type will be returned. |
272 | | // Assumes s represents a decimal number. |
273 | | template <typename T> |
274 | 1.37k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
275 | 1.37k | s = skip_ascii_whitespaces(s, len); |
276 | 1.37k | return string_to_unsigned_int_internal<T>(s, len, result); |
277 | 1.37k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 274 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 275 | 343 | s = skip_ascii_whitespaces(s, len); | 276 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 277 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 274 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 275 | 343 | s = skip_ascii_whitespaces(s, len); | 276 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 277 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 274 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 275 | 343 | s = skip_ascii_whitespaces(s, len); | 276 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 277 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 274 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 275 | 343 | s = skip_ascii_whitespaces(s, len); | 276 | 343 | return string_to_unsigned_int_internal<T>(s, len, result); | 277 | 343 | } |
|
278 | | |
279 | | // Convert a string s representing a number in given base into a decimal number. |
280 | | template <typename T> |
281 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
282 | 27.8k | ParseResult* result) { |
283 | 27.8k | s = skip_ascii_whitespaces(s, len); |
284 | 27.8k | return string_to_int_internal<T>(s, len, base, result); |
285 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 282 | 26.4k | ParseResult* result) { | 283 | 26.4k | s = skip_ascii_whitespaces(s, len); | 284 | 26.4k | return string_to_int_internal<T>(s, len, base, result); | 285 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 282 | 490 | ParseResult* result) { | 283 | 490 | s = skip_ascii_whitespaces(s, len); | 284 | 490 | return string_to_int_internal<T>(s, len, base, result); | 285 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 282 | 441 | ParseResult* result) { | 283 | 441 | s = skip_ascii_whitespaces(s, len); | 284 | 441 | return string_to_int_internal<T>(s, len, base, result); | 285 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 282 | 441 | ParseResult* result) { | 283 | 441 | s = skip_ascii_whitespaces(s, len); | 284 | 441 | return string_to_int_internal<T>(s, len, base, result); | 285 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 282 | 1 | ParseResult* result) { | 283 | 1 | s = skip_ascii_whitespaces(s, len); | 284 | 1 | return string_to_int_internal<T>(s, len, base, result); | 285 | 1 | } |
|
286 | | |
287 | | template <typename T> |
288 | 153k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
289 | 153k | s = skip_ascii_whitespaces(s, len); |
290 | 153k | return string_to_float_internal<T>(s, len, result); |
291 | 153k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 288 | 87.7k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 289 | 87.7k | s = skip_ascii_whitespaces(s, len); | 290 | 87.7k | return string_to_float_internal<T>(s, len, result); | 291 | 87.7k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 288 | 65.3k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 289 | 65.3k | s = skip_ascii_whitespaces(s, len); | 290 | 65.3k | return string_to_float_internal<T>(s, len, result); | 291 | 65.3k | } |
|
292 | | |
293 | | // Parses a string for 'true' or 'false', case insensitive. |
294 | 11.7k | static inline bool string_to_bool(const char* __restrict s, size_t len, ParseResult* result) { |
295 | 11.7k | s = skip_ascii_whitespaces(s, len); |
296 | 11.7k | return string_to_bool_internal(s, len, result); |
297 | 11.7k | } |
298 | | |
// Declaration only: presumably parses the `len` bytes at `s` as a decimal of primitive type P
// with the given target precision and scale, returning the type's native integer
// representation and reporting the outcome through `result`.
// NOTE(review): the definition is outside this part of the file - confirm rounding and
// overflow behaviour there.
299 | | template <PrimitiveType P> |
300 | | static typename PrimitiveTypeTraits<P>::CppType::NativeType string_to_decimal( |
301 | | const char* __restrict s, int len, int type_precision, int type_scale, |
302 | | ParseResult* result); |
303 | | |
304 | | template <typename T> |
305 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
306 | | const T key_value_separator, |
307 | | std::map<std::string, std::string>* result) { |
308 | | int key_pos = 0; |
309 | | int key_end; |
310 | | int val_pos; |
311 | | int val_end; |
312 | | |
313 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
314 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
315 | | std::string::npos) { |
316 | | break; |
317 | | } |
318 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
319 | | val_end = base.size(); |
320 | | } |
321 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
322 | | base.substr(val_pos, val_end - val_pos))); |
323 | | key_pos = val_end; |
324 | | if (key_pos != std::string::npos) { |
325 | | ++key_pos; |
326 | | } |
327 | | } |
328 | | |
329 | | return Status::OK(); |
330 | | } |
331 | | |
332 | | // Decimal string -> integer parser called by string_to_int. This is considerably faster than glibc's implementation. |
333 | | // In the case of overflow, the max/min value for the data type will be returned and *result is set to PARSE_OVERFLOW. |
334 | | // Assumes s represents a decimal number, optionally preceded by a single '+' or '-'; len <= 0 or a lone sign yields PARSE_FAILURE. |
335 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed; when enable_strict_mode is false, the overflow-checked path also accepts a trailing '.' followed only by digits. NOTE(review): len is int here but size_t in the public string_to_int wrapper, so very large lengths narrow at the call - confirm this is acceptable. |
336 | | template <typename T, bool enable_strict_mode = false> |
337 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
338 | | |
339 | | // Unsigned decimal parser called by string_to_unsigned_int. This is considerably faster than glibc's implementation. |
340 | | // In the case of overflow, the max/min value for the data type will be returned. |
341 | | // Assumes s represents a decimal number. |
342 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. NOTE(review): the definition is not in this part of the file - confirm these rules, and how a leading '-' is treated for an unsigned T, against it. |
343 | | template <typename T> |
344 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
345 | | ParseResult* result); |
346 | | |
347 | | // Convert a string s of length len, representing a number written in the given base, into an integer of type T. Called by the base-taking string_to_int overload. |
348 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
349 | | template <typename T> |
350 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
351 | | ParseResult* result); |
352 | | |
353 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
354 | | // and the number is positive. The decimal string_to_int_internal uses this as its fast path: it consumes the sign first, calls this only for inputs shorter than max_ascii_len<T>(), and applies the sign to the returned value itself. |
355 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
356 | | template <typename T, bool enable_strict_mode = false> |
357 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
358 | | ParseResult* result); |
359 | | |
360 | | // Greedily parses an unsigned integer from the start of s. The input must be zero length or begin with at least one legal digit. |
361 | | // At most max_len digits are consumed; parsing stops earlier at the first char that is not a digit. |
362 | | template <typename T> |
363 | | static inline T string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
364 | | ParseResult* result); |
365 | | |
366 | | // Floating point parser called by string_to_float. This is considerably faster than glibc's implementation (>100x why???) |
367 | | // No special case handling needs to be done for overflows, the floating point spec |
368 | | // already does it and will cap the values to -inf/inf |
369 | | // To avoid inaccurate conversions this function falls back to strtod for |
370 | | // scientific notation. NOTE(review): this header includes fast_float, so the strtod fallback described here may be out of date - confirm against the definition. |
371 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
372 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
373 | | template <typename T> |
374 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
375 | | ParseResult* result); |
376 | | |
377 | | // Parses a string for 'true' or 'false', case insensitive. Called by string_to_bool. |
378 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
379 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
380 | | ParseResult* result); |
381 | | |
382 | | // Returns true if s only contains whitespace. |
383 | 5.46k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
384 | 5.91k | for (int i = 0; i < len; ++i) { |
385 | 5.91k | if (!LIKELY(is_whitespace_ascii(s[i]))) { |
386 | 5.46k | return false; |
387 | 5.46k | } |
388 | 5.91k | } |
389 | 0 | return true; |
390 | 5.46k | } |
391 | | |
392 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
393 | 3.39k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
394 | 3.39k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
395 | 3.39k | } |
396 | | |
// Returns true when every one of the first `len` bytes of `s` is an ASCII digit '0'..'9'.
// An empty range (len <= 0) is vacuously all-digit.
static inline bool is_all_digit(const char* __restrict s, int len) {
    int idx = 0;
    while (idx < len) {
        const char ch = s[idx];
        if (ch < '0' || ch > '9') {
            return false;
        }
        ++idx;
    }
    return true;
}
405 | | }; // end of class StringParser |
406 | | |
// Definition of the decimal string -> integer parser declared in StringParser.
//
// Returns the parsed value and reports the outcome through *result:
//   - len <= 0, or the input is a lone '+'/'-'        -> 0, PARSE_FAILURE
//   - the value does not fit                           -> max_val (negated for '-' input), PARSE_OVERFLOW
//   - a non-digit where the first digit should be, or a non-digit followed by something
//     other than whitespace (non-strict mode also tolerates a '.'-plus-digits tail,
//     see is_float_suffix)                             -> 0, PARSE_FAILURE
//   - otherwise                                        -> value, PARSE_SUCCESS
// Inputs with fewer than max_ascii_len<T>() characters after the sign are handed to
// string_to_int_no_overflow, which is responsible for setting *result on that path.
407 | | template <typename T, bool enable_strict_mode> |
408 | 355k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
409 | 355k | if (UNLIKELY(len <= 0)) { |
410 | 1.33k | *result = PARSE_FAILURE; |
411 | 1.33k | return 0; |
412 | 1.33k | } |
413 | | |
    // The magnitude is accumulated in UnsignedT; the sign is applied when returning.
    // max_val presumably starts as the largest positive T (numeric_limits<T>(false)) - confirm.
414 | 354k | using UnsignedT = MakeUnsignedT<T>; |
415 | 354k | UnsignedT val = 0; |
416 | 354k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
417 | 354k | bool negative = false; |
418 | 354k | int i = 0; |
419 | 354k | switch (*s) { |
420 | 93.4k | case '-': |
421 | 93.4k | negative = true; |
    // A negative input is allowed a magnitude one larger than a positive one.
422 | 93.4k | max_val += 1; |
423 | 93.4k | [[fallthrough]]; |
424 | 96.1k | case '+': |
425 | 96.1k | ++i; |
426 | | // The input is only a single '+'/'-' char with no digits after it, so return failure directly |
427 | 96.1k | if (UNLIKELY(len == 1)) { |
428 | 1 | *result = PARSE_FAILURE; |
429 | 1 | return 0; |
430 | 1 | } |
431 | 354k | } |
432 | | |
433 | | // This is the fast path where the string cannot overflow: fewer than max_ascii_len<T>() chars remain after the sign, so the no-overflow helper parses them and sets *result. |
434 | 354k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
435 | 244k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); |
436 | 244k | return static_cast<T>(negative ? -val : val); |
437 | 244k | } |
438 | | |
    // Slow path: precompute max_val / 10 and max_val % 10 for the per-digit overflow check.
439 | 109k | const T max_div_10 = max_val / 10; |
440 | 109k | const T max_mod_10 = max_val % 10; |
441 | | |
    // Index of the first expected digit; a non-digit found at this index means no digit was read.
442 | 109k | int first = i; |
443 | 1.48M | for (; i < len; ++i) { |
444 | 1.41M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
445 | 1.40M | T digit = s[i] - '0'; |
446 | | // This is a tricky check to see if adding this digit will cause an overflow: val * 10 + digit exceeds max_val exactly when val > max_val / 10, or val == max_val / 10 and digit > max_val % 10; subtracting the bool (0 or 1) folds both cases into one comparison. |
447 | 1.40M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
448 | 31.1k | *result = PARSE_OVERFLOW; |
449 | 31.1k | return negative ? -max_val : max_val; |
450 | 31.1k | } |
451 | 1.37M | val = val * 10 + digit; |
452 | 1.37M | } else { |
453 | 3.60k | if constexpr (enable_strict_mode) { |
454 | 1.10k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
455 | | // Reject the string because no digit was read before this char, or the remaining chars are not all whitespace |
456 | 1.10k | *result = PARSE_FAILURE; |
457 | 1.10k | return 0; |
458 | 1.10k | } |
459 | 2.49k | } else { |
460 | 2.49k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && |
461 | 2.49k | !is_float_suffix(s + i, len - i))))) { |
462 | | // Reject the string because either the first char was not a digit, |
463 | | // or the remaining chars are neither all whitespace nor a '.' followed only by digits |
464 | 1.48k | *result = PARSE_FAILURE; |
465 | 1.48k | return 0; |
466 | 1.48k | } |
467 | 2.49k | } |
468 | | // The number ended at an accepted non-digit. Returning here is slightly faster than breaking the loop. |
469 | 1.00k | *result = PARSE_SUCCESS; |
470 | 3.60k | return static_cast<T>(negative ? -val : val); |
471 | 3.60k | } |
472 | 1.41M | } |
473 | 75.1k | *result = PARSE_SUCCESS; |
474 | 75.1k | return static_cast<T>(negative ? -val : val); |
475 | 109k | } _ZN5doris12StringParser22string_to_int_internalInLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 45.4k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 45.4k | if (UNLIKELY(len <= 0)) { | 410 | 25 | *result = PARSE_FAILURE; | 411 | 25 | return 0; | 412 | 25 | } | 413 | | | 414 | 45.3k | using UnsignedT = MakeUnsignedT<T>; | 415 | 45.3k | UnsignedT val = 0; | 416 | 45.3k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 45.3k | bool negative = false; | 418 | 45.3k | int i = 0; | 419 | 45.3k | switch (*s) { | 420 | 3.47k | case '-': | 421 | 3.47k | negative = true; | 422 | 3.47k | max_val += 1; | 423 | 3.47k | [[fallthrough]]; | 424 | 3.70k | case '+': | 425 | 3.70k | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 3.70k | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 45.3k | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 45.3k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 41.2k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 41.2k | return static_cast<T>(negative ? -val : val); | 437 | 41.2k | } | 438 | | | 439 | 4.18k | const T max_div_10 = max_val / 10; | 440 | 4.18k | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 4.18k | int first = i; | 443 | 166k | for (; i < len; ++i) { | 444 | 162k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 162k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 162k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 339 | *result = PARSE_OVERFLOW; | 449 | 339 | return negative ? 
-max_val : max_val; | 450 | 339 | } | 451 | 162k | val = val * 10 + digit; | 452 | 162k | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 185 | } else { | 460 | 185 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 185 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 57 | *result = PARSE_FAILURE; | 465 | 57 | return 0; | 466 | 57 | } | 467 | 185 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 128 | *result = PARSE_SUCCESS; | 470 | 185 | return static_cast<T>(negative ? -val : val); | 471 | 185 | } | 472 | 162k | } | 473 | 3.65k | *result = PARSE_SUCCESS; | 474 | 3.65k | return static_cast<T>(negative ? -val : val); | 475 | 4.18k | } |
_ZN5doris12StringParser22string_to_int_internalIaLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 95.7k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 95.7k | if (UNLIKELY(len <= 0)) { | 410 | 207 | *result = PARSE_FAILURE; | 411 | 207 | return 0; | 412 | 207 | } | 413 | | | 414 | 95.5k | using UnsignedT = MakeUnsignedT<T>; | 415 | 95.5k | UnsignedT val = 0; | 416 | 95.5k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 95.5k | bool negative = false; | 418 | 95.5k | int i = 0; | 419 | 95.5k | switch (*s) { | 420 | 20.0k | case '-': | 421 | 20.0k | negative = true; | 422 | 20.0k | max_val += 1; | 423 | 20.0k | [[fallthrough]]; | 424 | 20.3k | case '+': | 425 | 20.3k | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 20.3k | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 95.5k | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 95.5k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 68.6k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 68.6k | return static_cast<T>(negative ? -val : val); | 437 | 68.6k | } | 438 | | | 439 | 26.8k | const T max_div_10 = max_val / 10; | 440 | 26.8k | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 26.8k | int first = i; | 443 | 90.7k | for (; i < len; ++i) { | 444 | 81.2k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 80.1k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 80.1k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 16.2k | *result = PARSE_OVERFLOW; | 449 | 16.2k | return negative ? 
-max_val : max_val; | 450 | 16.2k | } | 451 | 63.8k | val = val * 10 + digit; | 452 | 63.8k | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 1.13k | } else { | 460 | 1.13k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 1.13k | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 783 | *result = PARSE_FAILURE; | 465 | 783 | return 0; | 466 | 783 | } | 467 | 1.13k | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 352 | *result = PARSE_SUCCESS; | 470 | 1.13k | return static_cast<T>(negative ? -val : val); | 471 | 1.13k | } | 472 | 81.2k | } | 473 | 9.45k | *result = PARSE_SUCCESS; | 474 | 9.45k | return static_cast<T>(negative ? -val : val); | 475 | 26.8k | } |
_ZN5doris12StringParser22string_to_int_internalIsLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 66.6k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 66.6k | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 66.6k | using UnsignedT = MakeUnsignedT<T>; | 415 | 66.6k | UnsignedT val = 0; | 416 | 66.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 66.6k | bool negative = false; | 418 | 66.6k | int i = 0; | 419 | 66.6k | switch (*s) { | 420 | 10.5k | case '-': | 421 | 10.5k | negative = true; | 422 | 10.5k | max_val += 1; | 423 | 10.5k | [[fallthrough]]; | 424 | 10.8k | case '+': | 425 | 10.8k | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 10.8k | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 66.6k | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 66.6k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 50.4k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 50.4k | return static_cast<T>(negative ? -val : val); | 437 | 50.4k | } | 438 | | | 439 | 16.2k | const T max_div_10 = max_val / 10; | 440 | 16.2k | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 16.2k | int first = i; | 443 | 90.7k | for (; i < len; ++i) { | 444 | 81.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 81.3k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 81.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 6.80k | *result = PARSE_OVERFLOW; | 449 | 6.80k | return negative ? 
-max_val : max_val; | 450 | 6.80k | } | 451 | 74.5k | val = val * 10 + digit; | 452 | 74.5k | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 405 | } else { | 460 | 405 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 405 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 133 | *result = PARSE_FAILURE; | 465 | 133 | return 0; | 466 | 133 | } | 467 | 405 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 272 | *result = PARSE_SUCCESS; | 470 | 405 | return static_cast<T>(negative ? -val : val); | 471 | 405 | } | 472 | 81.7k | } | 473 | 8.99k | *result = PARSE_SUCCESS; | 474 | 8.99k | return static_cast<T>(negative ? -val : val); | 475 | 16.2k | } |
_ZN5doris12StringParser22string_to_int_internalIiLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 61.6k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 61.6k | if (UNLIKELY(len <= 0)) { | 410 | 1.05k | *result = PARSE_FAILURE; | 411 | 1.05k | return 0; | 412 | 1.05k | } | 413 | | | 414 | 60.6k | using UnsignedT = MakeUnsignedT<T>; | 415 | 60.6k | UnsignedT val = 0; | 416 | 60.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 60.6k | bool negative = false; | 418 | 60.6k | int i = 0; | 419 | 60.6k | switch (*s) { | 420 | 8.49k | case '-': | 421 | 8.49k | negative = true; | 422 | 8.49k | max_val += 1; | 423 | 8.49k | [[fallthrough]]; | 424 | 8.84k | case '+': | 425 | 8.84k | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 8.84k | if (UNLIKELY(len == 1)) { | 428 | 1 | *result = PARSE_FAILURE; | 429 | 1 | return 0; | 430 | 1 | } | 431 | 60.6k | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 60.6k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 50.9k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 50.9k | return static_cast<T>(negative ? -val : val); | 437 | 50.9k | } | 438 | | | 439 | 9.65k | const T max_div_10 = max_val / 10; | 440 | 9.65k | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 9.65k | int first = i; | 443 | 99.6k | for (; i < len; ++i) { | 444 | 93.7k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 93.1k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 93.1k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 3.11k | *result = PARSE_OVERFLOW; | 449 | 3.11k | return negative ? 
-max_val : max_val; | 450 | 3.11k | } | 451 | 90.0k | val = val * 10 + digit; | 452 | 90.0k | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 573 | } else { | 460 | 573 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 573 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 445 | *result = PARSE_FAILURE; | 465 | 445 | return 0; | 466 | 445 | } | 467 | 573 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 128 | *result = PARSE_SUCCESS; | 470 | 573 | return static_cast<T>(negative ? -val : val); | 471 | 573 | } | 472 | 93.7k | } | 473 | 5.96k | *result = PARSE_SUCCESS; | 474 | 5.96k | return static_cast<T>(negative ? -val : val); | 475 | 9.65k | } |
_ZN5doris12StringParser22string_to_int_internalIlLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 81.2k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 81.2k | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 81.2k | using UnsignedT = MakeUnsignedT<T>; | 415 | 81.2k | UnsignedT val = 0; | 416 | 81.2k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 81.2k | bool negative = false; | 418 | 81.2k | int i = 0; | 419 | 81.2k | switch (*s) { | 420 | 48.7k | case '-': | 421 | 48.7k | negative = true; | 422 | 48.7k | max_val += 1; | 423 | 48.7k | [[fallthrough]]; | 424 | 49.0k | case '+': | 425 | 49.0k | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 49.0k | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 81.2k | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 81.2k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 31.5k | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 31.5k | return static_cast<T>(negative ? -val : val); | 437 | 31.5k | } | 438 | | | 439 | 49.7k | const T max_div_10 = max_val / 10; | 440 | 49.7k | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 49.7k | int first = i; | 443 | 991k | for (; i < len; ++i) { | 444 | 944k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 944k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 944k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 2.81k | *result = PARSE_OVERFLOW; | 449 | 2.81k | return negative ? 
-max_val : max_val; | 450 | 2.81k | } | 451 | 941k | val = val * 10 + digit; | 452 | 941k | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 199 | } else { | 460 | 199 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 199 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 71 | *result = PARSE_FAILURE; | 465 | 71 | return 0; | 466 | 71 | } | 467 | 199 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 128 | *result = PARSE_SUCCESS; | 470 | 199 | return static_cast<T>(negative ? -val : val); | 471 | 199 | } | 472 | 944k | } | 473 | 46.7k | *result = PARSE_SUCCESS; | 474 | 46.7k | return static_cast<T>(negative ? -val : val); | 475 | 49.7k | } |
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 4 | if (UNLIKELY(len <= 0)) { | 410 | 0 | *result = PARSE_FAILURE; | 411 | 0 | return 0; | 412 | 0 | } | 413 | | | 414 | 4 | using UnsignedT = MakeUnsignedT<T>; | 415 | 4 | UnsignedT val = 0; | 416 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 4 | bool negative = false; | 418 | 4 | int i = 0; | 419 | 4 | switch (*s) { | 420 | 0 | case '-': | 421 | 0 | negative = true; | 422 | 0 | max_val += 1; | 423 | 0 | [[fallthrough]]; | 424 | 0 | case '+': | 425 | 0 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 0 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 4 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 4 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 4 | return static_cast<T>(negative ? -val : val); | 437 | 4 | } | 438 | | | 439 | 0 | const T max_div_10 = max_val / 10; | 440 | 0 | const T max_mod_10 = max_val % 10; | 441 | |
| 442 | 0 | int first = i; | 443 | 0 | for (; i < len; ++i) { | 444 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 0 | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 0 | *result = PARSE_OVERFLOW; | 449 | 0 | return negative ? -max_val : max_val; | 450 | 0 | } | 451 | 0 | val = val * 10 + digit; | 452 | 0 | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 0 | } else { | 460 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 0 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 0 | *result = PARSE_FAILURE; | 465 | 0 | return 0; | 466 | 0 | } | 467 | 0 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 0 | return static_cast<T>(negative ? -val : val); | 471 | 0 | } | 472 | 0 | } | 473 | 0 | *result = PARSE_SUCCESS; | 474 | 0 | return static_cast<T>(negative ? -val : val); | 475 | 0 | } |
_ZN5doris12StringParser22string_to_int_internalIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 4 | if (UNLIKELY(len <= 0)) { | 410 | 0 | *result = PARSE_FAILURE; | 411 | 0 | return 0; | 412 | 0 | } | 413 | | | 414 | 4 | using UnsignedT = MakeUnsignedT<T>; | 415 | 4 | UnsignedT val = 0; | 416 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 4 | bool negative = false; | 418 | 4 | int i = 0; | 419 | 4 | switch (*s) { | 420 | 0 | case '-': | 421 | 0 | negative = true; | 422 | 0 | max_val += 1; | 423 | 0 | [[fallthrough]]; | 424 | 0 | case '+': | 425 | 0 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 0 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 4 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 0 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 0 | return static_cast<T>(negative ? -val : val); | 437 | 0 | } | 438 | | | 439 | 4 | const T max_div_10 = max_val / 10; | 440 | 4 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 4 | int first = i; | 443 | 84 | for (; i < len; ++i) { | 444 | 80 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 80 | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 80 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 0 | *result = PARSE_OVERFLOW; | 449 | 0 | return negative ? 
-max_val : max_val; | 450 | 0 | } | 451 | 80 | val = val * 10 + digit; | 452 | 80 | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 0 | } else { | 460 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 0 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 0 | *result = PARSE_FAILURE; | 465 | 0 | return 0; | 466 | 0 | } | 467 | 0 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 0 | return static_cast<T>(negative ? -val : val); | 471 | 0 | } | 472 | 80 | } | 473 | 4 | *result = PARSE_SUCCESS; | 474 | 4 | return static_cast<T>(negative ? -val : val); | 475 | 4 | } |
_ZN5doris12StringParser22string_to_int_internalImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 20 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 20 | if (UNLIKELY(len <= 0)) { | 410 | 0 | *result = PARSE_FAILURE; | 411 | 0 | return 0; | 412 | 0 | } | 413 | | | 414 | 20 | using UnsignedT = MakeUnsignedT<T>; | 415 | 20 | UnsignedT val = 0; | 416 | 20 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 20 | bool negative = false; | 418 | 20 | int i = 0; | 419 | 20 | switch (*s) { | 420 | 0 | case '-': | 421 | 0 | negative = true; | 422 | 0 | max_val += 1; | 423 | 0 | [[fallthrough]]; | 424 | 0 | case '+': | 425 | 0 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 0 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 20 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 20 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 20 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 20 | return static_cast<T>(negative ? -val : val); | 437 | 20 | } | 438 | | | 439 | 0 | const T max_div_10 = max_val / 10; | 440 | 0 | const T max_mod_10 = max_val % 10; | 441 | |
| 442 | 0 | int first = i; | 443 | 0 | for (; i < len; ++i) { | 444 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 0 | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 0 | *result = PARSE_OVERFLOW; | 449 | 0 | return negative ? -max_val : max_val; | 450 | 0 | } | 451 | 0 | val = val * 10 + digit; | 452 | 0 | } else { | 453 | | if constexpr (enable_strict_mode) { | 454 | | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | | *result = PARSE_FAILURE; | 457 | | return 0; | 458 | | } | 459 | 0 | } else { | 460 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | 0 | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | 0 | *result = PARSE_FAILURE; | 465 | 0 | return 0; | 466 | 0 | } | 467 | 0 | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 0 | return static_cast<T>(negative ? -val : val); | 471 | 0 | } | 472 | 0 | } | 473 | 0 | *result = PARSE_SUCCESS; | 474 | 0 | return static_cast<T>(negative ? -val : val); | 475 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjLb0EEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalIaLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 1.00k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 1.00k | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 993 | using UnsignedT = MakeUnsignedT<T>; | 415 | 993 | UnsignedT val = 0; | 416 | 993 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 993 | bool negative = false; | 418 | 993 | int i = 0; | 419 | 993 | switch (*s) { | 420 | 446 | case '-': | 421 | 446 | negative = true; | 422 | 446 | max_val += 1; | 423 | 446 | [[fallthrough]]; | 424 | 697 | case '+': | 425 | 697 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 697 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 993 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 993 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 51 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 51 | return static_cast<T>(negative ? -val : val); | 437 | 51 | } | 438 | | | 439 | 942 | const T max_div_10 = max_val / 10; | 440 | 942 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 942 | int first = i; | 443 | 4.25k | for (; i < len; ++i) { | 444 | 4.12k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 3.71k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 3.71k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 400 | *result = PARSE_OVERFLOW; | 449 | 400 | return negative ? 
-max_val : max_val; | 450 | 400 | } | 451 | 3.31k | val = val * 10 + digit; | 452 | 3.31k | } else { | 453 | 406 | if constexpr (enable_strict_mode) { | 454 | 406 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 406 | *result = PARSE_FAILURE; | 457 | 406 | return 0; | 458 | 406 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 406 | return static_cast<T>(negative ? -val : val); | 471 | 406 | } | 472 | 4.12k | } | 473 | 136 | *result = PARSE_SUCCESS; | 474 | 136 | return static_cast<T>(negative ? -val : val); | 475 | 942 | } |
_ZN5doris12StringParser22string_to_int_internalIsLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 984 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 984 | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 977 | using UnsignedT = MakeUnsignedT<T>; | 415 | 977 | UnsignedT val = 0; | 416 | 977 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 977 | bool negative = false; | 418 | 977 | int i = 0; | 419 | 977 | switch (*s) { | 420 | 438 | case '-': | 421 | 438 | negative = true; | 422 | 438 | max_val += 1; | 423 | 438 | [[fallthrough]]; | 424 | 685 | case '+': | 425 | 685 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 685 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 977 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 977 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 203 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 203 | return static_cast<T>(negative ? -val : val); | 437 | 203 | } | 438 | | | 439 | 774 | const T max_div_10 = max_val / 10; | 440 | 774 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 774 | int first = i; | 443 | 4.92k | for (; i < len; ++i) { | 444 | 4.84k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 4.53k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 4.53k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 384 | *result = PARSE_OVERFLOW; | 449 | 384 | return negative ? 
-max_val : max_val; | 450 | 384 | } | 451 | 4.14k | val = val * 10 + digit; | 452 | 4.14k | } else { | 453 | 310 | if constexpr (enable_strict_mode) { | 454 | 310 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 310 | *result = PARSE_FAILURE; | 457 | 310 | return 0; | 458 | 310 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 310 | return static_cast<T>(negative ? -val : val); | 471 | 310 | } | 472 | 4.84k | } | 473 | 80 | *result = PARSE_SUCCESS; | 474 | 80 | return static_cast<T>(negative ? -val : val); | 475 | 774 | } |
_ZN5doris12StringParser22string_to_int_internalIiLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 968 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 968 | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 961 | using UnsignedT = MakeUnsignedT<T>; | 415 | 961 | UnsignedT val = 0; | 416 | 961 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 961 | bool negative = false; | 418 | 961 | int i = 0; | 419 | 961 | switch (*s) { | 420 | 430 | case '-': | 421 | 430 | negative = true; | 422 | 430 | max_val += 1; | 423 | 430 | [[fallthrough]]; | 424 | 673 | case '+': | 425 | 673 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 673 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 961 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 961 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 399 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 399 | return static_cast<T>(negative ? -val : val); | 437 | 399 | } | 438 | | | 439 | 562 | const T max_div_10 = max_val / 10; | 440 | 562 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 562 | int first = i; | 443 | 6.65k | for (; i < len; ++i) { | 444 | 6.58k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 6.45k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 6.45k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 368 | *result = PARSE_OVERFLOW; | 449 | 368 | return negative ? 
-max_val : max_val; | 450 | 368 | } | 451 | 6.08k | val = val * 10 + digit; | 452 | 6.08k | } else { | 453 | 130 | if constexpr (enable_strict_mode) { | 454 | 130 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 130 | *result = PARSE_FAILURE; | 457 | 130 | return 0; | 458 | 130 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 130 | return static_cast<T>(negative ? -val : val); | 471 | 130 | } | 472 | 6.58k | } | 473 | 64 | *result = PARSE_SUCCESS; | 474 | 64 | return static_cast<T>(negative ? -val : val); | 475 | 562 | } |
_ZN5doris12StringParser22string_to_int_internalIlLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 952 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 952 | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 945 | using UnsignedT = MakeUnsignedT<T>; | 415 | 945 | UnsignedT val = 0; | 416 | 945 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 945 | bool negative = false; | 418 | 945 | int i = 0; | 419 | 945 | switch (*s) { | 420 | 422 | case '-': | 421 | 422 | negative = true; | 422 | 422 | max_val += 1; | 423 | 422 | [[fallthrough]]; | 424 | 661 | case '+': | 425 | 661 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 661 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 945 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 945 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 400 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 400 | return static_cast<T>(negative ? -val : val); | 437 | 400 | } | 438 | | | 439 | 545 | const T max_div_10 = max_val / 10; | 440 | 545 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 545 | int first = i; | 443 | 11.5k | for (; i < len; ++i) { | 444 | 11.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 11.3k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 11.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 352 | *result = PARSE_OVERFLOW; | 449 | 352 | return negative ? 
-max_val : max_val; | 450 | 352 | } | 451 | 10.9k | val = val * 10 + digit; | 452 | 10.9k | } else { | 453 | 129 | if constexpr (enable_strict_mode) { | 454 | 129 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 129 | *result = PARSE_FAILURE; | 457 | 129 | return 0; | 458 | 129 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 129 | return static_cast<T>(negative ? -val : val); | 471 | 129 | } | 472 | 11.4k | } | 473 | 64 | *result = PARSE_SUCCESS; | 474 | 64 | return static_cast<T>(negative ? -val : val); | 475 | 545 | } |
_ZN5doris12StringParser22string_to_int_internalInLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 936 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 936 | if (UNLIKELY(len <= 0)) { | 410 | 7 | *result = PARSE_FAILURE; | 411 | 7 | return 0; | 412 | 7 | } | 413 | | | 414 | 929 | using UnsignedT = MakeUnsignedT<T>; | 415 | 929 | UnsignedT val = 0; | 416 | 929 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 929 | bool negative = false; | 418 | 929 | int i = 0; | 419 | 929 | switch (*s) { | 420 | 414 | case '-': | 421 | 414 | negative = true; | 422 | 414 | max_val += 1; | 423 | 414 | [[fallthrough]]; | 424 | 649 | case '+': | 425 | 649 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 649 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 929 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 929 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 401 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 401 | return static_cast<T>(negative ? -val : val); | 437 | 401 | } | 438 | | | 439 | 528 | const T max_div_10 = max_val / 10; | 440 | 528 | const T max_mod_10 = max_val % 10; | 441 | | | 442 | 528 | int first = i; | 443 | 21.5k | for (; i < len; ++i) { | 444 | 21.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 21.3k | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 21.3k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 336 | *result = PARSE_OVERFLOW; | 449 | 336 | return negative ? 
-max_val : max_val; | 450 | 336 | } | 451 | 21.0k | val = val * 10 + digit; | 452 | 21.0k | } else { | 453 | 128 | if constexpr (enable_strict_mode) { | 454 | 128 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 128 | *result = PARSE_FAILURE; | 457 | 128 | return 0; | 458 | 128 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 128 | return static_cast<T>(negative ? -val : val); | 471 | 128 | } | 472 | 21.5k | } | 473 | 64 | *result = PARSE_SUCCESS; | 474 | 64 | return static_cast<T>(negative ? -val : val); | 475 | 528 | } |
_ZN5doris12StringParser22string_to_int_internalIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 408 | 132 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 409 | 132 | if (UNLIKELY(len <= 0)) { | 410 | 0 | *result = PARSE_FAILURE; | 411 | 0 | return 0; | 412 | 0 | } | 413 | | | 414 | 132 | using UnsignedT = MakeUnsignedT<T>; | 415 | 132 | UnsignedT val = 0; | 416 | 132 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 417 | 132 | bool negative = false; | 418 | 132 | int i = 0; | 419 | 132 | switch (*s) { | 420 | 0 | case '-': | 421 | 0 | negative = true; | 422 | 0 | max_val += 1; | 423 | 0 | [[fallthrough]]; | 424 | 0 | case '+': | 425 | 0 | ++i; | 426 | | // only one '+'/'-' char, so could return failure directly | 427 | 0 | if (UNLIKELY(len == 1)) { | 428 | 0 | *result = PARSE_FAILURE; | 429 | 0 | return 0; | 430 | 0 | } | 431 | 132 | } | 432 | | | 433 | | // This is the fast path where the string cannot overflow. | 434 | 132 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 435 | 132 | val = string_to_int_no_overflow<UnsignedT, enable_strict_mode>(s + i, len - i, result); | 436 | 132 | return static_cast<T>(negative ? -val : val); | 437 | 132 | } | 438 | | | 439 | 0 | const T max_div_10 = max_val / 10; | 440 | 0 | const T max_mod_10 = max_val % 10; | 441 | |
| 442 | 0 | int first = i; | 443 | 0 | for (; i < len; ++i) { | 444 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 445 | 0 | T digit = s[i] - '0'; | 446 | | // This is a tricky check to see if adding this digit will cause an overflow. | 447 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 448 | 0 | *result = PARSE_OVERFLOW; | 449 | 0 | return negative ? -max_val : max_val; | 450 | 0 | } | 451 | 0 | val = val * 10 + digit; | 452 | 0 | } else { | 453 | 0 | if constexpr (enable_strict_mode) { | 454 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 455 | | // Reject the string because the remaining chars are not all whitespace | 456 | 0 | *result = PARSE_FAILURE; | 457 | 0 | return 0; | 458 | 0 | } | 459 | | } else { | 460 | | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 461 | | !is_float_suffix(s + i, len - i))))) { | 462 | | // Reject the string because either the first char was not a digit, | 463 | | // or the remaining chars are not all whitespace | 464 | | *result = PARSE_FAILURE; | 465 | | return 0; | 466 | | } | 467 | | } | 468 | | // Returning here is slightly faster than breaking the loop. | 469 | 0 | *result = PARSE_SUCCESS; | 470 | 0 | return static_cast<T>(negative ? -val : val); | 471 | 0 | } | 472 | 0 | } | 473 | 0 | *result = PARSE_SUCCESS; | 474 | 0 | return static_cast<T>(negative ? -val : val); | 475 | 0 | } |
|
476 | | |
477 | | template <typename T> |
478 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
479 | 1.37k | ParseResult* result) { |
480 | 1.37k | if (UNLIKELY(len <= 0)) { |
481 | 0 | *result = PARSE_FAILURE; |
482 | 0 | return 0; |
483 | 0 | } |
484 | | |
485 | 1.37k | T val = 0; |
486 | 1.37k | T max_val = std::numeric_limits<T>::max(); |
487 | 1.37k | int i = 0; |
488 | | |
489 | 1.37k | using signedT = MakeSignedT<T>; |
490 | | // This is the fast path where the string cannot overflow. |
491 | 1.37k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
492 | 784 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
493 | 784 | return val; |
494 | 784 | } |
495 | | |
496 | 588 | const T max_div_10 = max_val / 10; |
497 | 588 | const T max_mod_10 = max_val % 10; |
498 | | |
499 | 588 | int first = i; |
500 | 4.65k | for (; i < len; ++i) { |
501 | 4.31k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
502 | 4.26k | T digit = s[i] - '0'; |
503 | | // This is a tricky check to see if adding this digit will cause an overflow. |
504 | 4.26k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
505 | 196 | *result = PARSE_OVERFLOW; |
506 | 196 | return max_val; |
507 | 196 | } |
508 | 4.06k | val = val * 10 + digit; |
509 | 4.06k | } else { |
510 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
511 | | // Reject the string because either the first char was not a digit, |
512 | | // or the remaining chars are not all whitespace |
513 | 49 | *result = PARSE_FAILURE; |
514 | 49 | return 0; |
515 | 49 | } |
516 | | // Returning here is slightly faster than breaking the loop. |
517 | 0 | *result = PARSE_SUCCESS; |
518 | 0 | return val; |
519 | 49 | } |
520 | 4.31k | } |
521 | 343 | *result = PARSE_SUCCESS; |
522 | 343 | return val; |
523 | 588 | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 479 | 343 | ParseResult* result) { | 480 | 343 | if (UNLIKELY(len <= 0)) { | 481 | 0 | *result = PARSE_FAILURE; | 482 | 0 | return 0; | 483 | 0 | } | 484 | | | 485 | 343 | T val = 0; | 486 | 343 | T max_val = std::numeric_limits<T>::max(); | 487 | 343 | int i = 0; | 488 | | | 489 | 343 | using signedT = MakeSignedT<T>; | 490 | | // This is the fast path where the string cannot overflow. | 491 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 492 | 98 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 493 | 98 | return val; | 494 | 98 | } | 495 | | | 496 | 245 | const T max_div_10 = max_val / 10; | 497 | 245 | const T max_mod_10 = max_val % 10; | 498 | | | 499 | 245 | int first = i; | 500 | 784 | for (; i < len; ++i) { | 501 | 637 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 502 | 588 | T digit = s[i] - '0'; | 503 | | // This is a tricky check to see if adding this digit will cause an overflow. | 504 | 588 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 505 | 49 | *result = PARSE_OVERFLOW; | 506 | 49 | return max_val; | 507 | 49 | } | 508 | 539 | val = val * 10 + digit; | 509 | 539 | } else { | 510 | 49 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 511 | | // Reject the string because either the first char was not a digit, | 512 | | // or the remaining chars are not all whitespace | 513 | 49 | *result = PARSE_FAILURE; | 514 | 49 | return 0; | 515 | 49 | } | 516 | | // Returning here is slightly faster than breaking the loop. | 517 | 0 | *result = PARSE_SUCCESS; | 518 | 0 | return val; | 519 | 49 | } | 520 | 637 | } | 521 | 147 | *result = PARSE_SUCCESS; | 522 | 147 | return val; | 523 | 245 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 479 | 343 | ParseResult* result) { | 480 | 343 | if (UNLIKELY(len <= 0)) { | 481 | 0 | *result = PARSE_FAILURE; | 482 | 0 | return 0; | 483 | 0 | } | 484 | | | 485 | 343 | T val = 0; | 486 | 343 | T max_val = std::numeric_limits<T>::max(); | 487 | 343 | int i = 0; | 488 | | | 489 | 343 | using signedT = MakeSignedT<T>; | 490 | | // This is the fast path where the string cannot overflow. | 491 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 492 | 196 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 493 | 196 | return val; | 494 | 196 | } | 495 | | | 496 | 147 | const T max_div_10 = max_val / 10; | 497 | 147 | const T max_mod_10 = max_val % 10; | 498 | | | 499 | 147 | int first = i; | 500 | 833 | for (; i < len; ++i) { | 501 | 735 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 502 | 735 | T digit = s[i] - '0'; | 503 | | // This is a tricky check to see if adding this digit will cause an overflow. | 504 | 735 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 505 | 49 | *result = PARSE_OVERFLOW; | 506 | 49 | return max_val; | 507 | 49 | } | 508 | 686 | val = val * 10 + digit; | 509 | 686 | } else { | 510 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 511 | | // Reject the string because either the first char was not a digit, | 512 | | // or the remaining chars are not all whitespace | 513 | 0 | *result = PARSE_FAILURE; | 514 | 0 | return 0; | 515 | 0 | } | 516 | | // Returning here is slightly faster than breaking the loop. | 517 | 0 | *result = PARSE_SUCCESS; | 518 | 0 | return val; | 519 | 0 | } | 520 | 735 | } | 521 | 98 | *result = PARSE_SUCCESS; | 522 | 98 | return val; | 523 | 147 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 479 | 343 | ParseResult* result) { | 480 | 343 | if (UNLIKELY(len <= 0)) { | 481 | 0 | *result = PARSE_FAILURE; | 482 | 0 | return 0; | 483 | 0 | } | 484 | | | 485 | 343 | T val = 0; | 486 | 343 | T max_val = std::numeric_limits<T>::max(); | 487 | 343 | int i = 0; | 488 | | | 489 | 343 | using signedT = MakeSignedT<T>; | 490 | | // This is the fast path where the string cannot overflow. | 491 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 492 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 493 | 245 | return val; | 494 | 245 | } | 495 | | | 496 | 98 | const T max_div_10 = max_val / 10; | 497 | 98 | const T max_mod_10 = max_val % 10; | 498 | | | 499 | 98 | int first = i; | 500 | 1.02k | for (; i < len; ++i) { | 501 | 980 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 502 | 980 | T digit = s[i] - '0'; | 503 | | // This is a tricky check to see if adding this digit will cause an overflow. | 504 | 980 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 505 | 49 | *result = PARSE_OVERFLOW; | 506 | 49 | return max_val; | 507 | 49 | } | 508 | 931 | val = val * 10 + digit; | 509 | 931 | } else { | 510 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 511 | | // Reject the string because either the first char was not a digit, | 512 | | // or the remaining chars are not all whitespace | 513 | 0 | *result = PARSE_FAILURE; | 514 | 0 | return 0; | 515 | 0 | } | 516 | | // Returning here is slightly faster than breaking the loop. | 517 | 0 | *result = PARSE_SUCCESS; | 518 | 0 | return val; | 519 | 0 | } | 520 | 980 | } | 521 | 49 | *result = PARSE_SUCCESS; | 522 | 49 | return val; | 523 | 98 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 479 | 343 | ParseResult* result) { | 480 | 343 | if (UNLIKELY(len <= 0)) { | 481 | 0 | *result = PARSE_FAILURE; | 482 | 0 | return 0; | 483 | 0 | } | 484 | | | 485 | 343 | T val = 0; | 486 | 343 | T max_val = std::numeric_limits<T>::max(); | 487 | 343 | int i = 0; | 488 | | | 489 | 343 | using signedT = MakeSignedT<T>; | 490 | | // This is the fast path where the string cannot overflow. | 491 | 343 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 492 | 245 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 493 | 245 | return val; | 494 | 245 | } | 495 | | | 496 | 98 | const T max_div_10 = max_val / 10; | 497 | 98 | const T max_mod_10 = max_val % 10; | 498 | | | 499 | 98 | int first = i; | 500 | 2.00k | for (; i < len; ++i) { | 501 | 1.96k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 502 | 1.96k | T digit = s[i] - '0'; | 503 | | // This is a tricky check to see if adding this digit will cause an overflow. | 504 | 1.96k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 505 | 49 | *result = PARSE_OVERFLOW; | 506 | 49 | return max_val; | 507 | 49 | } | 508 | 1.91k | val = val * 10 + digit; | 509 | 1.91k | } else { | 510 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 511 | | // Reject the string because either the first char was not a digit, | 512 | | // or the remaining chars are not all whitespace | 513 | 0 | *result = PARSE_FAILURE; | 514 | 0 | return 0; | 515 | 0 | } | 516 | | // Returning here is slightly faster than breaking the loop. | 517 | 0 | *result = PARSE_SUCCESS; | 518 | 0 | return val; | 519 | 0 | } | 520 | 1.96k | } | 521 | 49 | *result = PARSE_SUCCESS; | 522 | 49 | return val; | 523 | 98 | } |
|
524 | | |
525 | | template <typename T> |
526 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
527 | 27.8k | ParseResult* result) { |
528 | 27.8k | using UnsignedT = MakeUnsignedT<T>; |
529 | 27.8k | UnsignedT val = 0; |
530 | 27.8k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
531 | 27.8k | bool negative = false; |
532 | 27.8k | if (UNLIKELY(len <= 0)) { |
533 | 0 | *result = PARSE_FAILURE; |
534 | 0 | return 0; |
535 | 0 | } |
536 | 27.8k | int i = 0; |
537 | 27.8k | switch (*s) { |
538 | 13.4k | case '-': |
539 | 13.4k | negative = true; |
540 | 13.4k | max_val = StringParser::numeric_limits<T>(false) + 1; |
541 | 13.4k | [[fallthrough]]; |
542 | 13.7k | case '+': |
543 | 13.7k | i = 1; |
544 | 27.8k | } |
545 | | |
546 | 27.8k | const T max_div_base = max_val / base; |
547 | 27.8k | const T max_mod_base = max_val % base; |
548 | | |
549 | 27.8k | int first = i; |
550 | 90.9k | for (; i < len; ++i) { |
551 | 76.6k | T digit; |
552 | 76.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
553 | 75.7k | digit = s[i] - '0'; |
554 | 75.7k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
555 | 639 | digit = (s[i] - 'a' + 10); |
556 | 639 | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
557 | 98 | digit = (s[i] - 'A' + 10); |
558 | 147 | } else { |
559 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
560 | | // Reject the string because either the first char was not an alpha/digit, |
561 | | // or the remaining chars are not all whitespace |
562 | 147 | *result = PARSE_FAILURE; |
563 | 147 | return 0; |
564 | 147 | } |
565 | | // skip trailing whitespace. |
566 | 0 | break; |
567 | 147 | } |
568 | | |
569 | | // Bail, if we encounter a digit that is not available in base. |
570 | 76.4k | if (digit >= base) { |
571 | 392 | break; |
572 | 392 | } |
573 | | |
574 | | // This is a tricky check to see if adding this digit will cause an overflow. |
575 | 76.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
576 | 12.9k | *result = PARSE_OVERFLOW; |
577 | 12.9k | return static_cast<T>(negative ? -max_val : max_val); |
578 | 12.9k | } |
579 | 63.1k | val = val * base + digit; |
580 | 63.1k | } |
581 | 14.7k | *result = PARSE_SUCCESS; |
582 | 14.7k | return static_cast<T>(negative ? -val : val); |
583 | 27.8k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 527 | 26.4k | ParseResult* result) { | 528 | 26.4k | using UnsignedT = MakeUnsignedT<T>; | 529 | 26.4k | UnsignedT val = 0; | 530 | 26.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 531 | 26.4k | bool negative = false; | 532 | 26.4k | if (UNLIKELY(len <= 0)) { | 533 | 0 | *result = PARSE_FAILURE; | 534 | 0 | return 0; | 535 | 0 | } | 536 | 26.4k | int i = 0; | 537 | 26.4k | switch (*s) { | 538 | 12.8k | case '-': | 539 | 12.8k | negative = true; | 540 | 12.8k | max_val = StringParser::numeric_limits<T>(false) + 1; | 541 | 12.8k | [[fallthrough]]; | 542 | 12.9k | case '+': | 543 | 12.9k | i = 1; | 544 | 26.4k | } | 545 | | | 546 | 26.4k | const T max_div_base = max_val / base; | 547 | 26.4k | const T max_mod_base = max_val % base; | 548 | | | 549 | 26.4k | int first = i; | 550 | 80.7k | for (; i < len; ++i) { | 551 | 67.4k | T digit; | 552 | 67.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 553 | 66.6k | digit = s[i] - '0'; | 554 | 66.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 555 | 539 | digit = (s[i] - 'a' + 10); | 556 | 539 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 557 | 98 | digit = (s[i] - 'A' + 10); | 558 | 147 | } else { | 559 | 147 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 560 | | // Reject the string because either the first char was not an alpha/digit, | 561 | | // or the remaining chars are not all whitespace | 562 | 147 | *result = PARSE_FAILURE; | 563 | 147 | return 0; | 564 | 147 | } | 565 | | // skip trailing whitespace. | 566 | 0 | break; | 567 | 147 | } | 568 | | | 569 | | // Bail, if we encounter a digit that is not available in base. | 570 | 67.3k | if (digit >= base) { | 571 | 392 | break; | 572 | 392 | } | 573 | | | 574 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 575 | 66.9k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 576 | 12.6k | *result = PARSE_OVERFLOW; | 577 | 12.6k | return static_cast<T>(negative ? -max_val : max_val); | 578 | 12.6k | } | 579 | 54.2k | val = val * base + digit; | 580 | 54.2k | } | 581 | 13.6k | *result = PARSE_SUCCESS; | 582 | 13.6k | return static_cast<T>(negative ? -val : val); | 583 | 26.4k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 527 | 490 | ParseResult* result) { | 528 | 490 | using UnsignedT = MakeUnsignedT<T>; | 529 | 490 | UnsignedT val = 0; | 530 | 490 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 531 | 490 | bool negative = false; | 532 | 490 | if (UNLIKELY(len <= 0)) { | 533 | 0 | *result = PARSE_FAILURE; | 534 | 0 | return 0; | 535 | 0 | } | 536 | 490 | int i = 0; | 537 | 490 | switch (*s) { | 538 | 196 | case '-': | 539 | 196 | negative = true; | 540 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 541 | 196 | [[fallthrough]]; | 542 | 245 | case '+': | 543 | 245 | i = 1; | 544 | 490 | } | 545 | | | 546 | 490 | const T max_div_base = max_val / base; | 547 | 490 | const T max_mod_base = max_val % base; | 548 | | | 549 | 490 | int first = i; | 550 | 2.10k | for (; i < len; ++i) { | 551 | 1.71k | T digit; | 552 | 1.71k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 553 | 1.61k | digit = s[i] - '0'; | 554 | 1.61k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 555 | 98 | digit = (s[i] - 'a' + 10); | 556 | 98 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 557 | 0 | digit = (s[i] - 'A' + 10); | 558 | 0 | } else { | 559 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 560 | | // Reject the string because either the first char was not an alpha/digit, | 561 | | // or the remaining chars are not all whitespace | 562 | 0 | *result = PARSE_FAILURE; | 563 | 0 | return 0; | 564 | 0 | } | 565 | | // skip trailing whitespace. | 566 | 0 | break; | 567 | 0 | } | 568 | | | 569 | | // Bail, if we encounter a digit that is not available in base. | 570 | 1.71k | if (digit >= base) { | 571 | 0 | break; | 572 | 0 | } | 573 | | | 574 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 575 | 1.71k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 576 | 98 | *result = PARSE_OVERFLOW; | 577 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 578 | 98 | } | 579 | 1.61k | val = val * base + digit; | 580 | 1.61k | } | 581 | 392 | *result = PARSE_SUCCESS; | 582 | 392 | return static_cast<T>(negative ? -val : val); | 583 | 490 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 527 | 441 | ParseResult* result) { | 528 | 441 | using UnsignedT = MakeUnsignedT<T>; | 529 | 441 | UnsignedT val = 0; | 530 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 531 | 441 | bool negative = false; | 532 | 441 | if (UNLIKELY(len <= 0)) { | 533 | 0 | *result = PARSE_FAILURE; | 534 | 0 | return 0; | 535 | 0 | } | 536 | 441 | int i = 0; | 537 | 441 | switch (*s) { | 538 | 147 | case '-': | 539 | 147 | negative = true; | 540 | 147 | max_val = StringParser::numeric_limits<T>(false) + 1; | 541 | 147 | [[fallthrough]]; | 542 | 245 | case '+': | 543 | 245 | i = 1; | 544 | 441 | } | 545 | | | 546 | 441 | const T max_div_base = max_val / base; | 547 | 441 | const T max_mod_base = max_val % base; | 548 | | | 549 | 441 | int first = i; | 550 | 3.03k | for (; i < len; ++i) { | 551 | 2.69k | T digit; | 552 | 2.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 553 | 2.69k | digit = s[i] - '0'; | 554 | 2.69k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 555 | 0 | digit = (s[i] - 'a' + 10); | 556 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 557 | 0 | digit = (s[i] - 'A' + 10); | 558 | 0 | } else { | 559 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 560 | | // Reject the string because either the first char was not an alpha/digit, | 561 | | // or the remaining chars are not all whitespace | 562 | 0 | *result = PARSE_FAILURE; | 563 | 0 | return 0; | 564 | 0 | } | 565 | | // skip trailing whitespace. | 566 | 0 | break; | 567 | 0 | } | 568 | | | 569 | | // Bail, if we encounter a digit that is not available in base. | 570 | 2.69k | if (digit >= base) { | 571 | 0 | break; | 572 | 0 | } | 573 | | | 574 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 575 | 2.69k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 576 | 98 | *result = PARSE_OVERFLOW; | 577 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 578 | 98 | } | 579 | 2.59k | val = val * base + digit; | 580 | 2.59k | } | 581 | 343 | *result = PARSE_SUCCESS; | 582 | 343 | return static_cast<T>(negative ? -val : val); | 583 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 527 | 441 | ParseResult* result) { | 528 | 441 | using UnsignedT = MakeUnsignedT<T>; | 529 | 441 | UnsignedT val = 0; | 530 | 441 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 531 | 441 | bool negative = false; | 532 | 441 | if (UNLIKELY(len <= 0)) { | 533 | 0 | *result = PARSE_FAILURE; | 534 | 0 | return 0; | 535 | 0 | } | 536 | 441 | int i = 0; | 537 | 441 | switch (*s) { | 538 | 196 | case '-': | 539 | 196 | negative = true; | 540 | 196 | max_val = StringParser::numeric_limits<T>(false) + 1; | 541 | 196 | [[fallthrough]]; | 542 | 245 | case '+': | 543 | 245 | i = 1; | 544 | 441 | } | 545 | | | 546 | 441 | const T max_div_base = max_val / base; | 547 | 441 | const T max_mod_base = max_val % base; | 548 | | | 549 | 441 | int first = i; | 550 | 5.09k | for (; i < len; ++i) { | 551 | 4.75k | T digit; | 552 | 4.75k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 553 | 4.75k | digit = s[i] - '0'; | 554 | 4.75k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 555 | 0 | digit = (s[i] - 'a' + 10); | 556 | 0 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 557 | 0 | digit = (s[i] - 'A' + 10); | 558 | 0 | } else { | 559 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 560 | | // Reject the string because either the first char was not an alpha/digit, | 561 | | // or the remaining chars are not all whitespace | 562 | 0 | *result = PARSE_FAILURE; | 563 | 0 | return 0; | 564 | 0 | } | 565 | | // skip trailing whitespace. | 566 | 0 | break; | 567 | 0 | } | 568 | | | 569 | | // Bail, if we encounter a digit that is not available in base. | 570 | 4.75k | if (digit >= base) { | 571 | 0 | break; | 572 | 0 | } | 573 | | | 574 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 575 | 4.75k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 576 | 98 | *result = PARSE_OVERFLOW; | 577 | 98 | return static_cast<T>(negative ? -max_val : max_val); | 578 | 98 | } | 579 | 4.65k | val = val * base + digit; | 580 | 4.65k | } | 581 | 343 | *result = PARSE_SUCCESS; | 582 | 343 | return static_cast<T>(negative ? -val : val); | 583 | 441 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 527 | 1 | ParseResult* result) { | 528 | 1 | using UnsignedT = MakeUnsignedT<T>; | 529 | 1 | UnsignedT val = 0; | 530 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 531 | 1 | bool negative = false; | 532 | 1 | if (UNLIKELY(len <= 0)) { | 533 | 0 | *result = PARSE_FAILURE; | 534 | 0 | return 0; | 535 | 0 | } | 536 | 1 | int i = 0; | 537 | 1 | switch (*s) { | 538 | 0 | case '-': | 539 | 0 | negative = true; | 540 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 541 | 0 | [[fallthrough]]; | 542 | 0 | case '+': | 543 | 0 | i = 1; | 544 | 1 | } | 545 | | | 546 | 1 | const T max_div_base = max_val / base; | 547 | 1 | const T max_mod_base = max_val % base; | 548 | | | 549 | 1 | int first = i; | 550 | 3 | for (; i < len; ++i) { | 551 | 2 | T digit; | 552 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 553 | 0 | digit = s[i] - '0'; | 554 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 555 | 2 | digit = (s[i] - 'a' + 10); | 556 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 557 | 0 | digit = (s[i] - 'A' + 10); | 558 | 0 | } else { | 559 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 560 | | // Reject the string because either the first char was not an alpha/digit, | 561 | | // or the remaining chars are not all whitespace | 562 | 0 | *result = PARSE_FAILURE; | 563 | 0 | return 0; | 564 | 0 | } | 565 | | // skip trailing whitespace. | 566 | 0 | break; | 567 | 0 | } | 568 | | | 569 | | // Bail, if we encounter a digit that is not available in base. | 570 | 2 | if (digit >= base) { | 571 | 0 | break; | 572 | 0 | } | 573 | | | 574 | | // This is a tricky check to see if adding this digit will cause an overflow. | 575 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 576 | 0 | *result = PARSE_OVERFLOW; | 577 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 578 | 0 | } | 579 | 2 | val = val * base + digit; | 580 | 2 | } | 581 | 1 | *result = PARSE_SUCCESS; | 582 | 1 | return static_cast<T>(negative ? -val : val); | 583 | 1 | } |
|
584 | | |
585 | | template <typename T, bool enable_strict_mode> |
586 | 245k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
587 | 245k | T val = 0; |
588 | 245k | if (UNLIKELY(len == 0)) { |
589 | 0 | *result = PARSE_SUCCESS; |
590 | 0 | return val; |
591 | 0 | } |
592 | | // Factor out the first char for error handling speeds up the loop. |
593 | 245k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
594 | 243k | val = s[0] - '0'; |
595 | 243k | } else { |
596 | 1.70k | *result = PARSE_FAILURE; |
597 | 1.70k | return 0; |
598 | 1.70k | } |
599 | 389k | for (int i = 1; i < len; ++i) { |
600 | 148k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
601 | 145k | T digit = s[i] - '0'; |
602 | 145k | val = val * 10 + digit; |
603 | 145k | } else { |
604 | 2.72k | if constexpr (enable_strict_mode) { |
605 | 860 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { |
606 | 860 | *result = PARSE_FAILURE; |
607 | 860 | return 0; |
608 | 860 | } |
609 | 1.86k | } else { |
610 | 1.86k | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && |
611 | 1.86k | !is_float_suffix(s + i, len - i)))) { |
612 | 328 | *result = PARSE_FAILURE; |
613 | 328 | return 0; |
614 | 328 | } |
615 | 1.86k | } |
616 | 1.53k | *result = PARSE_SUCCESS; |
617 | 2.72k | return val; |
618 | 2.72k | } |
619 | 148k | } |
620 | 240k | *result = PARSE_SUCCESS; |
621 | 240k | return val; |
622 | 243k | } _ZN5doris12StringParser25string_to_int_no_overflowIoLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 41.2k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 41.2k | T val = 0; | 588 | 41.2k | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 41.2k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 41.0k | val = s[0] - '0'; | 595 | 41.0k | } else { | 596 | 119 | *result = PARSE_FAILURE; | 597 | 119 | return 0; | 598 | 119 | } | 599 | 57.7k | for (int i = 1; i < len; ++i) { | 600 | 16.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 16.6k | T digit = s[i] - '0'; | 602 | 16.6k | val = val * 10 + digit; | 603 | 16.6k | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 289 | } else { | 610 | 289 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 289 | !is_float_suffix(s + i, len - i)))) { | 612 | 65 | *result = PARSE_FAILURE; | 613 | 65 | return 0; | 614 | 65 | } | 615 | 289 | } | 616 | 224 | *result = PARSE_SUCCESS; | 617 | 289 | return val; | 618 | 289 | } | 619 | 16.9k | } | 620 | 40.7k | *result = PARSE_SUCCESS; | 621 | 40.7k | return val; | 622 | 41.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 68.7k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 68.7k | T val = 0; | 588 | 68.7k | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 68.7k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 68.6k | val = s[0] - '0'; | 595 | 68.6k | } else { | 596 | 127 | *result = PARSE_FAILURE; | 597 | 127 | return 0; | 598 | 127 | } | 599 | 101k | for (int i = 1; i < len; ++i) { | 600 | 33.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 33.0k | T digit = s[i] - '0'; | 602 | 33.0k | val = val * 10 + digit; | 603 | 33.0k | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 1 | } else { | 610 | 1 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 1 | !is_float_suffix(s + i, len - i)))) { | 612 | 1 | *result = PARSE_FAILURE; | 613 | 1 | return 0; | 614 | 1 | } | 615 | 1 | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 1 | return val; | 618 | 1 | } | 619 | 33.0k | } | 620 | 68.6k | *result = PARSE_SUCCESS; | 621 | 68.6k | return val; | 622 | 68.6k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 50.6k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 50.6k | T val = 0; | 588 | 50.6k | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 50.6k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 50.3k | val = s[0] - '0'; | 595 | 50.3k | } else { | 596 | 250 | *result = PARSE_FAILURE; | 597 | 250 | return 0; | 598 | 250 | } | 599 | 73.4k | for (int i = 1; i < len; ++i) { | 600 | 23.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 23.0k | T digit = s[i] - '0'; | 602 | 23.0k | val = val * 10 + digit; | 603 | 23.0k | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 924 | } else { | 610 | 924 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 924 | !is_float_suffix(s + i, len - i)))) { | 612 | 63 | *result = PARSE_FAILURE; | 613 | 63 | return 0; | 614 | 63 | } | 615 | 924 | } | 616 | 861 | *result = PARSE_SUCCESS; | 617 | 924 | return val; | 618 | 924 | } | 619 | 23.9k | } | 620 | 49.4k | *result = PARSE_SUCCESS; | 621 | 49.4k | return val; | 622 | 50.3k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 51.2k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 51.2k | T val = 0; | 588 | 51.2k | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 51.2k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 50.3k | val = s[0] - '0'; | 595 | 50.3k | } else { | 596 | 831 | *result = PARSE_FAILURE; | 597 | 831 | return 0; | 598 | 831 | } | 599 | 86.0k | for (int i = 1; i < len; ++i) { | 600 | 35.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 35.6k | T digit = s[i] - '0'; | 602 | 35.6k | val = val * 10 + digit; | 603 | 35.6k | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 339 | } else { | 610 | 339 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 339 | !is_float_suffix(s + i, len - i)))) { | 612 | 115 | *result = PARSE_FAILURE; | 613 | 115 | return 0; | 614 | 115 | } | 615 | 339 | } | 616 | 224 | *result = PARSE_SUCCESS; | 617 | 339 | return val; | 618 | 339 | } | 619 | 35.9k | } | 620 | 50.0k | *result = PARSE_SUCCESS; | 621 | 50.0k | return val; | 622 | 50.3k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 31.8k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 31.8k | T val = 0; | 588 | 31.8k | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 31.8k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 31.5k | val = s[0] - '0'; | 595 | 31.5k | } else { | 596 | 258 | *result = PARSE_FAILURE; | 597 | 258 | return 0; | 598 | 258 | } | 599 | 66.2k | for (int i = 1; i < len; ++i) { | 600 | 35.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 34.7k | T digit = s[i] - '0'; | 602 | 34.7k | val = val * 10 + digit; | 603 | 34.7k | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 309 | } else { | 610 | 309 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 309 | !is_float_suffix(s + i, len - i)))) { | 612 | 84 | *result = PARSE_FAILURE; | 613 | 84 | return 0; | 614 | 84 | } | 615 | 309 | } | 616 | 225 | *result = PARSE_SUCCESS; | 617 | 309 | return val; | 618 | 309 | } | 619 | 35.0k | } | 620 | 31.2k | *result = PARSE_SUCCESS; | 621 | 31.2k | return val; | 622 | 31.5k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEELb0EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 4 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 4 | T val = 0; | 588 | 4 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 4 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 4 | val = s[0] - '0'; | 595 | 4 | } else { | 596 | 0 | *result = PARSE_FAILURE; | 597 | 0 | return 0; | 598 | 0 | } | 599 | 4 | for (int i = 1; i < len; ++i) { | 600 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 0 | T digit = s[i] - '0'; | 602 | 0 | val = val * 10 + digit; | 603 | 0 | } else { | 604 | | if constexpr (enable_strict_mode) { | 605 | | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | | *result = PARSE_FAILURE; | 607 | | return 0; | 608 | | } | 609 | 0 | } else { | 610 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | 0 | !is_float_suffix(s + i, len - i)))) { | 612 | 0 | *result = PARSE_FAILURE; | 613 | 0 | return 0; | 614 | 0 | } | 615 | 0 | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 0 | return val; | 618 | 0 | } | 619 | 0 | } | 620 | 4 | *result = PARSE_SUCCESS; | 621 | 4 | return val; | 622 | 4 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIhLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 51 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 51 | T val = 0; | 588 | 51 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 51 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 41 | val = s[0] - '0'; | 595 | 41 | } else { | 596 | 10 | *result = PARSE_FAILURE; | 597 | 10 | return 0; | 598 | 10 | } | 599 | 41 | for (int i = 1; i < len; ++i) { | 600 | 1 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 0 | T digit = s[i] - '0'; | 602 | 0 | val = val * 10 + digit; | 603 | 1 | } else { | 604 | 1 | if constexpr (enable_strict_mode) { | 605 | 1 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | 1 | *result = PARSE_FAILURE; | 607 | 1 | return 0; | 608 | 1 | } | 609 | | } else { | 610 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | | !is_float_suffix(s + i, len - i)))) { | 612 | | *result = PARSE_FAILURE; | 613 | | return 0; | 614 | | } | 615 | | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 1 | return val; | 618 | 1 | } | 619 | 1 | } | 620 | 40 | *result = PARSE_SUCCESS; | 621 | 40 | return val; | 622 | 41 | } |
_ZN5doris12StringParser25string_to_int_no_overflowItLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 203 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 203 | T val = 0; | 588 | 203 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 203 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 187 | val = s[0] - '0'; | 595 | 187 | } else { | 596 | 16 | *result = PARSE_FAILURE; | 597 | 16 | return 0; | 598 | 16 | } | 599 | 339 | for (int i = 1; i < len; ++i) { | 600 | 243 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 152 | T digit = s[i] - '0'; | 602 | 152 | val = val * 10 + digit; | 603 | 152 | } else { | 604 | 91 | if constexpr (enable_strict_mode) { | 605 | 91 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | 91 | *result = PARSE_FAILURE; | 607 | 91 | return 0; | 608 | 91 | } | 609 | | } else { | 610 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | | !is_float_suffix(s + i, len - i)))) { | 612 | | *result = PARSE_FAILURE; | 613 | | return 0; | 614 | | } | 615 | | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 91 | return val; | 618 | 91 | } | 619 | 243 | } | 620 | 96 | *result = PARSE_SUCCESS; | 621 | 96 | return val; | 622 | 187 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 531 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 531 | T val = 0; | 588 | 531 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 531 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 500 | val = s[0] - '0'; | 595 | 500 | } else { | 596 | 31 | *result = PARSE_FAILURE; | 597 | 31 | return 0; | 598 | 31 | } | 599 | 1.40k | for (int i = 1; i < len; ++i) { | 600 | 1.16k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 904 | T digit = s[i] - '0'; | 602 | 904 | val = val * 10 + digit; | 603 | 904 | } else { | 604 | 256 | if constexpr (enable_strict_mode) { | 605 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | 256 | *result = PARSE_FAILURE; | 607 | 256 | return 0; | 608 | 256 | } | 609 | | } else { | 610 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | | !is_float_suffix(s + i, len - i)))) { | 612 | | *result = PARSE_FAILURE; | 613 | | return 0; | 614 | | } | 615 | | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 256 | return val; | 618 | 256 | } | 619 | 1.16k | } | 620 | 244 | *result = PARSE_SUCCESS; | 621 | 244 | return val; | 622 | 500 | } |
_ZN5doris12StringParser25string_to_int_no_overflowImLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 400 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 400 | T val = 0; | 588 | 400 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 400 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 368 | val = s[0] - '0'; | 595 | 368 | } else { | 596 | 32 | *result = PARSE_FAILURE; | 597 | 32 | return 0; | 598 | 32 | } | 599 | 1.09k | for (int i = 1; i < len; ++i) { | 600 | 981 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 725 | T digit = s[i] - '0'; | 602 | 725 | val = val * 10 + digit; | 603 | 725 | } else { | 604 | 256 | if constexpr (enable_strict_mode) { | 605 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | 256 | *result = PARSE_FAILURE; | 607 | 256 | return 0; | 608 | 256 | } | 609 | | } else { | 610 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | | !is_float_suffix(s + i, len - i)))) { | 612 | | *result = PARSE_FAILURE; | 613 | | return 0; | 614 | | } | 615 | | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 256 | return val; | 618 | 256 | } | 619 | 981 | } | 620 | 112 | *result = PARSE_SUCCESS; | 621 | 112 | return val; | 622 | 368 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoLb1EEET_PKciPNS0_11ParseResultE Line | Count | Source | 586 | 401 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 587 | 401 | T val = 0; | 588 | 401 | if (UNLIKELY(len == 0)) { | 589 | 0 | *result = PARSE_SUCCESS; | 590 | 0 | return val; | 591 | 0 | } | 592 | | // Factor out the first char for error handling speeds up the loop. | 593 | 401 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 594 | 368 | val = s[0] - '0'; | 595 | 368 | } else { | 596 | 33 | *result = PARSE_FAILURE; | 597 | 33 | return 0; | 598 | 33 | } | 599 | 1.09k | for (int i = 1; i < len; ++i) { | 600 | 981 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 601 | 725 | T digit = s[i] - '0'; | 602 | 725 | val = val * 10 + digit; | 603 | 725 | } else { | 604 | 256 | if constexpr (enable_strict_mode) { | 605 | 256 | if (UNLIKELY(!is_all_whitespace(s + i, len - i))) { | 606 | 256 | *result = PARSE_FAILURE; | 607 | 256 | return 0; | 608 | 256 | } | 609 | | } else { | 610 | | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 611 | | !is_float_suffix(s + i, len - i)))) { | 612 | | *result = PARSE_FAILURE; | 613 | | return 0; | 614 | | } | 615 | | } | 616 | 0 | *result = PARSE_SUCCESS; | 617 | 256 | return val; | 618 | 256 | } | 619 | 981 | } | 620 | 112 | *result = PARSE_SUCCESS; | 621 | 112 | return val; | 622 | 368 | } |
|
623 | | |
624 | | // at least the first char(if any) must be a digit. |
625 | | template <typename T> |
626 | | T StringParser::string_to_uint_greedy_no_overflow(const char* __restrict s, int max_len, |
627 | 59 | ParseResult* result) { |
628 | 59 | T val = 0; |
629 | 59 | if (max_len == 0) [[unlikely]] { |
630 | 10 | *result = PARSE_SUCCESS; |
631 | 10 | return val; |
632 | 10 | } |
633 | | // Factor out the first char for error handling speeds up the loop. |
634 | 49 | if (is_numeric_ascii(s[0])) [[likely]] { |
635 | 49 | val = s[0] - '0'; |
636 | 49 | } else { |
637 | 0 | *result = PARSE_FAILURE; |
638 | 0 | return 0; |
639 | 0 | } |
640 | 215 | for (int i = 1; i < max_len; ++i) { |
641 | 166 | if (is_numeric_ascii(s[i])) [[likely]] { |
642 | 166 | T digit = s[i] - '0'; |
643 | 166 | val = val * 10 + digit; |
644 | 166 | } else { |
645 | | // 123abc, return 123 |
646 | 0 | *result = PARSE_SUCCESS; |
647 | 0 | return val; |
648 | 0 | } |
649 | 166 | } |
650 | 49 | *result = PARSE_SUCCESS; |
651 | 49 | return val; |
652 | 49 | } |
653 | | |
654 | | template <typename T> |
655 | 153k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
656 | 153k | int i = 0; |
657 | | // skip leading spaces |
658 | 153k | for (; i < len; ++i) { |
659 | 153k | if (!is_whitespace_ascii(s[i])) { |
660 | 153k | break; |
661 | 153k | } |
662 | 153k | } |
663 | | |
664 | | // skip back spaces |
665 | 153k | int j = len - 1; |
666 | 153k | for (; j >= i; j--) { |
667 | 153k | if (!is_whitespace_ascii(s[j])) { |
668 | 153k | break; |
669 | 153k | } |
670 | 153k | } |
671 | | |
672 | | // skip leading '+', from_chars can handle '-' |
673 | 153k | if (i < len && s[i] == '+') { |
674 | 7.08k | i++; |
675 | | // ++ or +- are not valid, but the first + is already skipped, |
676 | | // if don't check here, from_chars will succeed. |
677 | | // |
678 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' |
679 | | // which may avoid this extra check here. |
680 | | // e.g.: |
681 | | // fast_float::chars_format format = |
682 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; |
683 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); |
684 | 7.08k | if (i < len && (s[i] == '+' || s[i] == '-')) { |
685 | 20 | *result = PARSE_FAILURE; |
686 | 20 | return 0; |
687 | 20 | } |
688 | 7.08k | } |
689 | 153k | if (UNLIKELY(i > j)) { |
690 | 32 | *result = PARSE_FAILURE; |
691 | 32 | return 0; |
692 | 32 | } |
693 | | |
694 | | // Use double here to not lose precision while accumulating the result |
695 | 153k | double val = 0; |
696 | 153k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
697 | | |
698 | 153k | if (res.ptr == s + j + 1) { |
699 | 148k | *result = PARSE_SUCCESS; |
700 | 148k | return val; |
701 | 148k | } else { |
702 | 4.96k | *result = PARSE_FAILURE; |
703 | 4.96k | } |
704 | 4.96k | return 0; |
705 | 153k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 655 | 87.7k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 656 | 87.7k | int i = 0; | 657 | | // skip leading spaces | 658 | 87.7k | for (; i < len; ++i) { | 659 | 87.7k | if (!is_whitespace_ascii(s[i])) { | 660 | 87.7k | break; | 661 | 87.7k | } | 662 | 87.7k | } | 663 | | | 664 | | // skip back spaces | 665 | 87.7k | int j = len - 1; | 666 | 87.7k | for (; j >= i; j--) { | 667 | 87.7k | if (!is_whitespace_ascii(s[j])) { | 668 | 87.7k | break; | 669 | 87.7k | } | 670 | 87.7k | } | 671 | | | 672 | | // skip leading '+', from_chars can handle '-' | 673 | 87.7k | if (i < len && s[i] == '+') { | 674 | 3.54k | i++; | 675 | | // ++ or +- are not valid, but the first + is already skipped, | 676 | | // if don't check here, from_chars will succeed. | 677 | | // | 678 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 679 | | // which may avoid this extra check here. 
| 680 | | // e.g.: | 681 | | // fast_float::chars_format format = | 682 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 683 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 684 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 685 | 10 | *result = PARSE_FAILURE; | 686 | 10 | return 0; | 687 | 10 | } | 688 | 3.54k | } | 689 | 87.7k | if (UNLIKELY(i > j)) { | 690 | 18 | *result = PARSE_FAILURE; | 691 | 18 | return 0; | 692 | 18 | } | 693 | | | 694 | | // Use double here to not lose precision while accumulating the result | 695 | 87.7k | double val = 0; | 696 | 87.7k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 697 | | | 698 | 87.7k | if (res.ptr == s + j + 1) { | 699 | 85.0k | *result = PARSE_SUCCESS; | 700 | 85.0k | return val; | 701 | 85.0k | } else { | 702 | 2.67k | *result = PARSE_FAILURE; | 703 | 2.67k | } | 704 | 2.67k | return 0; | 705 | 87.7k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 655 | 65.3k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 656 | 65.3k | int i = 0; | 657 | | // skip leading spaces | 658 | 65.3k | for (; i < len; ++i) { | 659 | 65.3k | if (!is_whitespace_ascii(s[i])) { | 660 | 65.3k | break; | 661 | 65.3k | } | 662 | 65.3k | } | 663 | | | 664 | | // skip back spaces | 665 | 65.3k | int j = len - 1; | 666 | 65.3k | for (; j >= i; j--) { | 667 | 65.3k | if (!is_whitespace_ascii(s[j])) { | 668 | 65.3k | break; | 669 | 65.3k | } | 670 | 65.3k | } | 671 | | | 672 | | // skip leading '+', from_chars can handle '-' | 673 | 65.3k | if (i < len && s[i] == '+') { | 674 | 3.54k | i++; | 675 | | // ++ or +- are not valid, but the first + is already skipped, | 676 | | // if don't check here, from_chars will succeed. | 677 | | // | 678 | | // New version of fast_float supports a new flag called 'chars_format::allow_leading_plus' | 679 | | // which may avoid this extra check here. 
| 680 | | // e.g.: | 681 | | // fast_float::chars_format format = | 682 | | // fast_float::chars_format::general | fast_float::chars_format::allow_leading_plus; | 683 | | // auto res = fast_float::from_chars(s + i, s + j + 1, val, format); | 684 | 3.54k | if (i < len && (s[i] == '+' || s[i] == '-')) { | 685 | 10 | *result = PARSE_FAILURE; | 686 | 10 | return 0; | 687 | 10 | } | 688 | 3.54k | } | 689 | 65.3k | if (UNLIKELY(i > j)) { | 690 | 14 | *result = PARSE_FAILURE; | 691 | 14 | return 0; | 692 | 14 | } | 693 | | | 694 | | // Use double here to not lose precision while accumulating the result | 695 | 65.3k | double val = 0; | 696 | 65.3k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 697 | | | 698 | 65.3k | if (res.ptr == s + j + 1) { | 699 | 63.0k | *result = PARSE_SUCCESS; | 700 | 63.0k | return val; | 701 | 63.0k | } else { | 702 | 2.28k | *result = PARSE_FAILURE; | 703 | 2.28k | } | 704 | 2.28k | return 0; | 705 | 65.3k | } |
|
706 | | |
707 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
708 | 11.7k | ParseResult* result) { |
709 | 11.7k | *result = PARSE_SUCCESS; |
710 | | |
711 | 11.7k | if (len == 1) { |
712 | 2.75k | if (s[0] == '1' || s[0] == 't' || s[0] == 'T') { |
713 | 343 | return true; |
714 | 343 | } |
715 | 2.41k | if (s[0] == '0' || s[0] == 'f' || s[0] == 'F') { |
716 | 966 | return false; |
717 | 966 | } |
718 | 1.44k | *result = PARSE_FAILURE; |
719 | 1.44k | return false; |
720 | 2.41k | } |
721 | | |
722 | 9.00k | if (len == 2) { |
723 | 1.00k | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) { |
724 | 10 | return true; |
725 | 10 | } |
726 | 994 | if ((s[0] == 'n' || s[0] == 'N') && (s[1] == 'o' || s[1] == 'O')) { |
727 | 9 | return false; |
728 | 9 | } |
729 | 994 | } |
730 | | |
731 | 8.98k | if (len == 3) { |
732 | 40 | if ((s[0] == 'y' || s[0] == 'Y') && (s[1] == 'e' || s[1] == 'E') && |
733 | 40 | (s[2] == 's' || s[2] == 'S')) { |
734 | 10 | return true; |
735 | 10 | } |
736 | 30 | if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'f' || s[1] == 'F') && |
737 | 30 | (s[2] == 'f' || s[2] == 'F')) { |
738 | 9 | return false; |
739 | 9 | } |
740 | 30 | } |
741 | | |
742 | 8.96k | if (len == 4 && (s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') && |
743 | 8.96k | (s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E')) { |
744 | 3.49k | return true; |
745 | 3.49k | } |
746 | | |
747 | 5.46k | if (len == 5 && (s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') && |
748 | 5.46k | (s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') && |
749 | 5.46k | (s[4] == 'e' || s[4] == 'E')) { |
750 | 3.54k | return false; |
751 | 3.54k | } |
752 | | |
753 | | // No valid boolean value found |
754 | 1.92k | *result = PARSE_FAILURE; |
755 | 1.92k | return false; |
756 | 5.46k | } |
757 | | |
758 | | /* |
759 | | template <PrimitiveType P, typename T, typename DecimalType> |
760 | | T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision, |
761 | | int type_scale, ParseResult* result) { |
762 | | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
763 | | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
764 | | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
765 | | "wide::Int256."); |
766 | | // Special cases: |
767 | | // 1) '' == Fail, an empty string fails to parse. |
768 | | // 2) ' # ' == #, leading and trailing white space is ignored. |
769 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). |
770 | | // 4) '#.' == '#', a trailing dot is ignored. |
771 | | |
772 | | // Ignore leading and trailing spaces. |
773 | | while (len > 0 && is_whitespace(*s)) { |
774 | | ++s; |
775 | | --len; |
776 | | } |
777 | | while (len > 0 && is_whitespace(s[len - 1])) { |
778 | | --len; |
779 | | } |
780 | | |
781 | | bool is_negative = false; |
782 | | if (len > 0) { |
783 | | switch (*s) { |
784 | | case '-': |
785 | | is_negative = true; |
786 | | [[fallthrough]]; |
787 | | case '+': |
788 | | ++s; |
789 | | --len; |
790 | | } |
791 | | } |
792 | | |
793 | | // Ignore leading zeros. |
794 | | bool found_value = false; |
795 | | while (len > 0 && UNLIKELY(*s == '0')) { |
796 | | found_value = true; |
797 | | ++s; |
798 | | --len; |
799 | | } |
800 | | |
801 | | // Ignore leading zeros even after a dot. This allows for differentiating between |
802 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would |
803 | | // overflow. |
804 | | int scale = 0; |
805 | | int found_dot = 0; |
806 | | if (len > 0 && *s == '.') { |
807 | | found_dot = 1; |
808 | | ++s; |
809 | | --len; |
810 | | while (len > 0 && UNLIKELY(*s == '0')) { |
811 | | found_value = true; |
812 | | ++scale; |
813 | | ++s; |
814 | | --len; |
815 | | } |
816 | | } |
817 | | |
818 | | int precision = 0; |
819 | | int max_digit = type_precision - type_scale; |
820 | | int cur_digit = 0; |
821 | | bool found_exponent = false; |
822 | | int8_t exponent = 0; |
823 | | T value = 0; |
824 | | bool has_round = false; |
825 | | for (int i = 0; i < len; ++i) { |
826 | | const char& c = s[i]; |
827 | | if (LIKELY('0' <= c && c <= '9')) { |
828 | | found_value = true; |
829 | | // Ignore digits once the type's precision limit is reached. This avoids |
830 | | // overflowing the underlying storage while handling a string like |
831 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and |
832 | | // an exponent will be made later. |
833 | | if (LIKELY(type_precision > precision) && !has_round) { |
834 | | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... |
835 | | ++precision; |
836 | | scale += found_dot; |
837 | | cur_digit = precision - scale; |
838 | | } else if (!found_dot && max_digit < (precision - scale)) { |
839 | | *result = StringParser::PARSE_OVERFLOW; |
840 | | value = is_negative ? vectorized::min_decimal_value<P>(type_precision) |
841 | | : vectorized::max_decimal_value<P>(type_precision); |
842 | | return value; |
843 | | } else if (found_dot && scale >= type_scale && !has_round) { |
844 | | // make rounding cases |
845 | | if (c > '4') { |
846 | | value += 1; |
847 | | } |
848 | | has_round = true; |
849 | | continue; |
850 | | } else if (!found_dot) { |
851 | | ++cur_digit; |
852 | | } |
853 | | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. |
854 | | } else if (c == '.' && LIKELY(!found_dot)) { |
855 | | found_dot = 1; |
856 | | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { |
857 | | found_exponent = true; |
858 | | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); |
859 | | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { |
860 | | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { |
861 | | *result = StringParser::PARSE_UNDERFLOW; |
862 | | } |
863 | | return 0; |
864 | | } |
865 | | break; |
866 | | } else { |
867 | | if (value == 0) { |
868 | | *result = StringParser::PARSE_FAILURE; |
869 | | return 0; |
870 | | } |
871 | | // here to handle |
872 | | *result = StringParser::PARSE_SUCCESS; |
873 | | if (type_scale >= scale) { |
874 | | value *= get_scale_multiplier<T>(type_scale - scale); |
875 | | // here meet non-valid character, should return the value, keep going to meet |
876 | | // the E/e character because we make right user-given type_precision |
877 | | // not max number type_precision |
878 | | if (!is_numeric_ascii(c)) { |
879 | | if (cur_digit > type_precision) { |
880 | | *result = StringParser::PARSE_OVERFLOW; |
881 | | value = is_negative ? vectorized::min_decimal_value<P>(type_precision) |
882 | | : vectorized::max_decimal_value<P>(type_precision); |
883 | | return value; |
884 | | } |
885 | | return is_negative ? T(-value) : T(value); |
886 | | } |
887 | | } |
888 | | |
889 | | return is_negative ? T(-value) : T(value); |
890 | | } |
891 | | } |
892 | | |
893 | | // Find the number of truncated digits before adjusting the precision for an exponent. |
894 | | if (exponent > scale) { |
895 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the |
896 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. |
897 | | precision += exponent - scale; |
898 | | |
899 | | value *= get_scale_multiplier<T>(exponent - scale); |
900 | | scale = 0; |
901 | | } else { |
902 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, |
903 | | // the precision must also be set to 4 but that will be done below for the |
904 | | // non-exponent case anyways. |
905 | | scale -= exponent; |
906 | | } |
907 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros |
908 | | // were ignored during previous parsing. |
909 | | if (scale > precision) { |
910 | | precision = scale; |
911 | | } |
912 | | |
913 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower |
914 | | // than just letting the function run out. |
915 | | *result = StringParser::PARSE_SUCCESS; |
916 | | if (UNLIKELY(precision - scale > type_precision - type_scale)) { |
917 | | *result = StringParser::PARSE_OVERFLOW; |
918 | | if constexpr (TYPE_DECIMALV2 != P) { |
919 | | // decimalv3 overflow will return max min value for type precision |
920 | | value = is_negative ? vectorized::min_decimal_value<P>(type_precision) |
921 | | : vectorized::max_decimal_value<P>(type_precision); |
922 | | return value; |
923 | | } |
924 | | } else if (UNLIKELY(scale > type_scale)) { |
925 | | *result = StringParser::PARSE_UNDERFLOW; |
926 | | int shift = scale - type_scale; |
927 | | T divisor = get_scale_multiplier<T>(shift); |
928 | | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { |
929 | | value = 0; |
930 | | } else { |
931 | | T remainder = value % divisor; |
932 | | value /= divisor; |
933 | | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { |
934 | | value += 1; |
935 | | } |
936 | | } |
937 | | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. |
938 | | } else if (UNLIKELY(!found_value && !found_dot)) { |
939 | | *result = StringParser::PARSE_FAILURE; |
940 | | } |
941 | | |
942 | | if (type_scale > scale) { |
943 | | value *= get_scale_multiplier<T>(type_scale - scale); |
944 | | } |
945 | | |
946 | | return is_negative ? T(-value) : T(value); |
947 | | } |
948 | | */ |
949 | | |
950 | | } // end namespace doris |