be/src/util/string_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/string_parser.hpp" |
19 | | |
20 | | #include <limits> |
21 | | |
22 | | #include "core/extended_types.h" |
23 | | #include "core/types.h" |
24 | | namespace doris { |
25 | | #include "common/compile_check_avoid_begin.h" |
26 | | // Supported decimal number format: |
27 | | // <decimal> ::= <whitespace>* <value> <whitespace>* |
28 | | // |
29 | | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
30 | | // |
31 | | // <value> ::= <sign>? <significand> <exponent>? |
32 | | // |
33 | | // <sign> ::= "+" | "-" |
34 | | // |
35 | | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
36 | | // |
37 | | // <digits> ::= <digit>+ |
38 | | // |
39 | | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
40 | | // |
41 | | // <exponent> ::= <e_marker> <sign>? <digits> |
42 | | // |
43 | | // <e_marker> ::= "e" | "E" |
44 | | // |
45 | | // Parsing algorithm: |
46 | | // 1. Trim spaces and the sign, then normalize the significand by skipping leading zeros and an |
47 | | // optional leading dot. During this scan, count digits that belong to the original integral |
48 | | // part (`int_part_count`) and remember where the significand ends (`end_digit_index`). |
49 | | // 2. Parse the optional exponent. Scientific notation is handled by moving the decimal point: |
50 | | // `result_int_part_digit_count = int_part_count + exponent`. For example, "12.34e-1" has |
51 | | // int_part_count=2 and exponent=-1, so the result has one integral digit: "1.234". |
52 | | // 3. Build the result in scaled-integer form: first collect the integral digits up to the shifted |
53 | | // decimal point, then collect up to `type_scale` fractional digits, padding with zeros when the |
54 | | // input has fewer fractional digits than the target scale. |
55 | | // 4. If there are extra fractional digits, round half up using the first discarded digit. Finally, |
56 | | // check the integral digit count against `type_precision - type_scale` and return the signed |
57 | | // scaled integer value. |
58 | | template <PrimitiveType P> |
59 | | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
60 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
61 | 21.0M | ParseResult* result) { |
62 | 21.0M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
63 | 21.0M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
64 | 21.0M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
65 | 21.0M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
66 | 21.0M | "wide::Int256."); |
67 | | |
68 | | // Parse in two logical coordinate systems: |
69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and |
70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so |
71 | | // ".14E+3" is parsed as significand "14" with exponent 3. |
72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position |
73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, |
74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. |
75 | | // `digit_index` always indexes the normalized significand string, which may still contain a |
76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. |
77 | | // Ignore leading and trailing spaces. |
78 | 21.0M | s = skip_ascii_whitespaces(s, len); |
79 | | |
80 | 21.0M | bool is_negative = false; |
81 | 21.0M | if (len > 0) { |
82 | 20.9M | switch (*s) { |
83 | 524k | case '-': |
84 | 524k | is_negative = true; |
85 | 524k | [[fallthrough]]; |
86 | 552k | case '+': |
87 | 552k | ++s; |
88 | 552k | --len; |
89 | 20.9M | } |
90 | 20.9M | } |
91 | | // Ignore leading zeros. |
92 | 21.0M | bool found_value = false; |
93 | 28.6M | while (len > 0 && UNLIKELY(*s == '0')) { |
94 | 7.54M | found_value = true; |
95 | 7.54M | ++s; |
96 | 7.54M | --len; |
97 | 7.54M | } |
98 | | |
99 | 21.0M | int found_dot = 0; |
100 | 21.0M | if (len > 0 && *s == '.') { |
101 | 7.28M | found_dot = 1; |
102 | 7.28M | ++s; |
103 | 7.28M | --len; |
104 | 7.28M | } |
105 | 21.0M | int int_part_count = 0; |
106 | 21.0M | int i = 0; |
107 | 149M | for (; i != len; ++i) { |
108 | 128M | const char& c = s[i]; |
109 | 128M | if (LIKELY('0' <= c && c <= '9')) { |
110 | 118M | found_value = true; |
111 | 118M | if (!found_dot) { |
112 | 60.8M | ++int_part_count; |
113 | 60.8M | } |
114 | 118M | } else if (c == '.') { |
115 | 9.71M | if (found_dot) { |
116 | 2 | *result = StringParser::PARSE_FAILURE; |
117 | 2 | return 0; |
118 | 2 | } |
119 | 9.71M | found_dot = 1; |
120 | 9.71M | } else { |
121 | 92.3k | break; |
122 | 92.3k | } |
123 | 128M | } |
124 | 21.0M | if (!found_value) { |
125 | | // '', '.' |
126 | 98.0k | *result = StringParser::PARSE_FAILURE; |
127 | 98.0k | return 0; |
128 | 98.0k | } |
129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts |
130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after |
131 | | // "E+2". |
132 | 20.9M | int64_t exponent = 0; |
133 | 20.9M | auto end_digit_index = i; |
134 | 20.9M | if (i != len) { |
135 | 113k | bool negative_exponent = false; |
136 | 113k | if (s[i] == 'e' || s[i] == 'E') { |
137 | 113k | ++i; |
138 | 113k | if (i != len) { |
139 | 113k | switch (s[i]) { |
140 | 15.6k | case '-': |
141 | 15.6k | negative_exponent = true; |
142 | 15.6k | [[fallthrough]]; |
143 | 82.2k | case '+': |
144 | 82.2k | ++i; |
145 | 113k | } |
146 | 113k | } |
147 | 113k | if (i == len) { |
148 | | // '123e', '123e+', '123e-' |
149 | 6 | *result = StringParser::PARSE_FAILURE; |
150 | 6 | return 0; |
151 | 6 | } |
152 | 335k | for (; i != len; ++i) { |
153 | 222k | const char& c = s[i]; |
154 | 222k | if (LIKELY('0' <= c && c <= '9')) { |
155 | 222k | exponent = exponent * 10 + (c - '0'); |
156 | | // max string len is config::string_type_length_soft_limit_bytes, |
157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
158 | | // just check overflow of int32_t to simplify the logic |
159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 |
160 | 222k | if (exponent > std::numeric_limits<int32_t>::max()) { |
161 | 0 | *result = StringParser::PARSE_OVERFLOW; |
162 | 0 | return 0; |
163 | 0 | } |
164 | 222k | } else { |
165 | | // '123e12abc', '123e1.2' |
166 | 22 | *result = StringParser::PARSE_FAILURE; |
167 | 22 | return 0; |
168 | 22 | } |
169 | 222k | } |
170 | 113k | if (negative_exponent) { |
171 | 15.6k | exponent = -exponent; |
172 | 15.6k | } |
173 | 113k | } else { |
174 | 206 | *result = StringParser::PARSE_FAILURE; |
175 | 206 | return 0; |
176 | 206 | } |
177 | 113k | } |
178 | | // TODO: check limit values of exponent and add UT |
179 | | // max string len is config::string_type_length_soft_limit_bytes, |
180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
181 | | // so int_part_count will be in range of int32_t, |
182 | | // and int_part_count + exponent will be in range of int64_t |
183 | 20.9M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; |
184 | 20.9M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || |
185 | 21.0M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { |
186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
187 | 0 | return 0; |
188 | 0 | } |
189 | 20.9M | int result_int_part_digit_count = tmp_result_int_part_digit_count; |
190 | 20.9M | T int_part_number = 0; |
191 | 20.9M | T frac_part_number = 0; |
192 | 20.9M | int actual_frac_part_count = 0; |
193 | 20.9M | int digit_index = 0; |
194 | 21.0M | if (result_int_part_digit_count >= 0) { |
195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra |
196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to |
197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index |
198 | | // because missing digits are appended later by multiplying with powers of 10. |
199 | 21.0M | int max_index = std::min(found_dot ? (result_int_part_digit_count + |
200 | 16.9M | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) |
201 | 21.0M | : result_int_part_digit_count, |
202 | 21.0M | end_digit_index); |
203 | 21.0M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); |
204 | | // skip zero number |
205 | 21.8M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { |
206 | 858k | } |
207 | | // test 0.00, .00, 0.{00...}e2147483647 |
208 | | // 0.00000e2147483647 |
209 | 21.0M | if (digit_index != max_index && |
210 | 21.0M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { |
211 | 17.0k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
212 | 17.0k | return 0; |
213 | 17.0k | } |
214 | | // get int part number |
215 | 82.2M | for (; digit_index != max_index; ++digit_index) { |
216 | 61.2M | if (UNLIKELY(s[digit_index] == '.')) { |
217 | 71.1k | continue; |
218 | 71.1k | } |
219 | 61.1M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); |
220 | 61.1M | } |
221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal |
222 | | // point past all available significant digits, append zeros by scaling the integer part: |
223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. |
224 | 20.9M | auto total_significant_digit_count = |
225 | 20.9M | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); |
226 | 20.9M | if (result_int_part_digit_count > total_significant_digit_count) { |
227 | 64.8k | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - |
228 | 64.8k | total_significant_digit_count); |
229 | 64.8k | } |
230 | 18.4E | } else { |
231 | | // leading zeros of fraction part |
232 | 18.4E | actual_frac_part_count = -result_int_part_digit_count; |
233 | 18.4E | } |
234 | | // get fraction part number |
235 | 85.8M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { |
236 | 64.8M | if (UNLIKELY(s[digit_index] == '.')) { |
237 | 9.41M | continue; |
238 | 9.41M | } |
239 | 55.4M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); |
240 | 55.4M | ++actual_frac_part_count; |
241 | 55.4M | } |
242 | 20.9M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional |
244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions |
245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of |
246 | | // rounding up. |
247 | 20.9M | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { |
248 | 292k | if (UNLIKELY(s[digit_index] == '.')) { |
249 | 210k | ++digit_index; |
250 | 210k | } |
251 | 292k | if (digit_index != end_digit_index) { |
252 | | // example: test 1.5 -> decimal(1, 0) |
253 | 291k | if (s[digit_index] >= '5') { |
254 | 207k | ++frac_part_number; |
255 | 207k | if (frac_part_number == type_scale_multiplier) { |
256 | 176k | frac_part_number = 0; |
257 | 176k | ++int_part_number; |
258 | 176k | } |
259 | 207k | } |
260 | 291k | } |
261 | 20.6M | } else { |
262 | 20.6M | if (actual_frac_part_count < type_scale) { |
263 | 4.12M | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
264 | 4.12M | } |
265 | 20.6M | } |
266 | 20.9M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
267 | 152 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
268 | 152 | return 0; |
269 | 152 | } |
270 | | |
271 | 20.9M | T value = int_part_number * type_scale_multiplier + frac_part_number; |
272 | 20.9M | *result = StringParser::PARSE_SUCCESS; |
273 | 20.9M | return is_negative ? T(-value) : T(value); |
274 | 20.9M | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 61 | 2.03M | ParseResult* result) { | 62 | 2.03M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 63 | 2.03M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 64 | 2.03M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 65 | 2.03M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 66 | 2.03M | "wide::Int256."); | 67 | | | 68 | | // Parse in two logical coordinate systems: | 69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and | 70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so | 71 | | // ".14E+3" is parsed as significand "14" with exponent 3. | 72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position | 73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, | 74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. | 75 | | // `digit_index` always indexes the normalized significand string, which may still contain a | 76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. | 77 | | // Ignore leading and trailing spaces. | 78 | 2.03M | s = skip_ascii_whitespaces(s, len); | 79 | | | 80 | 2.03M | bool is_negative = false; | 81 | 2.03M | if (len > 0) { | 82 | 2.03M | switch (*s) { | 83 | 222k | case '-': | 84 | 222k | is_negative = true; | 85 | 222k | [[fallthrough]]; | 86 | 230k | case '+': | 87 | 230k | ++s; | 88 | 230k | --len; | 89 | 2.03M | } | 90 | 2.03M | } | 91 | | // Ignore leading zeros. | 92 | 2.03M | bool found_value = false; | 93 | 2.09M | while (len > 0 && UNLIKELY(*s == '0')) { | 94 | 56.0k | found_value = true; | 95 | 56.0k | ++s; | 96 | 56.0k | --len; | 97 | 56.0k | } | 98 | | | 99 | 2.03M | int found_dot = 0; | 100 | 2.03M | if (len > 0 && *s == '.') { | 101 | 19.6k | found_dot = 1; | 102 | 19.6k | ++s; | 103 | 19.6k | --len; | 104 | 19.6k | } | 105 | 2.03M | int int_part_count = 0; | 106 | 2.03M | int i = 0; | 107 | 12.4M | for (; i != len; ++i) { | 108 | 10.3M | const char& c = s[i]; | 109 | 10.3M | if (LIKELY('0' <= c && c <= '9')) { | 110 | 8.52M | found_value = true; | 111 | 8.52M | if (!found_dot) { | 112 | 3.92M | ++int_part_count; | 113 | 3.92M | } | 114 | 8.52M | } else if (c == '.') { | 115 | 1.85M | if (found_dot) { | 116 | 2 | *result = StringParser::PARSE_FAILURE; | 117 | 2 | return 0; | 118 | 2 | } | 119 | 1.85M | found_dot = 1; | 120 | 1.85M | } else { | 121 | 8.35k | break; | 122 | 8.35k | } | 123 | 10.3M | } | 124 | 2.03M | if (!found_value) { | 125 | | // '', '.' | 126 | 6.97k | *result = StringParser::PARSE_FAILURE; | 127 | 6.97k | return 0; | 128 | 6.97k | } | 129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts | 130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after | 131 | | // "E+2". | 132 | 2.03M | int64_t exponent = 0; | 133 | 2.03M | auto end_digit_index = i; | 134 | 2.03M | if (i != len) { | 135 | 9.42k | bool negative_exponent = false; | 136 | 9.42k | if (s[i] == 'e' || s[i] == 'E') { | 137 | 9.33k | ++i; | 138 | 9.33k | if (i != len) { | 139 | 9.33k | switch (s[i]) { | 140 | 1.54k | case '-': | 141 | 1.54k | negative_exponent = true; | 142 | 1.54k | [[fallthrough]]; | 143 | 1.54k | case '+': | 144 | 1.54k | ++i; | 145 | 9.33k | } | 146 | 9.33k | } | 147 | 9.33k | if (i == len) { | 148 | | // '123e', '123e+', '123e-' | 149 | 6 | *result = StringParser::PARSE_FAILURE; | 150 | 6 | return 0; | 151 | 6 | } | 152 | 24.6k | for (; i != len; ++i) { | 153 | 15.3k | const char& c = s[i]; | 154 | 15.3k | if (LIKELY('0' <= c && c <= '9')) { | 155 | 15.3k | exponent = exponent * 10 + (c - '0'); | 156 | | // max string len is config::string_type_length_soft_limit_bytes, | 157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 158 | | // just check overflow of int32_t to simplify the logic | 159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 160 | 15.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 161 | 0 | *result = StringParser::PARSE_OVERFLOW; | 162 | 0 | return 0; | 163 | 0 | } | 164 | 15.3k | } else { | 165 | | // '123e12abc', '123e1.2' | 166 | 12 | *result = StringParser::PARSE_FAILURE; | 167 | 12 | return 0; | 168 | 12 | } | 169 | 15.3k | } | 170 | 9.31k | if (negative_exponent) { | 171 | 1.53k | exponent = -exponent; | 172 | 1.53k | } | 173 | 9.31k | } else { | 174 | 90 | *result = StringParser::PARSE_FAILURE; | 175 | 90 | return 0; | 176 | 90 | } | 177 | 9.42k | } | 178 | | // TODO: check limit values of exponent and add UT | 179 | | // max string len is config::string_type_length_soft_limit_bytes, | 180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 181 | | // so int_part_count will be in range of int32_t, | 182 | | // and int_part_count + exponent will be in range of int64_t | 183 | 2.03M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 184 | 2.03M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 185 | 2.03M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 187 | 0 | return 0; | 188 | 0 | } | 189 | 2.03M | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 190 | 2.03M | T int_part_number = 0; | 191 | 2.03M | T frac_part_number = 0; | 192 | 2.03M | int actual_frac_part_count = 0; | 193 | 2.03M | int digit_index = 0; | 194 | 2.03M | if (result_int_part_digit_count >= 0) { | 195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra | 196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to | 197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index | 198 | | // because missing digits are appended later by multiplying with powers of 10. | 199 | 2.03M | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 200 | 1.87M | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 201 | 2.03M | : result_int_part_digit_count, | 202 | 2.03M | end_digit_index); | 203 | 2.03M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 204 | | // skip zero number | 205 | 2.24M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 206 | 212k | } | 207 | | // test 0.00, .00, 0.{00...}e2147483647 | 208 | | // 0.00000e2147483647 | 209 | 2.03M | if (digit_index != max_index && | 210 | 2.03M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 211 | 6.03k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 212 | 6.03k | return 0; | 213 | 6.03k | } | 214 | | // get int part number | 215 | 5.85M | for (; digit_index != max_index; ++digit_index) { | 216 | 3.83M | if (UNLIKELY(s[digit_index] == '.')) { | 217 | 1.60k | continue; | 218 | 1.60k | } | 219 | 3.82M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 220 | 3.82M | } | 221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal | 222 | | // point past all available significant digits, append zeros by scaling the integer part: | 223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. | 224 | 2.02M | auto total_significant_digit_count = | 225 | 2.02M | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); | 226 | 2.02M | if (result_int_part_digit_count > total_significant_digit_count) { | 227 | 100 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 228 | 100 | total_significant_digit_count); | 229 | 100 | } | 230 | 2.02M | } else { | 231 | | // leading zeros of fraction part | 232 | 270 | actual_frac_part_count = -result_int_part_digit_count; | 233 | 270 | } | 234 | | // get fraction part number | 235 | 7.81M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 236 | 5.79M | if (UNLIKELY(s[digit_index] == '.')) { | 237 | 1.64M | continue; | 238 | 1.64M | } | 239 | 4.14M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 240 | 4.14M | ++actual_frac_part_count; | 241 | 4.14M | } | 242 | 2.02M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional | 244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions | 245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of | 246 | | // rounding up. | 247 | 2.02M | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { | 248 | 228k | if (UNLIKELY(s[digit_index] == '.')) { | 249 | 208k | ++digit_index; | 250 | 208k | } | 251 | 228k | if (digit_index != end_digit_index) { | 252 | | // example: test 1.5 -> decimal(1, 0) | 253 | 228k | if (s[digit_index] >= '5') { | 254 | 181k | ++frac_part_number; | 255 | 181k | if (frac_part_number == type_scale_multiplier) { | 256 | 173k | frac_part_number = 0; | 257 | 173k | ++int_part_number; | 258 | 173k | } | 259 | 181k | } | 260 | 228k | } | 261 | 1.79M | } else { | 262 | 1.79M | if (actual_frac_part_count < type_scale) { | 263 | 30.1k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 264 | 30.1k | } | 265 | 1.79M | } | 266 | 2.02M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 267 | 24 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 268 | 24 | return 0; | 269 | 24 | } | 270 | | | 271 | 2.02M | T value = int_part_number * type_scale_multiplier + frac_part_number; | 272 | 2.02M | *result = StringParser::PARSE_SUCCESS; | 273 | 2.02M | return is_negative ? T(-value) : T(value); | 274 | 2.02M | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 61 | 16.1M | ParseResult* result) { | 62 | 16.1M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 63 | 16.1M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 64 | 16.1M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 65 | 16.1M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 66 | 16.1M | "wide::Int256."); | 67 | | | 68 | | // Parse in two logical coordinate systems: | 69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and | 70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so | 71 | | // ".14E+3" is parsed as significand "14" with exponent 3. | 72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position | 73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, | 74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. | 75 | | // `digit_index` always indexes the normalized significand string, which may still contain a | 76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. | 77 | | // Ignore leading and trailing spaces. | 78 | 16.1M | s = skip_ascii_whitespaces(s, len); | 79 | | | 80 | 16.1M | bool is_negative = false; | 81 | 16.1M | if (len > 0) { | 82 | 16.1M | switch (*s) { | 83 | 112k | case '-': | 84 | 112k | is_negative = true; | 85 | 112k | [[fallthrough]]; | 86 | 119k | case '+': | 87 | 119k | ++s; | 88 | 119k | --len; | 89 | 16.1M | } | 90 | 16.1M | } | 91 | | // Ignore leading zeros. | 92 | 16.1M | bool found_value = false; | 93 | 23.4M | while (len > 0 && UNLIKELY(*s == '0')) { | 94 | 7.26M | found_value = true; | 95 | 7.26M | ++s; | 96 | 7.26M | --len; | 97 | 7.26M | } | 98 | | | 99 | 16.1M | int found_dot = 0; | 100 | 16.1M | if (len > 0 && *s == '.') { | 101 | 7.21M | found_dot = 1; | 102 | 7.21M | ++s; | 103 | 7.21M | --len; | 104 | 7.21M | } | 105 | 16.1M | int int_part_count = 0; | 106 | 16.1M | int i = 0; | 107 | 82.9M | for (; i != len; ++i) { | 108 | 66.7M | const char& c = s[i]; | 109 | 66.7M | if (LIKELY('0' <= c && c <= '9')) { | 110 | 61.3M | found_value = true; | 111 | 61.3M | if (!found_dot) { | 112 | 29.2M | ++int_part_count; | 113 | 29.2M | } | 114 | 61.3M | } else if (c == '.') { | 115 | 5.37M | if (found_dot) { | 116 | 0 | *result = StringParser::PARSE_FAILURE; | 117 | 0 | return 0; | 118 | 0 | } | 119 | 5.37M | found_dot = 1; | 120 | 18.4E | } else { | 121 | 18.4E | break; | 122 | 18.4E | } | 123 | 66.7M | } | 124 | 16.1M | if (!found_value) { | 125 | | // '', '.' | 126 | 379 | *result = StringParser::PARSE_FAILURE; | 127 | 379 | return 0; | 128 | 379 | } | 129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts | 130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after | 131 | | // "E+2". | 132 | 16.1M | int64_t exponent = 0; | 133 | 16.1M | auto end_digit_index = i; | 134 | 16.1M | if (i != len) { | 135 | 13.7k | bool negative_exponent = false; | 136 | 13.7k | if (s[i] == 'e' || s[i] == 'E') { | 137 | 13.6k | ++i; | 138 | 13.6k | if (i != len) { | 139 | 13.6k | switch (s[i]) { | 140 | 5.89k | case '-': | 141 | 5.89k | negative_exponent = true; | 142 | 5.89k | [[fallthrough]]; | 143 | 5.89k | case '+': | 144 | 5.89k | ++i; | 145 | 13.6k | } | 146 | 13.6k | } | 147 | 13.6k | if (i == len) { | 148 | | // '123e', '123e+', '123e-' | 149 | 0 | *result = StringParser::PARSE_FAILURE; | 150 | 0 | return 0; | 151 | 0 | } | 152 | 40.7k | for (; i != len; ++i) { | 153 | 27.0k | const char& c = s[i]; | 154 | 27.0k | if (LIKELY('0' <= c && c <= '9')) { | 155 | 27.0k | exponent = exponent * 10 + (c - '0'); | 156 | | // max string len is config::string_type_length_soft_limit_bytes, | 157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 158 | | // just check overflow of int32_t to simplify the logic | 159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 160 | 27.0k | if (exponent > std::numeric_limits<int32_t>::max()) { | 161 | 0 | *result = StringParser::PARSE_OVERFLOW; | 162 | 0 | return 0; | 163 | 0 | } | 164 | 27.0k | } else { | 165 | | // '123e12abc', '123e1.2' | 166 | 0 | *result = StringParser::PARSE_FAILURE; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 27.0k | } | 170 | 13.6k | if (negative_exponent) { | 171 | 5.89k | exponent = -exponent; | 172 | 5.89k | } | 173 | 13.6k | } else { | 174 | 81 | *result = StringParser::PARSE_FAILURE; | 175 | 81 | return 0; | 176 | 81 | } | 177 | 13.7k | } | 178 | | // TODO: check limit values of exponent and add UT | 179 | | // max string len is config::string_type_length_soft_limit_bytes, | 180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 181 | | // so int_part_count will be in range of int32_t, | 182 | | // and int_part_count + exponent will be in range of int64_t | 183 | 16.1M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 184 | 16.1M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 185 | 16.1M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 187 | 0 | return 0; | 188 | 0 | } | 189 | 16.1M | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 190 | 16.1M | T int_part_number = 0; | 191 | 16.1M | T frac_part_number = 0; | 192 | 16.1M | int actual_frac_part_count = 0; | 193 | 16.1M | int digit_index = 0; | 194 | 16.1M | if (result_int_part_digit_count >= 0) { | 195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra | 196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to | 197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index | 198 | | // because missing digits are appended later by multiplying with powers of 10. | 199 | 16.1M | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 200 | 12.5M | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 201 | 16.1M | : result_int_part_digit_count, | 202 | 16.1M | end_digit_index); | 203 | 16.1M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 204 | | // skip zero number | 205 | 16.3M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 206 | 214k | } | 207 | | // test 0.00, .00, 0.{00...}e2147483647 | 208 | | // 0.00000e2147483647 | 209 | 16.1M | if (digit_index != max_index && | 210 | 16.1M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 211 | 10.5k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 212 | 10.5k | return 0; | 213 | 10.5k | } | 214 | | // get int part number | 215 | 45.0M | for (; digit_index != max_index; ++digit_index) { | 216 | 28.8M | if (UNLIKELY(s[digit_index] == '.')) { | 217 | 960 | continue; | 218 | 960 | } | 219 | 28.8M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 220 | 28.8M | } | 221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal | 222 | | // point past all available significant digits, append zeros by scaling the integer part: | 223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. | 224 | 16.1M | auto total_significant_digit_count = | 225 | 16.1M | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); | 226 | 16.1M | if (result_int_part_digit_count > total_significant_digit_count) { | 227 | 92 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 228 | 92 | total_significant_digit_count); | 229 | 92 | } | 230 | 18.4E | } else { | 231 | | // leading zeros of fraction part | 232 | 18.4E | actual_frac_part_count = -result_int_part_digit_count; | 233 | 18.4E | } | 234 | | // get fraction part number | 235 | 53.4M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 236 | 37.2M | if (UNLIKELY(s[digit_index] == '.')) { | 237 | 5.35M | continue; | 238 | 5.35M | } | 239 | 31.9M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 240 | 31.9M | ++actual_frac_part_count; | 241 | 31.9M | } | 242 | 16.1M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional | 244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions | 245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of | 246 | | // rounding up. | 247 | 16.1M | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { | 248 | 22.4k | if (UNLIKELY(s[digit_index] == '.')) { | 249 | 869 | ++digit_index; | 250 | 869 | } | 251 | 22.4k | if (digit_index != end_digit_index) { | 252 | | // example: test 1.5 -> decimal(1, 0) | 253 | 22.2k | if (s[digit_index] >= '5') { | 254 | 8.99k | ++frac_part_number; | 255 | 8.99k | if (frac_part_number == type_scale_multiplier) { | 256 | 988 | frac_part_number = 0; | 257 | 988 | ++int_part_number; | 258 | 988 | } | 259 | 8.99k | } | 260 | 22.2k | } | 261 | 16.1M | } else { | 262 | 16.1M | if (actual_frac_part_count < type_scale) { | 263 | 3.63M | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 264 | 3.63M | } | 265 | 16.1M | } | 266 | 16.1M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 267 | 56 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 268 | 56 | return 0; | 269 | 56 | } | 270 | | | 271 | 16.1M | T value = int_part_number * type_scale_multiplier + frac_part_number; | 272 | 16.1M | *result = StringParser::PARSE_SUCCESS; | 273 | 16.1M | return is_negative ? T(-value) : T(value); | 274 | 16.1M | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 61 | 2.73M | ParseResult* result) { | 62 | 2.73M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 63 | 2.73M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 64 | 2.73M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 65 | 2.73M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 66 | 2.73M | "wide::Int256."); | 67 | | | 68 | | // Parse in two logical coordinate systems: | 69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and | 70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so | 71 | | // ".14E+3" is parsed as significand "14" with exponent 3. | 72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position | 73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, | 74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. | 75 | | // `digit_index` always indexes the normalized significand string, which may still contain a | 76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. | 77 | | // Ignore leading and trailing spaces. | 78 | 2.73M | s = skip_ascii_whitespaces(s, len); | 79 | | | 80 | 2.73M | bool is_negative = false; | 81 | 2.73M | if (len > 0) { | 82 | 2.64M | switch (*s) { | 83 | 161k | case '-': | 84 | 161k | is_negative = true; | 85 | 161k | [[fallthrough]]; | 86 | 168k | case '+': | 87 | 168k | ++s; | 88 | 168k | --len; | 89 | 2.64M | } | 90 | 2.64M | } | 91 | | // Ignore leading zeros. | 92 | 2.73M | bool found_value = false; | 93 | 2.81M | while (len > 0 && UNLIKELY(*s == '0')) { | 94 | 80.4k | found_value = true; | 95 | 80.4k | ++s; | 96 | 80.4k | --len; | 97 | 80.4k | } | 98 | | | 99 | 2.73M | int found_dot = 0; | 100 | 2.73M | if (len > 0 && *s == '.') { | 101 | 29.1k | found_dot = 1; | 102 | 29.1k | ++s; | 103 | 29.1k | --len; | 104 | 29.1k | } | 105 | 2.73M | int int_part_count = 0; | 106 | 2.73M | int i = 0; | 107 | 48.7M | for (; i != len; ++i) { | 108 | 45.9M | const char& c = s[i]; | 109 | 45.9M | if (LIKELY('0' <= c && c <= '9')) { | 110 | 43.5M | found_value = true; | 111 | 43.5M | if (!found_dot) { | 112 | 25.8M | ++int_part_count; | 113 | 25.8M | } | 114 | 43.5M | } else if (c == '.') { | 115 | 2.37M | if (found_dot) { | 116 | 0 | *result = StringParser::PARSE_FAILURE; | 117 | 0 | return 0; | 118 | 0 | } | 119 | 2.37M | found_dot = 1; | 120 | 2.37M | } else { | 121 | 11.5k | break; | 122 | 11.5k | } | 123 | 45.9M | } | 124 | 2.73M | if (!found_value) { | 125 | | // '', '.' | 126 | 90.6k | *result = StringParser::PARSE_FAILURE; | 127 | 90.6k | return 0; | 128 | 90.6k | } | 129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts | 130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after | 131 | | // "E+2". | 132 | 2.64M | int64_t exponent = 0; | 133 | 2.64M | auto end_digit_index = i; | 134 | 2.64M | if (i != len) { | 135 | 12.4k | bool negative_exponent = false; | 136 | 12.4k | if (s[i] == 'e' || s[i] == 'E') { | 137 | 12.3k | ++i; | 138 | 12.3k | if (i != len) { | 139 | 12.3k | switch (s[i]) { | 140 | 4.62k | case '-': | 141 | 4.62k | negative_exponent = true; | 142 | 4.62k | [[fallthrough]]; | 143 | 4.62k | case '+': | 144 | 4.62k | ++i; | 145 | 12.3k | } | 146 | 12.3k | } | 147 | 12.3k | if (i == len) { | 148 | | // '123e', '123e+', '123e-' | 149 | 0 | *result = StringParser::PARSE_FAILURE; | 150 | 0 | return 0; | 151 | 0 | } | 152 | 35.7k | for (; i != len; ++i) { | 153 | 23.3k | const char& c = s[i]; | 154 | 23.3k | if (LIKELY('0' <= c && c <= '9')) { | 155 | 23.3k | exponent = exponent * 10 + (c - '0'); | 156 | | // max string len is config::string_type_length_soft_limit_bytes, | 157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 158 | | // just check overflow of int32_t to simplify the logic | 159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 160 | 23.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 161 | 0 | *result = StringParser::PARSE_OVERFLOW; | 162 | 0 | return 0; | 163 | 0 | } | 164 | 23.3k | } else { | 165 | | // '123e12abc', '123e1.2' | 166 | 0 | *result = StringParser::PARSE_FAILURE; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 23.3k | } | 170 | 12.3k | if (negative_exponent) { | 171 | 4.62k | exponent = -exponent; | 172 | 4.62k | } | 173 | 12.3k | } else { | 174 | 20 | *result = StringParser::PARSE_FAILURE; | 175 | 20 | return 0; | 176 | 20 | } | 177 | 12.4k | } | 178 | | // TODO: check limit values of exponent and add UT | 179 | | // max string len is config::string_type_length_soft_limit_bytes, | 180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 181 | | // so int_part_count will be in range of int32_t, | 182 | | // and int_part_count + exponent will be in range of int64_t | 183 | 2.64M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 184 | 2.64M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 185 | 2.64M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 187 | 0 | return 0; | 188 | 0 | } | 189 | 2.64M | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 190 | 2.64M | T int_part_number = 0; | 191 | 2.64M | T frac_part_number = 0; | 192 | 2.64M | int actual_frac_part_count = 0; | 193 | 2.64M | int digit_index = 0; | 194 | 2.64M | if (result_int_part_digit_count >= 0) { | 195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra | 196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to | 197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index | 198 | | // because missing digits are appended later by multiplying with powers of 10. | 199 | 2.64M | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 200 | 2.39M | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 201 | 2.64M | : result_int_part_digit_count, | 202 | 2.64M | end_digit_index); | 203 | 2.64M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 204 | | // skip zero number | 205 | 2.85M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 206 | 213k | } | 207 | | // test 0.00, .00, 0.{00...}e2147483647 | 208 | | // 0.00000e2147483647 | 209 | 2.64M | if (digit_index != max_index && | 210 | 2.64M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 211 | 143 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 212 | 143 | return 0; | 213 | 143 | } | 214 | | // get int part number | 215 | 28.4M | for (; digit_index != max_index; ++digit_index) { | 216 | 25.8M | if (UNLIKELY(s[digit_index] == '.')) { | 217 | 962 | continue; | 218 | 962 | } | 219 | 25.8M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 220 | 25.8M | } | 221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal | 222 | | // point past all available significant digits, append zeros by scaling the integer part: | 223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. | 224 | 2.64M | auto total_significant_digit_count = | 225 | 2.64M | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); | 226 | 2.64M | if (result_int_part_digit_count > total_significant_digit_count) { | 227 | 80 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 228 | 80 | total_significant_digit_count); | 229 | 80 | } | 230 | 2.64M | } else { | 231 | | // leading zeros of fraction part | 232 | 2.40k | actual_frac_part_count = -result_int_part_digit_count; | 233 | 2.40k | } | 234 | | // get fraction part number | 235 | 22.4M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 236 | 19.8M | if (UNLIKELY(s[digit_index] == '.')) { | 237 | 2.36M | continue; | 238 | 2.36M | } | 239 | 17.4M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 240 | 17.4M | ++actual_frac_part_count; | 241 | 17.4M | } | 242 | 2.64M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional | 244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions | 245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of | 246 | | // rounding up. | 247 | 2.64M | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { | 248 | 20.1k | if (UNLIKELY(s[digit_index] == '.')) { | 249 | 852 | ++digit_index; | 250 | 852 | } | 251 | 20.1k | if (digit_index != end_digit_index) { | 252 | | // example: test 1.5 -> decimal(1, 0) | 253 | 19.9k | if (s[digit_index] >= '5') { | 254 | 8.03k | ++frac_part_number; | 255 | 8.03k | if (frac_part_number == type_scale_multiplier) { | 256 | 906 | frac_part_number = 0; | 257 | 906 | ++int_part_number; | 258 | 906 | } | 259 | 8.03k | } | 260 | 19.9k | } | 261 | 2.62M | } else { | 262 | 2.62M | if (actual_frac_part_count < type_scale) { | 263 | 366k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 264 | 366k | } | 265 | 2.62M | } | 266 | 2.64M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 267 | 16 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 268 | 16 | return 0; | 269 | 16 | } | 270 | | | 271 | 2.64M | T value = int_part_number * type_scale_multiplier + frac_part_number; | 272 | 2.64M | *result = StringParser::PARSE_SUCCESS; | 273 | 2.64M | return is_negative ? T(-value) : T(value); | 274 | 2.64M | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 61 | 14.1k | ParseResult* result) { | 62 | 14.1k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 63 | 14.1k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 64 | 14.1k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 65 | 14.1k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 66 | 14.1k | "wide::Int256."); | 67 | | | 68 | | // Parse in two logical coordinate systems: | 69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and | 70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so | 71 | | // ".14E+3" is parsed as significand "14" with exponent 3. | 72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position | 73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, | 74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. | 75 | | // `digit_index` always indexes the normalized significand string, which may still contain a | 76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. | 77 | | // Ignore leading and trailing spaces. | 78 | 14.1k | s = skip_ascii_whitespaces(s, len); | 79 | | | 80 | 14.1k | bool is_negative = false; | 81 | 14.1k | if (len > 0) { | 82 | 14.1k | switch (*s) { | 83 | 6.77k | case '-': | 84 | 6.77k | is_negative = true; | 85 | 6.77k | [[fallthrough]]; | 86 | 6.77k | case '+': | 87 | 6.77k | ++s; | 88 | 6.77k | --len; | 89 | 14.1k | } | 90 | 14.1k | } | 91 | | // Ignore leading zeros. | 92 | 14.1k | bool found_value = false; | 93 | 53.0k | while (len > 0 && UNLIKELY(*s == '0')) { | 94 | 38.9k | found_value = true; | 95 | 38.9k | ++s; | 96 | 38.9k | --len; | 97 | 38.9k | } | 98 | | | 99 | 14.1k | int found_dot = 0; | 100 | 14.1k | if (len > 0 && *s == '.') { | 101 | 2.08k | found_dot = 1; | 102 | 2.08k | ++s; | 103 | 2.08k | --len; | 104 | 2.08k | } | 105 | 14.1k | int int_part_count = 0; | 106 | 14.1k | int i = 0; | 107 | 287k | for (; i != len; ++i) { | 108 | 273k | const char& c = s[i]; | 109 | 273k | if (LIKELY('0' <= c && c <= '9')) { | 110 | 261k | found_value = true; | 111 | 261k | if (!found_dot) { | 112 | 139k | ++int_part_count; | 113 | 139k | } | 114 | 261k | } else if (c == '.') { | 115 | 11.8k | if (found_dot) { | 116 | 0 | *result = StringParser::PARSE_FAILURE; | 117 | 0 | return 0; | 118 | 0 | } | 119 | 11.8k | found_dot = 1; | 120 | 11.8k | } else { | 121 | 12 | break; | 122 | 12 | } | 123 | 273k | } | 124 | 14.1k | if (!found_value) { | 125 | | // '', '.' | 126 | 11 | *result = StringParser::PARSE_FAILURE; | 127 | 11 | return 0; | 128 | 11 | } | 129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts | 130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after | 131 | | // "E+2". | 132 | 14.0k | int64_t exponent = 0; | 133 | 14.0k | auto end_digit_index = i; | 134 | 14.0k | if (i != len) { | 135 | 1 | bool negative_exponent = false; | 136 | 1 | if (s[i] == 'e' || s[i] == 'E') { | 137 | 0 | ++i; | 138 | 0 | if (i != len) { | 139 | 0 | switch (s[i]) { | 140 | 0 | case '-': | 141 | 0 | negative_exponent = true; | 142 | 0 | [[fallthrough]]; | 143 | 0 | case '+': | 144 | 0 | ++i; | 145 | 0 | } | 146 | 0 | } | 147 | 0 | if (i == len) { | 148 | | // '123e', '123e+', '123e-' | 149 | 0 | *result = StringParser::PARSE_FAILURE; | 150 | 0 | return 0; | 151 | 0 | } | 152 | 0 | for (; i != len; ++i) { | 153 | 0 | const char& c = s[i]; | 154 | 0 | if (LIKELY('0' <= c && c <= '9')) { | 155 | 0 | exponent = exponent * 10 + (c - '0'); | 156 | | // max string len is config::string_type_length_soft_limit_bytes, | 157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 158 | | // just check overflow of int32_t to simplify the logic | 159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 160 | 0 | if (exponent > std::numeric_limits<int32_t>::max()) { | 161 | 0 | *result = StringParser::PARSE_OVERFLOW; | 162 | 0 | return 0; | 163 | 0 | } | 164 | 0 | } else { | 165 | | // '123e12abc', '123e1.2' | 166 | 0 | *result = StringParser::PARSE_FAILURE; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 0 | } | 170 | 0 | if (negative_exponent) { | 171 | 0 | exponent = -exponent; | 172 | 0 | } | 173 | 1 | } else { | 174 | 1 | *result = StringParser::PARSE_FAILURE; | 175 | 1 | return 0; | 176 | 1 | } | 177 | 1 | } | 178 | | // TODO: check limit values of exponent and add UT | 179 | | // max string len is config::string_type_length_soft_limit_bytes, | 180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 181 | | // so int_part_count will be in range of int32_t, | 182 | | // and int_part_count + exponent will be in range of int64_t | 183 | 14.0k | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 184 | 14.0k | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 185 | 14.0k | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 187 | 0 | return 0; | 188 | 0 | } | 189 | 14.0k | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 190 | 14.0k | T int_part_number = 0; | 191 | 14.0k | T frac_part_number = 0; | 192 | 14.0k | int actual_frac_part_count = 0; | 193 | 14.0k | int digit_index = 0; | 194 | 14.0k | if (result_int_part_digit_count >= 0) { | 195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra | 196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to | 197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index | 198 | | // because missing digits are appended later by multiplying with powers of 10. | 199 | 14.0k | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 200 | 13.8k | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 201 | 14.0k | : result_int_part_digit_count, | 202 | 14.0k | end_digit_index); | 203 | 14.0k | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 204 | | // skip zero number | 205 | 14.0k | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 206 | 0 | } | 207 | | // test 0.00, .00, 0.{00...}e2147483647 | 208 | | // 0.00000e2147483647 | 209 | 14.0k | if (digit_index != max_index && | 210 | 14.0k | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 211 | 8 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 212 | 8 | return 0; | 213 | 8 | } | 214 | | // get int part number | 215 | 153k | for (; digit_index != max_index; ++digit_index) { | 216 | 139k | if (UNLIKELY(s[digit_index] == '.')) { | 217 | 0 | continue; | 218 | 0 | } | 219 | 139k | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 220 | 139k | } | 221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal | 222 | | // point past all available significant digits, append zeros by scaling the integer part: | 223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. | 224 | 14.0k | auto total_significant_digit_count = | 225 | 14.0k | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); | 226 | 14.0k | if (result_int_part_digit_count > total_significant_digit_count) { | 227 | 0 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 228 | 0 | total_significant_digit_count); | 229 | 0 | } | 230 | 14.0k | } else { | 231 | | // leading zeros of fraction part | 232 | 0 | actual_frac_part_count = -result_int_part_digit_count; | 233 | 0 | } | 234 | | // get fraction part number | 235 | 147k | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 236 | 133k | if (UNLIKELY(s[digit_index] == '.')) { | 237 | 11.8k | continue; | 238 | 11.8k | } | 239 | 121k | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 240 | 121k | ++actual_frac_part_count; | 241 | 121k | } | 242 | 14.0k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional | 244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions | 245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of | 246 | | // rounding up. | 247 | 14.0k | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { | 248 | 17 | if (UNLIKELY(s[digit_index] == '.')) { | 249 | 0 | ++digit_index; | 250 | 0 | } | 251 | 17 | if (digit_index != end_digit_index) { | 252 | | // example: test 1.5 -> decimal(1, 0) | 253 | 17 | if (s[digit_index] >= '5') { | 254 | 17 | ++frac_part_number; | 255 | 17 | if (frac_part_number == type_scale_multiplier) { | 256 | 0 | frac_part_number = 0; | 257 | 0 | ++int_part_number; | 258 | 0 | } | 259 | 17 | } | 260 | 17 | } | 261 | 14.0k | } else { | 262 | 14.0k | if (actual_frac_part_count < type_scale) { | 263 | 2.16k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 264 | 2.16k | } | 265 | 14.0k | } | 266 | 14.0k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 267 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 268 | 0 | return 0; | 269 | 0 | } | 270 | | | 271 | 14.0k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 272 | 14.0k | *result = StringParser::PARSE_SUCCESS; | 273 | 14.0k | return is_negative ? T(-value) : T(value); | 274 | 14.0k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 61 | 134k | ParseResult* result) { | 62 | 134k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 63 | 134k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 64 | 134k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 65 | 134k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 66 | 134k | "wide::Int256."); | 67 | | | 68 | | // Parse in two logical coordinate systems: | 69 | | // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and | 70 | | // leading zeros. If the original value starts with '.', the dot is also skipped so | 71 | | // ".14E+3" is parsed as significand "14" with exponent 3. | 72 | | // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position | 73 | | // after applying scientific notation. For example, "1.4E+2" has int_part_count=1, | 74 | | // exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140. | 75 | | // `digit_index` always indexes the normalized significand string, which may still contain a | 76 | | // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly. | 77 | | // Ignore leading and trailing spaces. | 78 | 134k | s = skip_ascii_whitespaces(s, len); | 79 | | | 80 | 134k | bool is_negative = false; | 81 | 134k | if (len > 0) { | 82 | 134k | switch (*s) { | 83 | 20.4k | case '-': | 84 | 20.4k | is_negative = true; | 85 | 20.4k | [[fallthrough]]; | 86 | 27.1k | case '+': | 87 | 27.1k | ++s; | 88 | 27.1k | --len; | 89 | 134k | } | 90 | 134k | } | 91 | | // Ignore leading zeros. | 92 | 134k | bool found_value = false; | 93 | 233k | while (len > 0 && UNLIKELY(*s == '0')) { | 94 | 98.8k | found_value = true; | 95 | 98.8k | ++s; | 96 | 98.8k | --len; | 97 | 98.8k | } | 98 | | | 99 | 134k | int found_dot = 0; | 100 | 134k | if (len > 0 && *s == '.') { | 101 | 17.0k | found_dot = 1; | 102 | 17.0k | ++s; | 103 | 17.0k | --len; | 104 | 17.0k | } | 105 | 134k | int int_part_count = 0; | 106 | 134k | int i = 0; | 107 | 4.96M | for (; i != len; ++i) { | 108 | 4.91M | const char& c = s[i]; | 109 | 4.91M | if (LIKELY('0' <= c && c <= '9')) { | 110 | 4.72M | found_value = true; | 111 | 4.72M | if (!found_dot) { | 112 | 1.72M | ++int_part_count; | 113 | 1.72M | } | 114 | 4.72M | } else if (c == '.') { | 115 | 105k | if (found_dot) { | 116 | 0 | *result = StringParser::PARSE_FAILURE; | 117 | 0 | return 0; | 118 | 0 | } | 119 | 105k | found_dot = 1; | 120 | 105k | } else { | 121 | 78.0k | break; | 122 | 78.0k | } | 123 | 4.91M | } | 124 | 134k | if (!found_value) { | 125 | | // '', '.' | 126 | 78 | *result = StringParser::PARSE_FAILURE; | 127 | 78 | return 0; | 128 | 78 | } | 129 | | // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts | 130 | | // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after | 131 | | // "E+2". | 132 | 134k | int64_t exponent = 0; | 133 | 134k | auto end_digit_index = i; | 134 | 134k | if (i != len) { | 135 | 78.0k | bool negative_exponent = false; | 136 | 78.0k | if (s[i] == 'e' || s[i] == 'E') { | 137 | 77.9k | ++i; | 138 | 77.9k | if (i != len) { | 139 | 77.9k | switch (s[i]) { | 140 | 3.58k | case '-': | 141 | 3.58k | negative_exponent = true; | 142 | 3.58k | [[fallthrough]]; | 143 | 70.2k | case '+': | 144 | 70.2k | ++i; | 145 | 77.9k | } | 146 | 77.9k | } | 147 | 77.9k | if (i == len) { | 148 | | // '123e', '123e+', '123e-' | 149 | 0 | *result = StringParser::PARSE_FAILURE; | 150 | 0 | return 0; | 151 | 0 | } | 152 | 234k | for (; i != len; ++i) { | 153 | 156k | const char& c = s[i]; | 154 | 156k | if (LIKELY('0' <= c && c <= '9')) { | 155 | 156k | exponent = exponent * 10 + (c - '0'); | 156 | | // max string len is config::string_type_length_soft_limit_bytes, | 157 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 158 | | // just check overflow of int32_t to simplify the logic | 159 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 160 | 156k | if (exponent > std::numeric_limits<int32_t>::max()) { | 161 | 0 | *result = StringParser::PARSE_OVERFLOW; | 162 | 0 | return 0; | 163 | 0 | } | 164 | 156k | } else { | 165 | | // '123e12abc', '123e1.2' | 166 | 10 | *result = StringParser::PARSE_FAILURE; | 167 | 10 | return 0; | 168 | 10 | } | 169 | 156k | } | 170 | 77.9k | if (negative_exponent) { | 171 | 3.58k | exponent = -exponent; | 172 | 3.58k | } | 173 | 77.9k | } else { | 174 | 14 | *result = StringParser::PARSE_FAILURE; | 175 | 14 | return 0; | 176 | 14 | } | 177 | 78.0k | } | 178 | | // TODO: check limit values of exponent and add UT | 179 | | // max string len is config::string_type_length_soft_limit_bytes, | 180 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 181 | | // so int_part_count will be in range of int32_t, | 182 | | // and int_part_count + exponent will be in range of int64_t | 183 | 134k | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 184 | 134k | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 185 | 134k | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 186 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 187 | 0 | return 0; | 188 | 0 | } | 189 | 134k | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 190 | 134k | T int_part_number = 0; | 191 | 134k | T frac_part_number = 0; | 192 | 134k | int actual_frac_part_count = 0; | 193 | 134k | int digit_index = 0; | 194 | 134k | if (result_int_part_digit_count >= 0) { | 195 | | // `max_index` is the raw significand index where integer-part digits stop. Add one extra | 196 | | // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to | 197 | | // collect three integer digits after the exponent shift. It is capped by end_digit_index | 198 | | // because missing digits are appended later by multiplying with powers of 10. | 199 | 134k | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 200 | 122k | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 201 | 134k | : result_int_part_digit_count, | 202 | 134k | end_digit_index); | 203 | 134k | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 204 | | // skip zero number | 205 | 352k | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 206 | 217k | } | 207 | | // test 0.00, .00, 0.{00...}e2147483647 | 208 | | // 0.00000e2147483647 | 209 | 134k | if (digit_index != max_index && | 210 | 134k | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 211 | 392 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 212 | 392 | return 0; | 213 | 392 | } | 214 | | // get int part number | 215 | 2.70M | for (; digit_index != max_index; ++digit_index) { | 216 | 2.56M | if (UNLIKELY(s[digit_index] == '.')) { | 217 | 67.5k | continue; | 218 | 67.5k | } | 219 | 2.49M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 220 | 2.49M | } | 221 | | // Count only significand digits, not exponent syntax. If the exponent moves the decimal | 222 | | // point past all available significant digits, append zeros by scaling the integer part: | 223 | | // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10. | 224 | 134k | auto total_significant_digit_count = | 225 | 134k | end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0); | 226 | 134k | if (result_int_part_digit_count > total_significant_digit_count) { | 227 | 64.6k | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 228 | 64.6k | total_significant_digit_count); | 229 | 64.6k | } | 230 | 134k | } else { | 231 | | // leading zeros of fraction part | 232 | 178 | actual_frac_part_count = -result_int_part_digit_count; | 233 | 178 | } | 234 | | // get fraction part number | 235 | 1.92M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 236 | 1.78M | if (UNLIKELY(s[digit_index] == '.')) { | 237 | 34.0k | continue; | 238 | 34.0k | } | 239 | 1.75M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 240 | 1.75M | ++actual_frac_part_count; | 241 | 1.75M | } | 242 | 134k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 243 | | // Round only when the next parsed significand digit is exactly the first discarded fractional | 244 | | // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions | 245 | | // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of | 246 | | // rounding up. | 247 | 134k | if (actual_frac_part_count == type_scale && digit_index != end_digit_index) { | 248 | 21.4k | if (UNLIKELY(s[digit_index] == '.')) { | 249 | 862 | ++digit_index; | 250 | 862 | } | 251 | 21.4k | if (digit_index != end_digit_index) { | 252 | | // example: test 1.5 -> decimal(1, 0) | 253 | 21.2k | if (s[digit_index] >= '5') { | 254 | 8.99k | ++frac_part_number; | 255 | 8.99k | if (frac_part_number == type_scale_multiplier) { | 256 | 988 | frac_part_number = 0; | 257 | 988 | ++int_part_number; | 258 | 988 | } | 259 | 8.99k | } | 260 | 21.2k | } | 261 | 112k | } else { | 262 | 112k | if (actual_frac_part_count < type_scale) { | 263 | 91.6k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 264 | 91.6k | } | 265 | 112k | } | 266 | 134k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 267 | 56 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 268 | 56 | return 0; | 269 | 56 | } | 270 | | | 271 | 134k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 272 | 134k | *result = StringParser::PARSE_SUCCESS; | 273 | 134k | return is_negative ? T(-value) : T(value); | 274 | 134k | } |
|
275 | | |
276 | | template Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
277 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
278 | | ParseResult* result); |
279 | | template Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
280 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
281 | | ParseResult* result); |
282 | | template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
283 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
284 | | ParseResult* result); |
285 | | template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
286 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
287 | | ParseResult* result); |
288 | | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
289 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
290 | | ParseResult* result); |
291 | | } // end namespace doris |
292 | | #include "common/compile_check_avoid_end.h" |