be/src/util/string_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/string_parser.hpp" |
19 | | |
20 | | #include <limits> |
21 | | |
22 | | #include "core/extended_types.h" |
23 | | #include "core/types.h" |
24 | | namespace doris { |
25 | | #include "common/compile_check_avoid_begin.h" |
26 | | // Supported decimal number format: |
27 | | // <decimal> ::= <whitespace>* <value> <whitespace>* |
28 | | // |
29 | | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
30 | | // |
31 | | // <value> ::= <sign>? <significand> <exponent>? |
32 | | // |
33 | | // <sign> ::= "+" | "-" |
34 | | // |
35 | | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
36 | | // |
37 | | // <digits> ::= <digit>+ |
38 | | // |
39 | | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
40 | | // |
41 | | // <exponent> ::= <e_marker> <sign>? <digits> |
42 | | // |
43 | | // <e_marker> ::= "e" | "E" |
44 | | template <PrimitiveType P> |
45 | | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
46 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
47 | 17.6M | ParseResult* result) { |
48 | 17.6M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
49 | 17.6M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
50 | 17.6M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
51 | 17.6M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
52 | 17.6M | "wide::Int256."); |
53 | | // Ignore leading and trailing spaces. |
54 | 17.6M | s = skip_ascii_whitespaces(s, len); |
55 | | |
56 | 17.6M | bool is_negative = false; |
57 | 17.6M | if (len > 0) { |
58 | 17.5M | switch (*s) { |
59 | 184k | case '-': |
60 | 184k | is_negative = true; |
61 | 184k | [[fallthrough]]; |
62 | 211k | case '+': |
63 | 211k | ++s; |
64 | 211k | --len; |
65 | 17.5M | } |
66 | 17.5M | } |
67 | | // Ignore leading zeros. |
68 | 17.6M | bool found_value = false; |
69 | 25.2M | while (len > 0 && UNLIKELY(*s == '0')) { |
70 | 7.53M | found_value = true; |
71 | 7.53M | ++s; |
72 | 7.53M | --len; |
73 | 7.53M | } |
74 | | |
75 | 17.6M | int found_dot = 0; |
76 | 17.6M | if (len > 0 && *s == '.') { |
77 | 7.27M | found_dot = 1; |
78 | 7.27M | ++s; |
79 | 7.27M | --len; |
80 | 7.27M | } |
81 | 17.6M | int int_part_count = 0; |
82 | 17.6M | int i = 0; |
83 | 114M | for (; i != len; ++i) { |
84 | 96.4M | const char& c = s[i]; |
85 | 96.4M | if (LIKELY('0' <= c && c <= '9')) { |
86 | 89.9M | found_value = true; |
87 | 89.9M | if (!found_dot) { |
88 | 54.1M | ++int_part_count; |
89 | 54.1M | } |
90 | 89.9M | } else if (c == '.') { |
91 | 6.41M | if (found_dot) { |
92 | 2 | *result = StringParser::PARSE_FAILURE; |
93 | 2 | return 0; |
94 | 2 | } |
95 | 6.41M | found_dot = 1; |
96 | 6.41M | } else { |
97 | 91.2k | break; |
98 | 91.2k | } |
99 | 96.4M | } |
100 | 17.6M | if (!found_value) { |
101 | | // '', '.' |
102 | 97.9k | *result = StringParser::PARSE_FAILURE; |
103 | 97.9k | return 0; |
104 | 97.9k | } |
105 | | // parse exponent if any |
106 | 17.5M | int64_t exponent = 0; |
107 | 17.5M | auto end_digit_index = i; |
108 | 17.5M | if (i != len) { |
109 | 112k | bool negative_exponent = false; |
110 | 112k | if (s[i] == 'e' || s[i] == 'E') { |
111 | 112k | ++i; |
112 | 112k | if (i != len) { |
113 | 112k | switch (s[i]) { |
114 | 14.5k | case '-': |
115 | 14.5k | negative_exponent = true; |
116 | 14.5k | [[fallthrough]]; |
117 | 81.1k | case '+': |
118 | 81.1k | ++i; |
119 | 112k | } |
120 | 112k | } |
121 | 112k | if (i == len) { |
122 | | // '123e', '123e+', '123e-' |
123 | 6 | *result = StringParser::PARSE_FAILURE; |
124 | 6 | return 0; |
125 | 6 | } |
126 | 331k | for (; i != len; ++i) { |
127 | 219k | const char& c = s[i]; |
128 | 219k | if (LIKELY('0' <= c && c <= '9')) { |
129 | 219k | exponent = exponent * 10 + (c - '0'); |
130 | | // max string len is config::string_type_length_soft_limit_bytes, |
131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
132 | | // just check overflow of int32_t to simplify the logic |
133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 |
134 | 219k | if (exponent > std::numeric_limits<int32_t>::max()) { |
135 | 0 | *result = StringParser::PARSE_OVERFLOW; |
136 | 0 | return 0; |
137 | 0 | } |
138 | 219k | } else { |
139 | | // '123e12abc', '123e1.2' |
140 | 22 | *result = StringParser::PARSE_FAILURE; |
141 | 22 | return 0; |
142 | 22 | } |
143 | 219k | } |
144 | 112k | if (negative_exponent) { |
145 | 14.5k | exponent = -exponent; |
146 | 14.5k | } |
147 | 112k | } else { |
148 | 193 | *result = StringParser::PARSE_FAILURE; |
149 | 193 | return 0; |
150 | 193 | } |
151 | 112k | } |
152 | 17.5M | T int_part_number = 0; |
153 | 17.5M | T frac_part_number = 0; |
154 | | // TODO: check limit values of exponent and add UT |
155 | | // max string len is config::string_type_length_soft_limit_bytes, |
156 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
157 | | // so int_part_count will be in range of int32_t, |
158 | | // and int_part_count + exponent will be in range of int64_t |
159 | 17.5M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; |
160 | 17.5M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || |
161 | 17.5M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { |
162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
163 | 0 | return 0; |
164 | 0 | } |
165 | 17.5M | int result_int_part_digit_count = tmp_result_int_part_digit_count; |
166 | 17.5M | int actual_frac_part_count = 0; |
167 | 17.5M | int digit_index = 0; |
168 | 17.5M | if (result_int_part_digit_count >= 0) { |
169 | 17.5M | int max_index = std::min(found_dot ? (result_int_part_digit_count + |
170 | 13.6M | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) |
171 | 17.5M | : result_int_part_digit_count, |
172 | 17.5M | end_digit_index); |
173 | 17.5M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); |
174 | | // skip zero number |
175 | 18.4M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { |
176 | 857k | } |
177 | | // test 0.00, .00, 0.{00...}e2147483647 |
178 | | // 0.00000e2147483647 |
179 | 17.5M | if (digit_index != max_index && |
180 | 17.5M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { |
181 | 16.9k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
182 | 16.9k | return 0; |
183 | 16.9k | } |
184 | | // get int part number |
185 | 72.1M | for (; digit_index != max_index; ++digit_index) { |
186 | 54.6M | if (UNLIKELY(s[digit_index] == '.')) { |
187 | 71.1k | continue; |
188 | 71.1k | } |
189 | 54.5M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); |
190 | 54.5M | } |
191 | 17.5M | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0); |
192 | 17.5M | if (result_int_part_digit_count > total_significant_digit_count) { |
193 | 2.45k | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - |
194 | 2.45k | total_significant_digit_count); |
195 | 2.45k | } |
196 | 18.4E | } else { |
197 | | // leading zeros of fraction part |
198 | 18.4E | actual_frac_part_count = -result_int_part_digit_count; |
199 | 18.4E | } |
200 | | // get fraction part number |
201 | 57.2M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { |
202 | 39.7M | if (UNLIKELY(s[digit_index] == '.')) { |
203 | 6.10M | continue; |
204 | 6.10M | } |
205 | 33.6M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); |
206 | 33.6M | ++actual_frac_part_count; |
207 | 33.6M | } |
208 | 17.5M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
209 | | // there are still extra fraction digits left, check rounding |
210 | 17.5M | if (digit_index != end_digit_index) { |
211 | 293k | if (UNLIKELY(s[digit_index] == '.')) { |
212 | 210k | ++digit_index; |
213 | 210k | } |
214 | 293k | if (digit_index != end_digit_index) { |
215 | | // example: test 1.5 -> decimal(1, 0) |
216 | 292k | if (s[digit_index] >= '5') { |
217 | 207k | ++frac_part_number; |
218 | 207k | if (frac_part_number == type_scale_multiplier) { |
219 | 176k | frac_part_number = 0; |
220 | 176k | ++int_part_number; |
221 | 176k | } |
222 | 207k | } |
223 | 292k | } |
224 | 17.2M | } else { |
225 | 17.2M | if (actual_frac_part_count < type_scale) { |
226 | 4.11M | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
227 | 4.11M | } |
228 | 17.2M | } |
229 | 17.5M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
230 | 136 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
231 | 136 | return 0; |
232 | 136 | } |
233 | | |
234 | 17.5M | T value = int_part_number * type_scale_multiplier + frac_part_number; |
235 | 17.5M | *result = StringParser::PARSE_SUCCESS; |
236 | 17.5M | return is_negative ? T(-value) : T(value); |
237 | 17.5M | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 47 | 335k | ParseResult* result) { | 48 | 335k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 49 | 335k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 50 | 335k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 51 | 335k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 52 | 335k | "wide::Int256."); | 53 | | // Ignore leading and trailing spaces. | 54 | 335k | s = skip_ascii_whitespaces(s, len); | 55 | | | 56 | 335k | bool is_negative = false; | 57 | 335k | if (len > 0) { | 58 | 330k | switch (*s) { | 59 | 58.2k | case '-': | 60 | 58.2k | is_negative = true; | 61 | 58.2k | [[fallthrough]]; | 62 | 65.5k | case '+': | 63 | 65.5k | ++s; | 64 | 65.5k | --len; | 65 | 330k | } | 66 | 330k | } | 67 | | // Ignore leading zeros. 
| 68 | 335k | bool found_value = false; | 69 | 391k | while (len > 0 && UNLIKELY(*s == '0')) { | 70 | 55.9k | found_value = true; | 71 | 55.9k | ++s; | 72 | 55.9k | --len; | 73 | 55.9k | } | 74 | | | 75 | 335k | int found_dot = 0; | 76 | 335k | if (len > 0 && *s == '.') { | 77 | 19.4k | found_dot = 1; | 78 | 19.4k | ++s; | 79 | 19.4k | --len; | 80 | 19.4k | } | 81 | 335k | int int_part_count = 0; | 82 | 335k | int i = 0; | 83 | 2.21M | for (; i != len; ++i) { | 84 | 1.89M | const char& c = s[i]; | 85 | 1.89M | if (LIKELY('0' <= c && c <= '9')) { | 86 | 1.61M | found_value = true; | 87 | 1.61M | if (!found_dot) { | 88 | 903k | ++int_part_count; | 89 | 903k | } | 90 | 1.61M | } else if (c == '.') { | 91 | 269k | if (found_dot) { | 92 | 2 | *result = StringParser::PARSE_FAILURE; | 93 | 2 | return 0; | 94 | 2 | } | 95 | 269k | found_dot = 1; | 96 | 269k | } else { | 97 | 10.9k | break; | 98 | 10.9k | } | 99 | 1.89M | } | 100 | 335k | if (!found_value) { | 101 | | // '', '.' | 102 | 6.96k | *result = StringParser::PARSE_FAILURE; | 103 | 6.96k | return 0; | 104 | 6.96k | } | 105 | | // parse exponent if any | 106 | 328k | int64_t exponent = 0; | 107 | 328k | auto end_digit_index = i; | 108 | 328k | if (i != len) { | 109 | 9.41k | bool negative_exponent = false; | 110 | 9.41k | if (s[i] == 'e' || s[i] == 'E') { | 111 | 9.33k | ++i; | 112 | 9.33k | if (i != len) { | 113 | 9.33k | switch (s[i]) { | 114 | 1.54k | case '-': | 115 | 1.54k | negative_exponent = true; | 116 | 1.54k | [[fallthrough]]; | 117 | 1.54k | case '+': | 118 | 1.54k | ++i; | 119 | 9.33k | } | 120 | 9.33k | } | 121 | 9.33k | if (i == len) { | 122 | | // '123e', '123e+', '123e-' | 123 | 6 | *result = StringParser::PARSE_FAILURE; | 124 | 6 | return 0; | 125 | 6 | } | 126 | 24.6k | for (; i != len; ++i) { | 127 | 15.3k | const char& c = s[i]; | 128 | 15.3k | if (LIKELY('0' <= c && c <= '9')) { | 129 | 15.3k | exponent = exponent * 10 + (c - '0'); | 130 | | // max string len is 
config::string_type_length_soft_limit_bytes, | 131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 132 | | // just check overflow of int32_t to simplify the logic | 133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 134 | 15.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 135 | 0 | *result = StringParser::PARSE_OVERFLOW; | 136 | 0 | return 0; | 137 | 0 | } | 138 | 15.3k | } else { | 139 | | // '123e12abc', '123e1.2' | 140 | 12 | *result = StringParser::PARSE_FAILURE; | 141 | 12 | return 0; | 142 | 12 | } | 143 | 15.3k | } | 144 | 9.31k | if (negative_exponent) { | 145 | 1.53k | exponent = -exponent; | 146 | 1.53k | } | 147 | 9.31k | } else { | 148 | 84 | *result = StringParser::PARSE_FAILURE; | 149 | 84 | return 0; | 150 | 84 | } | 151 | 9.41k | } | 152 | 328k | T int_part_number = 0; | 153 | 328k | T frac_part_number = 0; | 154 | | // TODO: check limit values of exponent and add UT | 155 | | // max string len is config::string_type_length_soft_limit_bytes, | 156 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 157 | | // so int_part_count will be in range of int32_t, | 158 | | // and int_part_count + exponent will be in range of int64_t | 159 | 328k | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 160 | 328k | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 161 | 328k | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 163 | 0 | return 0; | 164 | 0 | } | 165 | 328k | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 166 | 328k | int actual_frac_part_count = 0; | 167 | 328k | int digit_index = 0; | 168 | 328k | if (result_int_part_digit_count >= 0) { | 169 | 328k | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 170 | 288k | ((int_part_count > 0 && exponent > 0) ? 
1 : 0)) | 171 | 328k | : result_int_part_digit_count, | 172 | 328k | end_digit_index); | 173 | 328k | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 174 | | // skip zero number | 175 | 541k | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 176 | 212k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 328k | if (digit_index != max_index && | 180 | 328k | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 181 | 6.02k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 182 | 6.02k | return 0; | 183 | 6.02k | } | 184 | | // get int part number | 185 | 1.12M | for (; digit_index != max_index; ++digit_index) { | 186 | 804k | if (UNLIKELY(s[digit_index] == '.')) { | 187 | 1.60k | continue; | 188 | 1.60k | } | 189 | 802k | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 190 | 802k | } | 191 | 322k | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 
1 : 0); | 192 | 322k | if (result_int_part_digit_count > total_significant_digit_count) { | 193 | 100 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 194 | 100 | total_significant_digit_count); | 195 | 100 | } | 196 | 322k | } else { | 197 | | // leading zeros of fraction part | 198 | 51 | actual_frac_part_count = -result_int_part_digit_count; | 199 | 51 | } | 200 | | // get fraction part number | 201 | 628k | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 202 | 306k | if (UNLIKELY(s[digit_index] == '.')) { | 203 | 52.9k | continue; | 204 | 52.9k | } | 205 | 253k | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 206 | 253k | ++actual_frac_part_count; | 207 | 253k | } | 208 | 322k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 209 | | // there are still extra fraction digits left, check rounding | 210 | 322k | if (digit_index != end_digit_index) { | 211 | 228k | if (UNLIKELY(s[digit_index] == '.')) { | 212 | 208k | ++digit_index; | 213 | 208k | } | 214 | 228k | if (digit_index != end_digit_index) { | 215 | | // example: test 1.5 -> decimal(1, 0) | 216 | 228k | if (s[digit_index] >= '5') { | 217 | 181k | ++frac_part_number; | 218 | 181k | if (frac_part_number == type_scale_multiplier) { | 219 | 173k | frac_part_number = 0; | 220 | 173k | ++int_part_number; | 221 | 173k | } | 222 | 181k | } | 223 | 228k | } | 224 | 228k | } else { | 225 | 93.8k | if (actual_frac_part_count < type_scale) { | 226 | 30.0k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 227 | 30.0k | } | 228 | 93.8k | } | 229 | 322k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 230 | 24 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 231 | 24 | return 0; | 232 | 24 | } | 233 | | | 234 | 322k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 235 | 322k | *result = StringParser::PARSE_SUCCESS; | 236 | 322k | return is_negative ? T(-value) : T(value); | 237 | 322k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 47 | 15.0M | ParseResult* result) { | 48 | 15.0M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 49 | 15.0M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 50 | 15.0M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 51 | 15.0M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 52 | 15.0M | "wide::Int256."); | 53 | | // Ignore leading and trailing spaces. | 54 | 15.0M | s = skip_ascii_whitespaces(s, len); | 55 | | | 56 | 15.0M | bool is_negative = false; | 57 | 15.0M | if (len > 0) { | 58 | 15.0M | switch (*s) { | 59 | 29.9k | case '-': | 60 | 29.9k | is_negative = true; | 61 | 29.9k | [[fallthrough]]; | 62 | 36.5k | case '+': | 63 | 36.5k | ++s; | 64 | 36.5k | --len; | 65 | 15.0M | } | 66 | 15.0M | } | 67 | | // Ignore leading zeros. 
| 68 | 15.0M | bool found_value = false; | 69 | 22.3M | while (len > 0 && UNLIKELY(*s == '0')) { | 70 | 7.26M | found_value = true; | 71 | 7.26M | ++s; | 72 | 7.26M | --len; | 73 | 7.26M | } | 74 | | | 75 | 15.0M | int found_dot = 0; | 76 | 15.0M | if (len > 0 && *s == '.') { | 77 | 7.21M | found_dot = 1; | 78 | 7.21M | ++s; | 79 | 7.21M | --len; | 80 | 7.21M | } | 81 | 15.0M | int int_part_count = 0; | 82 | 15.0M | int i = 0; | 83 | 70.5M | for (; i != len; ++i) { | 84 | 55.4M | const char& c = s[i]; | 85 | 55.4M | if (LIKELY('0' <= c && c <= '9')) { | 86 | 51.1M | found_value = true; | 87 | 51.1M | if (!found_dot) { | 88 | 27.5M | ++int_part_count; | 89 | 27.5M | } | 90 | 51.1M | } else if (c == '.') { | 91 | 4.28M | if (found_dot) { | 92 | 0 | *result = StringParser::PARSE_FAILURE; | 93 | 0 | return 0; | 94 | 0 | } | 95 | 4.28M | found_dot = 1; | 96 | 18.4E | } else { | 97 | 18.4E | break; | 98 | 18.4E | } | 99 | 55.4M | } | 100 | 15.0M | if (!found_value) { | 101 | | // '', '.' | 102 | 374 | *result = StringParser::PARSE_FAILURE; | 103 | 374 | return 0; | 104 | 374 | } | 105 | | // parse exponent if any | 106 | 15.0M | int64_t exponent = 0; | 107 | 15.0M | auto end_digit_index = i; | 108 | 15.0M | if (i != len) { | 109 | 13.1k | bool negative_exponent = false; | 110 | 13.1k | if (s[i] == 'e' || s[i] == 'E') { | 111 | 13.1k | ++i; | 112 | 13.1k | if (i != len) { | 113 | 13.1k | switch (s[i]) { | 114 | 5.35k | case '-': | 115 | 5.35k | negative_exponent = true; | 116 | 5.35k | [[fallthrough]]; | 117 | 5.35k | case '+': | 118 | 5.35k | ++i; | 119 | 13.1k | } | 120 | 13.1k | } | 121 | 13.1k | if (i == len) { | 122 | | // '123e', '123e+', '123e-' | 123 | 0 | *result = StringParser::PARSE_FAILURE; | 124 | 0 | return 0; | 125 | 0 | } | 126 | 38.4k | for (; i != len; ++i) { | 127 | 25.3k | const char& c = s[i]; | 128 | 25.3k | if (LIKELY('0' <= c && c <= '9')) { | 129 | 25.3k | exponent = exponent * 10 + (c - '0'); | 130 | | // max string len is 
config::string_type_length_soft_limit_bytes, | 131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 132 | | // just check overflow of int32_t to simplify the logic | 133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 134 | 25.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 135 | 0 | *result = StringParser::PARSE_OVERFLOW; | 136 | 0 | return 0; | 137 | 0 | } | 138 | 25.3k | } else { | 139 | | // '123e12abc', '123e1.2' | 140 | 0 | *result = StringParser::PARSE_FAILURE; | 141 | 0 | return 0; | 142 | 0 | } | 143 | 25.3k | } | 144 | 13.1k | if (negative_exponent) { | 145 | 5.35k | exponent = -exponent; | 146 | 5.35k | } | 147 | 13.1k | } else { | 148 | 76 | *result = StringParser::PARSE_FAILURE; | 149 | 76 | return 0; | 150 | 76 | } | 151 | 13.1k | } | 152 | 15.0M | T int_part_number = 0; | 153 | 15.0M | T frac_part_number = 0; | 154 | | // TODO: check limit values of exponent and add UT | 155 | | // max string len is config::string_type_length_soft_limit_bytes, | 156 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 157 | | // so int_part_count will be in range of int32_t, | 158 | | // and int_part_count + exponent will be in range of int64_t | 159 | 15.0M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 160 | 15.0M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 161 | 15.1M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 163 | 0 | return 0; | 164 | 0 | } | 165 | 15.0M | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 166 | 15.0M | int actual_frac_part_count = 0; | 167 | 15.0M | int digit_index = 0; | 168 | 15.1M | if (result_int_part_digit_count >= 0) { | 169 | 15.1M | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 170 | 11.4M | ((int_part_count > 0 && exponent > 0) ? 
1 : 0)) | 171 | 15.1M | : result_int_part_digit_count, | 172 | 15.1M | end_digit_index); | 173 | 15.1M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 174 | | // skip zero number | 175 | 15.3M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 176 | 213k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 15.1M | if (digit_index != max_index && | 180 | 15.1M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 181 | 10.4k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 182 | 10.4k | return 0; | 183 | 10.4k | } | 184 | | // get int part number | 185 | 42.3M | for (; digit_index != max_index; ++digit_index) { | 186 | 27.2M | if (UNLIKELY(s[digit_index] == '.')) { | 187 | 960 | continue; | 188 | 960 | } | 189 | 27.2M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 190 | 27.2M | } | 191 | 15.0M | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 
1 : 0); | 192 | 15.0M | if (result_int_part_digit_count > total_significant_digit_count) { | 193 | 88 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 194 | 88 | total_significant_digit_count); | 195 | 88 | } | 196 | 18.4E | } else { | 197 | | // leading zeros of fraction part | 198 | 18.4E | actual_frac_part_count = -result_int_part_digit_count; | 199 | 18.4E | } | 200 | | // get fraction part number | 201 | 42.7M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 202 | 27.6M | if (UNLIKELY(s[digit_index] == '.')) { | 203 | 4.26M | continue; | 204 | 4.26M | } | 205 | 23.4M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 206 | 23.4M | ++actual_frac_part_count; | 207 | 23.4M | } | 208 | 15.0M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 209 | | // there are still extra fraction digits left, check rounding | 210 | 15.0M | if (digit_index != end_digit_index) { | 211 | 21.8k | if (UNLIKELY(s[digit_index] == '.')) { | 212 | 867 | ++digit_index; | 213 | 867 | } | 214 | 21.8k | if (digit_index != end_digit_index) { | 215 | | // example: test 1.5 -> decimal(1, 0) | 216 | 21.5k | if (s[digit_index] >= '5') { | 217 | 8.73k | ++frac_part_number; | 218 | 8.73k | if (frac_part_number == type_scale_multiplier) { | 219 | 952 | frac_part_number = 0; | 220 | 952 | ++int_part_number; | 221 | 952 | } | 222 | 8.73k | } | 223 | 21.5k | } | 224 | 15.0M | } else { | 225 | 15.0M | if (actual_frac_part_count < type_scale) { | 226 | 3.63M | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 227 | 3.63M | } | 228 | 15.0M | } | 229 | 15.0M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 230 | 48 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 231 | 48 | return 0; | 232 | 48 | } | 233 | | | 234 | 15.0M | T value = int_part_number * type_scale_multiplier + frac_part_number; | 235 | 15.0M | *result = StringParser::PARSE_SUCCESS; | 236 | 15.0M | return is_negative ? T(-value) : T(value); | 237 | 15.0M | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 47 | 2.09M | ParseResult* result) { | 48 | 2.09M | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 49 | 2.09M | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 50 | 2.09M | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 51 | 2.09M | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 52 | 2.09M | "wide::Int256."); | 53 | | // Ignore leading and trailing spaces. | 54 | 2.09M | s = skip_ascii_whitespaces(s, len); | 55 | | | 56 | 2.09M | bool is_negative = false; | 57 | 2.09M | if (len > 0) { | 58 | 2.00M | switch (*s) { | 59 | 69.3k | case '-': | 60 | 69.3k | is_negative = true; | 61 | 69.3k | [[fallthrough]]; | 62 | 76.0k | case '+': | 63 | 76.0k | ++s; | 64 | 76.0k | --len; | 65 | 2.00M | } | 66 | 2.00M | } | 67 | | // Ignore leading zeros. 
| 68 | 2.09M | bool found_value = false; | 69 | 2.17M | while (len > 0 && UNLIKELY(*s == '0')) { | 70 | 79.1k | found_value = true; | 71 | 79.1k | ++s; | 72 | 79.1k | --len; | 73 | 79.1k | } | 74 | | | 75 | 2.09M | int found_dot = 0; | 76 | 2.09M | if (len > 0 && *s == '.') { | 77 | 28.9k | found_dot = 1; | 78 | 28.9k | ++s; | 79 | 28.9k | --len; | 80 | 28.9k | } | 81 | 2.09M | int int_part_count = 0; | 82 | 2.09M | int i = 0; | 83 | 36.1M | for (; i != len; ++i) { | 84 | 34.0M | const char& c = s[i]; | 85 | 34.0M | if (LIKELY('0' <= c && c <= '9')) { | 86 | 32.3M | found_value = true; | 87 | 32.3M | if (!found_dot) { | 88 | 23.8M | ++int_part_count; | 89 | 23.8M | } | 90 | 32.3M | } else if (c == '.') { | 91 | 1.74M | if (found_dot) { | 92 | 0 | *result = StringParser::PARSE_FAILURE; | 93 | 0 | return 0; | 94 | 0 | } | 95 | 1.74M | found_dot = 1; | 96 | 1.74M | } else { | 97 | 10.6k | break; | 98 | 10.6k | } | 99 | 34.0M | } | 100 | 2.09M | if (!found_value) { | 101 | | // '', '.' | 102 | 90.5k | *result = StringParser::PARSE_FAILURE; | 103 | 90.5k | return 0; | 104 | 90.5k | } | 105 | | // parse exponent if any | 106 | 2.00M | int64_t exponent = 0; | 107 | 2.00M | auto end_digit_index = i; | 108 | 2.00M | if (i != len) { | 109 | 12.3k | bool negative_exponent = false; | 110 | 12.3k | if (s[i] == 'e' || s[i] == 'E') { | 111 | 12.3k | ++i; | 112 | 12.3k | if (i != len) { | 113 | 12.3k | switch (s[i]) { | 114 | 4.61k | case '-': | 115 | 4.61k | negative_exponent = true; | 116 | 4.61k | [[fallthrough]]; | 117 | 4.61k | case '+': | 118 | 4.61k | ++i; | 119 | 12.3k | } | 120 | 12.3k | } | 121 | 12.3k | if (i == len) { | 122 | | // '123e', '123e+', '123e-' | 123 | 0 | *result = StringParser::PARSE_FAILURE; | 124 | 0 | return 0; | 125 | 0 | } | 126 | 35.6k | for (; i != len; ++i) { | 127 | 23.3k | const char& c = s[i]; | 128 | 23.3k | if (LIKELY('0' <= c && c <= '9')) { | 129 | 23.3k | exponent = exponent * 10 + (c - '0'); | 130 | | // max string len is 
config::string_type_length_soft_limit_bytes, | 131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 132 | | // just check overflow of int32_t to simplify the logic | 133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 134 | 23.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 135 | 0 | *result = StringParser::PARSE_OVERFLOW; | 136 | 0 | return 0; | 137 | 0 | } | 138 | 23.3k | } else { | 139 | | // '123e12abc', '123e1.2' | 140 | 0 | *result = StringParser::PARSE_FAILURE; | 141 | 0 | return 0; | 142 | 0 | } | 143 | 23.3k | } | 144 | 12.3k | if (negative_exponent) { | 145 | 4.61k | exponent = -exponent; | 146 | 4.61k | } | 147 | 12.3k | } else { | 148 | 18 | *result = StringParser::PARSE_FAILURE; | 149 | 18 | return 0; | 150 | 18 | } | 151 | 12.3k | } | 152 | 2.00M | T int_part_number = 0; | 153 | 2.00M | T frac_part_number = 0; | 154 | | // TODO: check limit values of exponent and add UT | 155 | | // max string len is config::string_type_length_soft_limit_bytes, | 156 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 157 | | // so int_part_count will be in range of int32_t, | 158 | | // and int_part_count + exponent will be in range of int64_t | 159 | 2.00M | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 160 | 2.00M | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 161 | 2.00M | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 163 | 0 | return 0; | 164 | 0 | } | 165 | 2.00M | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 166 | 2.00M | int actual_frac_part_count = 0; | 167 | 2.00M | int digit_index = 0; | 168 | 2.00M | if (result_int_part_digit_count >= 0) { | 169 | 2.00M | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 170 | 1.77M | ((int_part_count > 0 && exponent > 0) ? 
1 : 0)) | 171 | 2.00M | : result_int_part_digit_count, | 172 | 2.00M | end_digit_index); | 173 | 2.00M | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 174 | | // skip zero number | 175 | 2.21M | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 176 | 213k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 2.00M | if (digit_index != max_index && | 180 | 2.00M | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 181 | 141 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 182 | 141 | return 0; | 183 | 141 | } | 184 | | // get int part number | 185 | 25.8M | for (; digit_index != max_index; ++digit_index) { | 186 | 23.8M | if (UNLIKELY(s[digit_index] == '.')) { | 187 | 960 | continue; | 188 | 960 | } | 189 | 23.8M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 190 | 23.8M | } | 191 | 2.00M | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 
1 : 0); | 192 | 2.00M | if (result_int_part_digit_count > total_significant_digit_count) { | 193 | 76 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 194 | 76 | total_significant_digit_count); | 195 | 76 | } | 196 | 2.00M | } else { | 197 | | // leading zeros of fraction part | 198 | 1.06k | actual_frac_part_count = -result_int_part_digit_count; | 199 | 1.06k | } | 200 | | // get fraction part number | 201 | 11.8M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 202 | 9.88M | if (UNLIKELY(s[digit_index] == '.')) { | 203 | 1.74M | continue; | 204 | 1.74M | } | 205 | 8.14M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 206 | 8.14M | ++actual_frac_part_count; | 207 | 8.14M | } | 208 | 2.00M | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 209 | | // there are still extra fraction digits left, check rounding | 210 | 2.00M | if (digit_index != end_digit_index) { | 211 | 21.7k | if (UNLIKELY(s[digit_index] == '.')) { | 212 | 852 | ++digit_index; | 213 | 852 | } | 214 | 21.7k | if (digit_index != end_digit_index) { | 215 | | // example: test 1.5 -> decimal(1, 0) | 216 | 21.4k | if (s[digit_index] >= '5') { | 217 | 8.02k | ++frac_part_number; | 218 | 8.02k | if (frac_part_number == type_scale_multiplier) { | 219 | 906 | frac_part_number = 0; | 220 | 906 | ++int_part_number; | 221 | 906 | } | 222 | 8.02k | } | 223 | 21.4k | } | 224 | 1.98M | } else { | 225 | 1.98M | if (actual_frac_part_count < type_scale) { | 226 | 350k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 227 | 350k | } | 228 | 1.98M | } | 229 | 2.00M | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 230 | 16 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 231 | 16 | return 0; | 232 | 16 | } | 233 | | | 234 | 2.00M | T value = int_part_number * type_scale_multiplier + frac_part_number; | 235 | 2.00M | *result = StringParser::PARSE_SUCCESS; | 236 | 2.00M | return is_negative ? T(-value) : T(value); | 237 | 2.00M | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 47 | 13.8k | ParseResult* result) { | 48 | 13.8k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 49 | 13.8k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 50 | 13.8k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 51 | 13.8k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 52 | 13.8k | "wide::Int256."); | 53 | | // Ignore leading and trailing spaces. | 54 | 13.8k | s = skip_ascii_whitespaces(s, len); | 55 | | | 56 | 13.8k | bool is_negative = false; | 57 | 13.8k | if (len > 0) { | 58 | 13.8k | switch (*s) { | 59 | 6.72k | case '-': | 60 | 6.72k | is_negative = true; | 61 | 6.72k | [[fallthrough]]; | 62 | 6.72k | case '+': | 63 | 6.72k | ++s; | 64 | 6.72k | --len; | 65 | 13.8k | } | 66 | 13.8k | } | 67 | | // Ignore leading zeros. | 68 | 13.8k | bool found_value = false; | 69 | 52.7k | while (len > 0 && UNLIKELY(*s == '0')) { | 70 | 38.8k | found_value = true; | 71 | 38.8k | ++s; | 72 | 38.8k | --len; | 73 | 38.8k | } | 74 | | | 75 | 13.8k | int found_dot = 0; | 76 | 13.8k | if (len > 0 && *s == '.') { | 77 | 2.04k | found_dot = 1; | 78 | 2.04k | ++s; | 79 | 2.04k | --len; | 80 | 2.04k | } | 81 | 13.8k | int int_part_count = 0; | 82 | 13.8k | int i = 0; | 83 | 283k | for (; i != len; ++i) { | 84 | 270k | const char& c = s[i]; | 85 | 270k | if (LIKELY('0' <= c && c <= '9')) { | 86 | 258k | found_value = true; | 87 | 258k | if (!found_dot) { | 88 | 138k | ++int_part_count; | 89 | 138k | } | 90 | 258k | } else if (c == '.') { | 91 | 11.6k | if (found_dot) { | 92 | 0 | *result = StringParser::PARSE_FAILURE; | 93 | 0 | return 0; | 94 | 0 | } | 95 | 11.6k | found_dot = 1; | 96 | 11.6k | } else { | 97 | 12 | break; | 98 | 12 | } | 99 | 270k | } | 100 | 13.8k | if (!found_value) { | 101 | | // '', '.' 
| 102 | 11 | *result = StringParser::PARSE_FAILURE; | 103 | 11 | return 0; | 104 | 11 | } | 105 | | // parse exponent if any | 106 | 13.8k | int64_t exponent = 0; | 107 | 13.8k | auto end_digit_index = i; | 108 | 13.8k | if (i != len) { | 109 | 1 | bool negative_exponent = false; | 110 | 1 | if (s[i] == 'e' || s[i] == 'E') { | 111 | 0 | ++i; | 112 | 0 | if (i != len) { | 113 | 0 | switch (s[i]) { | 114 | 0 | case '-': | 115 | 0 | negative_exponent = true; | 116 | 0 | [[fallthrough]]; | 117 | 0 | case '+': | 118 | 0 | ++i; | 119 | 0 | } | 120 | 0 | } | 121 | 0 | if (i == len) { | 122 | | // '123e', '123e+', '123e-' | 123 | 0 | *result = StringParser::PARSE_FAILURE; | 124 | 0 | return 0; | 125 | 0 | } | 126 | 0 | for (; i != len; ++i) { | 127 | 0 | const char& c = s[i]; | 128 | 0 | if (LIKELY('0' <= c && c <= '9')) { | 129 | 0 | exponent = exponent * 10 + (c - '0'); | 130 | | // max string len is config::string_type_length_soft_limit_bytes, | 131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 132 | | // just check overflow of int32_t to simplify the logic | 133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 134 | 0 | if (exponent > std::numeric_limits<int32_t>::max()) { | 135 | 0 | *result = StringParser::PARSE_OVERFLOW; | 136 | 0 | return 0; | 137 | 0 | } | 138 | 0 | } else { | 139 | | // '123e12abc', '123e1.2' | 140 | 0 | *result = StringParser::PARSE_FAILURE; | 141 | 0 | return 0; | 142 | 0 | } | 143 | 0 | } | 144 | 0 | if (negative_exponent) { | 145 | 0 | exponent = -exponent; | 146 | 0 | } | 147 | 1 | } else { | 148 | 1 | *result = StringParser::PARSE_FAILURE; | 149 | 1 | return 0; | 150 | 1 | } | 151 | 1 | } | 152 | 13.8k | T int_part_number = 0; | 153 | 13.8k | T frac_part_number = 0; | 154 | | // TODO: check limit values of exponent and add UT | 155 | | // max string len is config::string_type_length_soft_limit_bytes, | 156 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 157 | | // so int_part_count 
will be in range of int32_t, | 158 | | // and int_part_count + exponent will be in range of int64_t | 159 | 13.8k | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 160 | 13.8k | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 161 | 13.8k | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 163 | 0 | return 0; | 164 | 0 | } | 165 | 13.8k | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 166 | 13.8k | int actual_frac_part_count = 0; | 167 | 13.8k | int digit_index = 0; | 168 | 13.8k | if (result_int_part_digit_count >= 0) { | 169 | 13.8k | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 170 | 13.6k | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 171 | 13.8k | : result_int_part_digit_count, | 172 | 13.8k | end_digit_index); | 173 | 13.8k | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 174 | | // skip zero number | 175 | 13.8k | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 176 | 0 | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 13.8k | if (digit_index != max_index && | 180 | 13.8k | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 181 | 8 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 182 | 8 | return 0; | 183 | 8 | } | 184 | | // get int part number | 185 | 152k | for (; digit_index != max_index; ++digit_index) { | 186 | 138k | if (UNLIKELY(s[digit_index] == '.')) { | 187 | 0 | continue; | 188 | 0 | } | 189 | 138k | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 190 | 138k | } | 191 | 13.8k | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 
1 : 0); | 192 | 13.8k | if (result_int_part_digit_count > total_significant_digit_count) { | 193 | 0 | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 194 | 0 | total_significant_digit_count); | 195 | 0 | } | 196 | 13.8k | } else { | 197 | | // leading zeros of fraction part | 198 | 0 | actual_frac_part_count = -result_int_part_digit_count; | 199 | 0 | } | 200 | | // get fraction part number | 201 | 145k | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 202 | 131k | if (UNLIKELY(s[digit_index] == '.')) { | 203 | 11.6k | continue; | 204 | 11.6k | } | 205 | 119k | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 206 | 119k | ++actual_frac_part_count; | 207 | 119k | } | 208 | 13.8k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 209 | | // there are still extra fraction digits left, check rounding | 210 | 13.8k | if (digit_index != end_digit_index) { | 211 | 17 | if (UNLIKELY(s[digit_index] == '.')) { | 212 | 0 | ++digit_index; | 213 | 0 | } | 214 | 17 | if (digit_index != end_digit_index) { | 215 | | // example: test 1.5 -> decimal(1, 0) | 216 | 17 | if (s[digit_index] >= '5') { | 217 | 17 | ++frac_part_number; | 218 | 17 | if (frac_part_number == type_scale_multiplier) { | 219 | 0 | frac_part_number = 0; | 220 | 0 | ++int_part_number; | 221 | 0 | } | 222 | 17 | } | 223 | 17 | } | 224 | 13.8k | } else { | 225 | 13.8k | if (actual_frac_part_count < type_scale) { | 226 | 2.17k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 227 | 2.17k | } | 228 | 13.8k | } | 229 | 13.8k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 230 | 0 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 231 | 0 | return 0; | 232 | 0 | } | 233 | | | 234 | 13.8k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 235 | 13.8k | *result = StringParser::PARSE_SUCCESS; | 236 | 13.8k | return is_negative ? T(-value) : T(value); | 237 | 13.8k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 47 | 133k | ParseResult* result) { | 48 | 133k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 49 | 133k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 50 | 133k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 51 | 133k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 52 | 133k | "wide::Int256."); | 53 | | // Ignore leading and trailing spaces. | 54 | 133k | s = skip_ascii_whitespaces(s, len); | 55 | | | 56 | 133k | bool is_negative = false; | 57 | 133k | if (len > 0) { | 58 | 133k | switch (*s) { | 59 | 19.8k | case '-': | 60 | 19.8k | is_negative = true; | 61 | 19.8k | [[fallthrough]]; | 62 | 26.5k | case '+': | 63 | 26.5k | ++s; | 64 | 26.5k | --len; | 65 | 133k | } | 66 | 133k | } | 67 | | // Ignore leading zeros. | 68 | 133k | bool found_value = false; | 69 | 231k | while (len > 0 && UNLIKELY(*s == '0')) { | 70 | 97.6k | found_value = true; | 71 | 97.6k | ++s; | 72 | 97.6k | --len; | 73 | 97.6k | } | 74 | | | 75 | 133k | int found_dot = 0; | 76 | 133k | if (len > 0 && *s == '.') { | 77 | 16.7k | found_dot = 1; | 78 | 16.7k | ++s; | 79 | 16.7k | --len; | 80 | 16.7k | } | 81 | 133k | int int_part_count = 0; | 82 | 133k | int i = 0; | 83 | 4.88M | for (; i != len; ++i) { | 84 | 4.82M | const char& c = s[i]; | 85 | 4.82M | if (LIKELY('0' <= c && c <= '9')) { | 86 | 4.64M | found_value = true; | 87 | 4.64M | if (!found_dot) { | 88 | 1.68M | ++int_part_count; | 89 | 1.68M | } | 90 | 4.64M | } else if (c == '.') { | 91 | 104k | if (found_dot) { | 92 | 0 | *result = StringParser::PARSE_FAILURE; | 93 | 0 | return 0; | 94 | 0 | } | 95 | 104k | found_dot = 1; | 96 | 104k | } else { | 97 | 77.4k | break; | 98 | 77.4k | } | 99 | 4.82M | } | 100 | 133k | if (!found_value) { | 101 | | // '', '.' 
| 102 | 78 | *result = StringParser::PARSE_FAILURE; | 103 | 78 | return 0; | 104 | 78 | } | 105 | | // parse exponent if any | 106 | 133k | int64_t exponent = 0; | 107 | 133k | auto end_digit_index = i; | 108 | 133k | if (i != len) { | 109 | 77.4k | bool negative_exponent = false; | 110 | 77.4k | if (s[i] == 'e' || s[i] == 'E') { | 111 | 77.4k | ++i; | 112 | 77.4k | if (i != len) { | 113 | 77.4k | switch (s[i]) { | 114 | 3.04k | case '-': | 115 | 3.04k | negative_exponent = true; | 116 | 3.04k | [[fallthrough]]; | 117 | 69.6k | case '+': | 118 | 69.6k | ++i; | 119 | 77.4k | } | 120 | 77.4k | } | 121 | 77.4k | if (i == len) { | 122 | | // '123e', '123e+', '123e-' | 123 | 0 | *result = StringParser::PARSE_FAILURE; | 124 | 0 | return 0; | 125 | 0 | } | 126 | 232k | for (; i != len; ++i) { | 127 | 155k | const char& c = s[i]; | 128 | 155k | if (LIKELY('0' <= c && c <= '9')) { | 129 | 155k | exponent = exponent * 10 + (c - '0'); | 130 | | // max string len is config::string_type_length_soft_limit_bytes, | 131 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 132 | | // just check overflow of int32_t to simplify the logic | 133 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 134 | 155k | if (exponent > std::numeric_limits<int32_t>::max()) { | 135 | 0 | *result = StringParser::PARSE_OVERFLOW; | 136 | 0 | return 0; | 137 | 0 | } | 138 | 155k | } else { | 139 | | // '123e12abc', '123e1.2' | 140 | 10 | *result = StringParser::PARSE_FAILURE; | 141 | 10 | return 0; | 142 | 10 | } | 143 | 155k | } | 144 | 77.4k | if (negative_exponent) { | 145 | 3.04k | exponent = -exponent; | 146 | 3.04k | } | 147 | 77.4k | } else { | 148 | 14 | *result = StringParser::PARSE_FAILURE; | 149 | 14 | return 0; | 150 | 14 | } | 151 | 77.4k | } | 152 | 133k | T int_part_number = 0; | 153 | 133k | T frac_part_number = 0; | 154 | | // TODO: check limit values of exponent and add UT | 155 | | // max string len is config::string_type_length_soft_limit_bytes, | 156 | | // 
whose max value is std::numeric_limits<int32_t>::max() - 4, | 157 | | // so int_part_count will be in range of int32_t, | 158 | | // and int_part_count + exponent will be in range of int64_t | 159 | 133k | int64_t tmp_result_int_part_digit_count = int_part_count + exponent; | 160 | 133k | if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() || | 161 | 133k | tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) { | 162 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 163 | 0 | return 0; | 164 | 0 | } | 165 | 133k | int result_int_part_digit_count = tmp_result_int_part_digit_count; | 166 | 133k | int actual_frac_part_count = 0; | 167 | 133k | int digit_index = 0; | 168 | 133k | if (result_int_part_digit_count >= 0) { | 169 | 133k | int max_index = std::min(found_dot ? (result_int_part_digit_count + | 170 | 121k | ((int_part_count > 0 && exponent > 0) ? 1 : 0)) | 171 | 133k | : result_int_part_digit_count, | 172 | 133k | end_digit_index); | 173 | 133k | max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index); | 174 | | // skip zero number | 175 | 350k | for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) { | 176 | 217k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 133k | if (digit_index != max_index && | 180 | 133k | (result_int_part_digit_count - digit_index > type_precision - type_scale)) { | 181 | 336 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 182 | 336 | return 0; | 183 | 336 | } | 184 | | // get int part number | 185 | 2.71M | for (; digit_index != max_index; ++digit_index) { | 186 | 2.58M | if (UNLIKELY(s[digit_index] == '.')) { | 187 | 67.5k | continue; | 188 | 67.5k | } | 189 | 2.51M | int_part_number = int_part_number * 10 + (s[digit_index] - '0'); | 190 | 2.51M | } | 191 | 133k | auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0); | 192 | 133k | if (result_int_part_digit_count > total_significant_digit_count) { | 193 | 2.18k | int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count - | 194 | 2.18k | total_significant_digit_count); | 195 | 2.18k | } | 196 | 133k | } else { | 197 | | // leading zeros of fraction part | 198 | 147 | actual_frac_part_count = -result_int_part_digit_count; | 199 | 147 | } | 200 | | // get fraction part number | 201 | 1.86M | for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) { | 202 | 1.72M | if (UNLIKELY(s[digit_index] == '.')) { | 203 | 33.9k | continue; | 204 | 33.9k | } | 205 | 1.69M | frac_part_number = frac_part_number * 10 + (s[digit_index] - '0'); | 206 | 1.69M | ++actual_frac_part_count; | 207 | 1.69M | } | 208 | 133k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 209 | | // there are still extra fraction digits left, check rounding | 210 | 133k | if (digit_index != end_digit_index) { | 211 | 20.8k | if (UNLIKELY(s[digit_index] == '.')) { | 212 | 860 | ++digit_index; | 213 | 860 | } | 214 | 20.8k | if (digit_index != end_digit_index) { | 215 | | // example: test 1.5 -> decimal(1, 0) | 216 | 20.5k | if (s[digit_index] >= '5') { | 217 | 8.73k | ++frac_part_number; | 218 | 8.73k | if (frac_part_number == type_scale_multiplier) { | 219 | 952 | frac_part_number = 0; | 220 | 952 | ++int_part_number; | 221 | 952 | } | 222 | 8.73k | } | 223 | 20.5k | } | 224 | 112k | } else { | 225 | 112k | if 
(actual_frac_part_count < type_scale) { | 226 | 91.5k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 227 | 91.5k | } | 228 | 112k | } | 229 | 133k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 230 | 48 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 231 | 48 | return 0; | 232 | 48 | } | 233 | | | 234 | 133k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 235 | 133k | *result = StringParser::PARSE_SUCCESS; | 236 | 133k | return is_negative ? T(-value) : T(value); | 237 | 133k | } |
|
238 | | |
// Explicit instantiations of StringParser::string_to_decimal, one per decimal
// primitive type, each spelled with the return type it produces:
//   TYPE_DECIMAL32   -> Int32
//   TYPE_DECIMAL64   -> Int64
//   TYPE_DECIMAL128I -> Int128
//   TYPE_DECIMALV2   -> Int128
//   TYPE_DECIMAL256  -> wide::Int256
// NOTE(review): presumably required because the template body is defined in this
// .cpp file rather than in string_parser.hpp -- confirm against the header.
239 | | template Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
240 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
241 | | ParseResult* result); |
242 | | template Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
243 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
244 | | ParseResult* result); |
245 | | template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
246 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
247 | | ParseResult* result); |
248 | | template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
249 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
250 | | ParseResult* result); |
251 | | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
252 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
253 | | ParseResult* result); |
254 | | } // end namespace doris |
255 | | #include "common/compile_check_avoid_end.h" |