/root/doris/be/src/util/string_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/string_parser.hpp" |
19 | | |
20 | | #include <limits> |
21 | | |
22 | | #include "vec/core/extended_types.h" |
23 | | namespace doris { |
24 | | #include "common/compile_check_avoid_begin.h" |
25 | | // Supported decimal number format: |
26 | | // <decimal> ::= <whitespace>* <value> <whitespace>* |
27 | | // |
28 | | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
29 | | // |
30 | | // <value> ::= <sign>? <significand> <exponent>? |
31 | | // |
32 | | // <sign> ::= "+" | "-" |
33 | | // |
34 | | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
35 | | // |
36 | | // <digits> ::= <digit>+ |
37 | | // |
38 | | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
39 | | // |
40 | | // <exponent> ::= <e_marker> <sign>? <digits> |
41 | | // |
42 | | // <e_marker> ::= "e" | "E" |
43 | | template <PrimitiveType P> |
44 | | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
45 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
46 | 354k | ParseResult* result) { |
47 | 354k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
48 | 354k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
49 | 354k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
50 | 354k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
51 | 354k | "wide::Int256."); |
52 | | // Ignore leading and trailing spaces. |
53 | 354k | s = skip_ascii_whitespaces(s, len); |
54 | | |
55 | 354k | bool is_negative = false; |
56 | 354k | if (len > 0) { |
57 | 354k | switch (*s) { |
58 | 92.4k | case '-': |
59 | 92.4k | is_negative = true; |
60 | 92.4k | [[fallthrough]]; |
61 | 119k | case '+': |
62 | 119k | ++s; |
63 | 119k | --len; |
64 | 354k | } |
65 | 354k | } |
66 | | // Ignore leading zeros. |
67 | 354k | bool found_value = false; |
68 | 691k | while (len > 0 && UNLIKELY(*s == '0')) { |
69 | 337k | found_value = true; |
70 | 337k | ++s; |
71 | 337k | --len; |
72 | 337k | } |
73 | | |
74 | 354k | int found_dot = 0; |
75 | 354k | if (len > 0 && *s == '.') { |
76 | 84.7k | found_dot = 1; |
77 | 84.7k | ++s; |
78 | 84.7k | --len; |
79 | 84.7k | } |
80 | 354k | int int_part_count = 0; |
81 | 354k | std::vector<unsigned char> digits; |
82 | 354k | if (len > 0) { |
83 | 345k | digits.resize(len); |
84 | 345k | } |
85 | 354k | int total_digit_count = 0; |
86 | 354k | int i = 0; |
87 | 8.23M | for (; i != len; ++i) { |
88 | 7.99M | const char& c = s[i]; |
89 | 7.99M | if (LIKELY('0' <= c && c <= '9')) { |
90 | 7.65M | found_value = true; |
91 | 7.65M | digits[total_digit_count++] = c - '0'; |
92 | 7.65M | if (!found_dot) { |
93 | 2.36M | ++int_part_count; |
94 | 2.36M | } |
95 | 7.65M | } else if (c == '.') { |
96 | 224k | if (found_dot) { |
97 | 2 | *result = StringParser::PARSE_FAILURE; |
98 | 2 | return 0; |
99 | 2 | } |
100 | 224k | found_dot = 1; |
101 | 224k | } else { |
102 | 111k | break; |
103 | 111k | } |
104 | 7.99M | } |
105 | 354k | if (!found_value) { |
106 | | // '', '.' |
107 | 750 | *result = StringParser::PARSE_FAILURE; |
108 | 750 | return 0; |
109 | 750 | } |
110 | | // parse exponent if any |
111 | 353k | int64_t exponent = 0; |
112 | 353k | if (i != len) { |
113 | 110k | bool negative_exponent = false; |
114 | 110k | if (s[i] == 'e' || s[i] == 'E') { |
115 | 110k | ++i; |
116 | 110k | if (i != len) { |
117 | 110k | switch (s[i]) { |
118 | 11.6k | case '-': |
119 | 11.6k | negative_exponent = true; |
120 | 11.6k | [[fallthrough]]; |
121 | 79.2k | case '+': |
122 | 79.2k | ++i; |
123 | 110k | } |
124 | 110k | } |
125 | 110k | if (i == len) { |
126 | | // '123e', '123e+', '123e-' |
127 | 6 | *result = StringParser::PARSE_FAILURE; |
128 | 6 | return 0; |
129 | 6 | } |
130 | 322k | for (; i != len; ++i) { |
131 | 211k | const char& c = s[i]; |
132 | 211k | if (LIKELY('0' <= c && c <= '9')) { |
133 | 211k | exponent = exponent * 10 + (c - '0'); |
134 | | // max string len is config::string_type_length_soft_limit_bytes, |
135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
136 | | // just check overflow of int32_t to simplify the logic |
137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 |
138 | 211k | if (exponent > std::numeric_limits<int32_t>::max()) { |
139 | 0 | *result = StringParser::PARSE_OVERFLOW; |
140 | 0 | return 0; |
141 | 0 | } |
142 | 211k | } else { |
143 | | // '123e12abc', '123e1.2' |
144 | 22 | *result = StringParser::PARSE_FAILURE; |
145 | 22 | return 0; |
146 | 22 | } |
147 | 211k | } |
148 | 110k | if (negative_exponent) { |
149 | 11.6k | exponent = -exponent; |
150 | 11.6k | } |
151 | 110k | } else { |
152 | 116 | *result = StringParser::PARSE_FAILURE; |
153 | 116 | return 0; |
154 | 116 | } |
155 | 110k | } |
156 | 353k | T int_part_number = 0; |
157 | 353k | T frac_part_number = 0; |
158 | | // TODO: check limit values of exponent and add UT |
159 | | // max string len is config::string_type_length_soft_limit_bytes, |
160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
161 | | // so int_part_count will be in range of int32_t, |
162 | | // and int_part_count + exponent will be in range of int64_t |
163 | 353k | int64_t tmp_actual_int_part_count = int_part_count + exponent; |
164 | 353k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |
165 | 353k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |
166 | 0 | *result = StringParser::PARSE_OVERFLOW; |
167 | 0 | return 0; |
168 | 0 | } |
169 | 353k | int actual_int_part_count = tmp_actual_int_part_count; |
170 | 353k | int actual_frac_part_count = 0; |
171 | 353k | int digit_index = 0; |
172 | 353k | if (actual_int_part_count >= 0) { |
173 | 347k | int max_index = std::min(actual_int_part_count, total_digit_count); |
174 | | // skip zero number |
175 | 1.20M | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |
176 | 855k | } |
177 | | // test 0.00, .00, 0.{00...}e2147483647 |
178 | | // 0.00000e2147483647 |
179 | 347k | if (max_index - digit_index > type_precision - type_scale) { |
180 | 11.8k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
181 | 11.8k | return 0; |
182 | 11.8k | } |
183 | | // get int part number |
184 | 3.42M | for (; digit_index != max_index; ++digit_index) { |
185 | 3.08M | int_part_number = int_part_number * 10 + digits[digit_index]; |
186 | 3.08M | } |
187 | 335k | if (digit_index != actual_int_part_count) { |
188 | 65.7k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |
189 | 65.7k | } |
190 | 335k | } else { |
191 | | // leading zeros of fraction part |
192 | 5.65k | actual_frac_part_count = -actual_int_part_count; |
193 | 5.65k | } |
194 | | // get fraction part number |
195 | 3.50M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |
196 | 3.16M | ++digit_index, ++actual_frac_part_count) { |
197 | 3.16M | frac_part_number = frac_part_number * 10 + digits[digit_index]; |
198 | 3.16M | } |
199 | 341k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
200 | | // there are still extra fraction digits left, check rounding |
201 | 341k | if (digit_index != total_digit_count) { |
202 | | // example: test 1.5 -> decimal(1, 0) |
203 | 81.3k | if (digits[digit_index] >= 5) { |
204 | 32.9k | ++frac_part_number; |
205 | 32.9k | if (frac_part_number == type_scale_multiplier) { |
206 | 3.43k | frac_part_number = 0; |
207 | 3.43k | ++int_part_number; |
208 | 3.43k | } |
209 | 32.9k | } |
210 | 260k | } else { |
211 | 260k | if (actual_frac_part_count < type_scale) { |
212 | 194k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
213 | 194k | } |
214 | 260k | } |
215 | 341k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
216 | 73 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
217 | 73 | return 0; |
218 | 73 | } |
219 | | |
220 | 341k | T value = int_part_number * type_scale_multiplier + frac_part_number; |
221 | 341k | *result = StringParser::PARSE_SUCCESS; |
222 | 341k | return is_negative ? T(-value) : T(value); |
223 | 341k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 54.5k | ParseResult* result) { | 47 | 54.5k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 54.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 54.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 54.5k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 54.5k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 54.5k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 54.5k | bool is_negative = false; | 56 | 54.5k | if (len > 0) { | 57 | 54.5k | switch (*s) { | 58 | 25.0k | case '-': | 59 | 25.0k | is_negative = true; | 60 | 25.0k | [[fallthrough]]; | 61 | 32.3k | case '+': | 62 | 32.3k | ++s; | 63 | 32.3k | --len; | 64 | 54.5k | } | 65 | 54.5k | } | 66 | | // Ignore leading zeros. | 67 | 54.5k | bool found_value = false; | 68 | 108k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 53.6k | found_value = true; | 70 | 53.6k | ++s; | 71 | 53.6k | --len; | 72 | 53.6k | } | 73 | | | 74 | 54.5k | int found_dot = 0; | 75 | 54.5k | if (len > 0 && *s == '.') { | 76 | 17.9k | found_dot = 1; | 77 | 17.9k | ++s; | 78 | 17.9k | --len; | 79 | 17.9k | } | 80 | 54.5k | int int_part_count = 0; | 81 | 54.5k | std::vector<unsigned char> digits; | 82 | 54.5k | if (len > 0) { | 83 | 52.4k | digits.resize(len); | 84 | 52.4k | } | 85 | 54.5k | int total_digit_count = 0; | 86 | 54.5k | int i = 0; | 87 | 578k | for (; i != len; ++i) { | 88 | 533k | const char& c = s[i]; | 89 | 533k | if (LIKELY('0' <= c && c <= '9')) { | 90 | 509k | found_value = true; | 91 | 509k | digits[total_digit_count++] = c - '0'; | 92 | 509k | if (!found_dot) { | 93 | 159k | ++int_part_count; | 94 | 159k | } | 95 | 509k | } else if (c == '.') { | 96 | 14.6k | if (found_dot) { | 97 | 2 | *result = StringParser::PARSE_FAILURE; | 98 | 2 | return 0; | 99 | 2 | } | 100 | 14.6k | found_dot = 1; | 101 | 14.6k | } else { | 102 | 9.56k | break; | 103 | 9.56k | } | 104 | 533k | } | 105 | 54.5k | if (!found_value) { | 106 | | // '', '.' | 107 | 190 | *result = StringParser::PARSE_FAILURE; | 108 | 190 | return 0; | 109 | 190 | } | 110 | | // parse exponent if any | 111 | 54.3k | int64_t exponent = 0; | 112 | 54.3k | if (i != len) { | 113 | 9.39k | bool negative_exponent = false; | 114 | 9.39k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 9.33k | ++i; | 116 | 9.33k | if (i != len) { | 117 | 9.33k | switch (s[i]) { | 118 | 1.54k | case '-': | 119 | 1.54k | negative_exponent = true; | 120 | 1.54k | [[fallthrough]]; | 121 | 1.54k | case '+': | 122 | 1.54k | ++i; | 123 | 9.33k | } | 124 | 9.33k | } | 125 | 9.33k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 6 | *result = StringParser::PARSE_FAILURE; | 128 | 6 | return 0; | 129 | 6 | } | 130 | 24.6k | for (; i != len; ++i) { | 131 | 15.3k | const char& c = s[i]; | 132 | 15.3k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 15.3k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 15.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 15.3k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 12 | *result = StringParser::PARSE_FAILURE; | 145 | 12 | return 0; | 146 | 12 | } | 147 | 15.3k | } | 148 | 9.31k | if (negative_exponent) { | 149 | 1.53k | exponent = -exponent; | 150 | 1.53k | } | 151 | 9.31k | } else { | 152 | 60 | *result = StringParser::PARSE_FAILURE; | 153 | 60 | return 0; | 154 | 60 | } | 155 | 9.39k | } | 156 | 54.2k | T int_part_number = 0; | 157 | 54.2k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 54.2k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 54.2k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 54.2k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 54.2k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 54.2k | int actual_frac_part_count = 0; | 171 | 54.2k | int digit_index = 0; | 172 | 54.2k | if (actual_int_part_count >= 0) { | 173 | 54.1k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 266k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 212k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 54.1k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 1.31k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 1.31k | return 0; | 182 | 1.31k | } | 183 | | // get int part number | 184 | 153k | for (; digit_index != max_index; ++digit_index) { | 185 | 100k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 100k | } | 187 | 52.8k | if (digit_index != actual_int_part_count) { | 188 | 100 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 100 | } | 190 | 52.8k | } else { | 191 | | // leading zeros of fraction part | 192 | 48 | actual_frac_part_count = -actual_int_part_count; | 193 | 48 | } | 194 | | // get fraction part number | 195 | 159k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 106k | ++digit_index, ++actual_frac_part_count) { | 197 | 106k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 106k | } | 199 | 52.9k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 52.9k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 21.1k | if (digits[digit_index] >= 5) { | 204 | 8.96k | ++frac_part_number; | 205 | 8.96k | if (frac_part_number == type_scale_multiplier) { | 206 | 856 | frac_part_number = 0; | 207 | 856 | ++int_part_number; | 208 | 856 | } | 209 | 8.96k | } | 210 | 31.8k | } else { | 211 | 31.8k | if (actual_frac_part_count < type_scale) { | 212 | 28.0k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 28.0k | } | 214 | 31.8k | } | 215 | 52.9k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 24 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 24 | return 0; | 218 | 24 | } | 219 | | | 220 | 52.9k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 52.9k | *result = StringParser::PARSE_SUCCESS; | 222 | 52.9k | return is_negative ? T(-value) : T(value); | 223 | 52.9k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 86.6k | ParseResult* result) { | 47 | 86.6k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 86.6k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 86.6k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 86.6k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 86.6k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 86.6k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 86.6k | bool is_negative = false; | 56 | 86.6k | if (len > 0) { | 57 | 86.6k | switch (*s) { | 58 | 21.7k | case '-': | 59 | 21.7k | is_negative = true; | 60 | 21.7k | [[fallthrough]]; | 61 | 28.3k | case '+': | 62 | 28.3k | ++s; | 63 | 28.3k | --len; | 64 | 86.6k | } | 65 | 86.6k | } | 66 | | // Ignore leading zeros. | 67 | 86.6k | bool found_value = false; | 68 | 162k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 76.1k | found_value = true; | 70 | 76.1k | ++s; | 71 | 76.1k | --len; | 72 | 76.1k | } | 73 | | | 74 | 86.6k | int found_dot = 0; | 75 | 86.6k | if (len > 0 && *s == '.') { | 76 | 23.9k | found_dot = 1; | 77 | 23.9k | ++s; | 78 | 23.9k | --len; | 79 | 23.9k | } | 80 | 86.6k | int int_part_count = 0; | 81 | 86.6k | std::vector<unsigned char> digits; | 82 | 86.6k | if (len > 0) { | 83 | 84.5k | digits.resize(len); | 84 | 84.5k | } | 85 | 86.6k | int total_digit_count = 0; | 86 | 86.6k | int i = 0; | 87 | 1.40M | for (; i != len; ++i) { | 88 | 1.33M | const char& c = s[i]; | 89 | 1.33M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 1.26M | found_value = true; | 91 | 1.26M | digits[total_digit_count++] = c - '0'; | 92 | 1.26M | if (!found_dot) { | 93 | 521k | ++int_part_count; | 94 | 521k | } | 95 | 1.26M | } else if (c == '.') { | 96 | 53.6k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 53.6k | found_dot = 1; | 101 | 53.6k | } else { | 102 | 12.0k | break; | 103 | 12.0k | } | 104 | 1.33M | } | 105 | 86.6k | if (!found_value) { | 106 | | // '', '.' | 107 | 379 | *result = StringParser::PARSE_FAILURE; | 108 | 379 | return 0; | 109 | 379 | } | 110 | | // parse exponent if any | 111 | 86.2k | int64_t exponent = 0; | 112 | 86.2k | if (i != len) { | 113 | 11.6k | bool negative_exponent = false; | 114 | 11.6k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 11.6k | ++i; | 116 | 11.6k | if (i != len) { | 117 | 11.6k | switch (s[i]) { | 118 | 3.87k | case '-': | 119 | 3.87k | negative_exponent = true; | 120 | 3.87k | [[fallthrough]]; | 121 | 3.87k | case '+': | 122 | 3.87k | ++i; | 123 | 11.6k | } | 124 | 11.6k | } | 125 | 11.6k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 32.4k | for (; i != len; ++i) { | 131 | 20.8k | const char& c = s[i]; | 132 | 20.8k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 20.8k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 20.8k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 20.8k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 20.8k | } | 148 | 11.6k | if (negative_exponent) { | 149 | 3.87k | exponent = -exponent; | 150 | 3.87k | } | 151 | 11.6k | } else { | 152 | 23 | *result = StringParser::PARSE_FAILURE; | 153 | 23 | return 0; | 154 | 23 | } | 155 | 11.6k | } | 156 | 86.2k | T int_part_number = 0; | 157 | 86.2k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 86.2k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 86.2k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 86.2k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 86.2k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 86.2k | int actual_frac_part_count = 0; | 171 | 86.2k | int digit_index = 0; | 172 | 86.2k | if (actual_int_part_count >= 0) { | 173 | 83.8k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 296k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 212k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 83.8k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 10.2k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 10.2k | return 0; | 182 | 10.2k | } | 183 | | // get int part number | 184 | 409k | for (; digit_index != max_index; ++digit_index) { | 185 | 335k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 335k | } | 187 | 73.5k | if (digit_index != actual_int_part_count) { | 188 | 77 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 77 | } | 190 | 73.5k | } else { | 191 | | // leading zeros of fraction part | 192 | 2.39k | actual_frac_part_count = -actual_int_part_count; | 193 | 2.39k | } | 194 | | // get fraction part number | 195 | 551k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 475k | ++digit_index, ++actual_frac_part_count) { | 197 | 475k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 475k | } | 199 | 75.9k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 75.9k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 19.8k | if (digits[digit_index] >= 5) { | 204 | 7.95k | ++frac_part_number; | 205 | 7.95k | if (frac_part_number == type_scale_multiplier) { | 206 | 836 | frac_part_number = 0; | 207 | 836 | ++int_part_number; | 208 | 836 | } | 209 | 7.95k | } | 210 | 56.0k | } else { | 211 | 56.0k | if (actual_frac_part_count < type_scale) { | 212 | 31.7k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 31.7k | } | 214 | 56.0k | } | 215 | 75.9k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 17 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 17 | return 0; | 218 | 17 | } | 219 | | | 220 | 75.9k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 75.9k | *result = StringParser::PARSE_SUCCESS; | 222 | 75.9k | return is_negative ? T(-value) : T(value); | 223 | 75.9k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 82.8k | ParseResult* result) { | 47 | 82.8k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 82.8k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 82.8k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 82.8k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 82.8k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 82.8k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 82.8k | bool is_negative = false; | 56 | 82.8k | if (len > 0) { | 57 | 82.8k | switch (*s) { | 58 | 21.7k | case '-': | 59 | 21.7k | is_negative = true; | 60 | 21.7k | [[fallthrough]]; | 61 | 28.3k | case '+': | 62 | 28.3k | ++s; | 63 | 28.3k | --len; | 64 | 82.8k | } | 65 | 82.8k | } | 66 | | // Ignore leading zeros. | 67 | 82.8k | bool found_value = false; | 68 | 157k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 74.7k | found_value = true; | 70 | 74.7k | ++s; | 71 | 74.7k | --len; | 72 | 74.7k | } | 73 | | | 74 | 82.8k | int found_dot = 0; | 75 | 82.8k | if (len > 0 && *s == '.') { | 76 | 25.0k | found_dot = 1; | 77 | 25.0k | ++s; | 78 | 25.0k | --len; | 79 | 25.0k | } | 80 | 82.8k | int int_part_count = 0; | 81 | 82.8k | std::vector<unsigned char> digits; | 82 | 82.8k | if (len > 0) { | 83 | 80.8k | digits.resize(len); | 84 | 80.8k | } | 85 | 82.8k | int total_digit_count = 0; | 86 | 82.8k | int i = 0; | 87 | 2.06M | for (; i != len; ++i) { | 88 | 1.99M | const char& c = s[i]; | 89 | 1.99M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 1.92M | found_value = true; | 91 | 1.92M | digits[total_digit_count++] = c - '0'; | 92 | 1.92M | if (!found_dot) { | 93 | 560k | ++int_part_count; | 94 | 560k | } | 95 | 1.92M | } else if (c == '.') { | 96 | 49.9k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 49.9k | found_dot = 1; | 101 | 49.9k | } else { | 102 | 12.5k | break; | 103 | 12.5k | } | 104 | 1.99M | } | 105 | 82.8k | if (!found_value) { | 106 | | // '', '.' | 107 | 79 | *result = StringParser::PARSE_FAILURE; | 108 | 79 | return 0; | 109 | 79 | } | 110 | | // parse exponent if any | 111 | 82.8k | int64_t exponent = 0; | 112 | 82.8k | if (i != len) { | 113 | 12.4k | bool negative_exponent = false; | 114 | 12.4k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 12.4k | ++i; | 116 | 12.4k | if (i != len) { | 117 | 12.4k | switch (s[i]) { | 118 | 4.66k | case '-': | 119 | 4.66k | negative_exponent = true; | 120 | 4.66k | [[fallthrough]]; | 121 | 4.66k | case '+': | 122 | 4.66k | ++i; | 123 | 12.4k | } | 124 | 12.4k | } | 125 | 12.4k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 35.8k | for (; i != len; ++i) { | 131 | 23.3k | const char& c = s[i]; | 132 | 23.3k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 23.3k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 23.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 23.3k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 23.3k | } | 148 | 12.4k | if (negative_exponent) { | 149 | 4.66k | exponent = -exponent; | 150 | 4.66k | } | 151 | 12.4k | } else { | 152 | 12 | *result = StringParser::PARSE_FAILURE; | 153 | 12 | return 0; | 154 | 12 | } | 155 | 12.4k | } | 156 | 82.7k | T int_part_number = 0; | 157 | 82.7k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 82.7k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 82.7k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 82.7k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 82.7k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 82.7k | int actual_frac_part_count = 0; | 171 | 82.7k | int digit_index = 0; | 172 | 82.7k | if (actual_int_part_count >= 0) { | 173 | 79.6k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 293k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 213k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 79.6k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 140 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 140 | return 0; | 182 | 140 | } | 183 | | // get int part number | 184 | 598k | for (; digit_index != max_index; ++digit_index) { | 185 | 519k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 519k | } | 187 | 79.4k | if (digit_index != actual_int_part_count) { | 188 | 76 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 76 | } | 190 | 79.4k | } else { | 191 | | // leading zeros of fraction part | 192 | 3.17k | actual_frac_part_count = -actual_int_part_count; | 193 | 3.17k | } | 194 | | // get fraction part number | 195 | 1.17M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 1.08M | ++digit_index, ++actual_frac_part_count) { | 197 | 1.08M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 1.08M | } | 199 | 82.6k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 82.6k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 21.5k | if (digits[digit_index] >= 5) { | 204 | 8.03k | ++frac_part_number; | 205 | 8.03k | if (frac_part_number == type_scale_multiplier) { | 206 | 907 | frac_part_number = 0; | 207 | 907 | ++int_part_number; | 208 | 907 | } | 209 | 8.03k | } | 210 | 61.1k | } else { | 211 | 61.1k | if (actual_frac_part_count < type_scale) { | 212 | 45.0k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 45.0k | } | 214 | 61.1k | } | 215 | 82.6k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 16 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 16 | return 0; | 218 | 16 | } | 219 | | | 220 | 82.6k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 82.6k | *result = StringParser::PARSE_SUCCESS; | 222 | 82.6k | return is_negative ? T(-value) : T(value); | 223 | 82.6k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 13.5k | ParseResult* result) { | 47 | 13.5k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 13.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 13.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 13.5k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 13.5k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 13.5k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 13.5k | bool is_negative = false; | 56 | 13.5k | if (len > 0) { | 57 | 13.5k | switch (*s) { | 58 | 6.68k | case '-': | 59 | 6.68k | is_negative = true; | 60 | 6.68k | [[fallthrough]]; | 61 | 6.68k | case '+': | 62 | 6.68k | ++s; | 63 | 6.68k | --len; | 64 | 13.5k | } | 65 | 13.5k | } | 66 | | // Ignore leading zeros. | 67 | 13.5k | bool found_value = false; | 68 | 52.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 38.8k | found_value = true; | 70 | 38.8k | ++s; | 71 | 38.8k | --len; | 72 | 38.8k | } | 73 | | | 74 | 13.5k | int found_dot = 0; | 75 | 13.5k | if (len > 0 && *s == '.') { | 76 | 2.00k | found_dot = 1; | 77 | 2.00k | ++s; | 78 | 2.00k | --len; | 79 | 2.00k | } | 80 | 13.5k | int int_part_count = 0; | 81 | 13.5k | std::vector<unsigned char> digits; | 82 | 13.5k | if (len > 0) { | 83 | 13.5k | digits.resize(len); | 84 | 13.5k | } | 85 | 13.5k | int total_digit_count = 0; | 86 | 13.5k | int i = 0; | 87 | 279k | for (; i != len; ++i) { | 88 | 266k | const char& c = s[i]; | 89 | 266k | if (LIKELY('0' <= c && c <= '9')) { | 90 | 254k | found_value = true; | 91 | 254k | digits[total_digit_count++] = c - '0'; | 92 | 254k | if (!found_dot) { | 93 | 136k | ++int_part_count; | 94 | 136k | } | 95 | 254k | } else if (c == '.') { | 96 | 11.4k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 11.4k | found_dot = 1; | 101 | 11.4k | } else { | 102 | 11 | break; | 103 | 11 | } | 104 | 266k | } | 105 | 13.5k | if (!found_value) { | 106 | | // '', '.' | 107 | 10 | *result = StringParser::PARSE_FAILURE; | 108 | 10 | return 0; | 109 | 10 | } | 110 | | // parse exponent if any | 111 | 13.5k | int64_t exponent = 0; | 112 | 13.5k | if (i != len) { | 113 | 1 | bool negative_exponent = false; | 114 | 1 | if (s[i] == 'e' || s[i] == 'E') { | 115 | 0 | ++i; | 116 | 0 | if (i != len) { | 117 | 0 | switch (s[i]) { | 118 | 0 | case '-': | 119 | 0 | negative_exponent = true; | 120 | 0 | [[fallthrough]]; | 121 | 0 | case '+': | 122 | 0 | ++i; | 123 | 0 | } | 124 | 0 | } | 125 | 0 | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 0 | for (; i != len; ++i) { | 131 | 0 | const char& c = s[i]; | 132 | 0 | if (LIKELY('0' <= c && c <= '9')) { | 133 | 0 | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 0 | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 0 | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 0 | } | 148 | 0 | if (negative_exponent) { | 149 | 0 | exponent = -exponent; | 150 | 0 | } | 151 | 1 | } else { | 152 | 1 | *result = StringParser::PARSE_FAILURE; | 153 | 1 | return 0; | 154 | 1 | } | 155 | 1 | } | 156 | 13.5k | T int_part_number = 0; | 157 | 13.5k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 13.5k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 13.5k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 13.5k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 13.5k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 13.5k | int actual_frac_part_count = 0; | 171 | 13.5k | int digit_index = 0; | 172 | 13.5k | if (actual_int_part_count >= 0) { | 173 | 13.5k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 13.5k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 0 | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 13.5k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 8 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 8 | return 0; | 182 | 8 | } | 183 | | // get int part number | 184 | 149k | for (; digit_index != max_index; ++digit_index) { | 185 | 136k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 136k | } | 187 | 13.5k | if (digit_index != actual_int_part_count) { | 188 | 0 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 0 | } | 190 | 13.5k | } else { | 191 | | // leading zeros of fraction part | 192 | 0 | actual_frac_part_count = -actual_int_part_count; | 193 | 0 | } | 194 | | // get fraction part number | 195 | 131k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 118k | ++digit_index, ++actual_frac_part_count) { | 197 | 118k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 118k | } | 199 | 13.5k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 13.5k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 17 | if (digits[digit_index] >= 5) { | 204 | 17 | ++frac_part_number; | 205 | 17 | if (frac_part_number == type_scale_multiplier) { | 206 | 0 | frac_part_number = 0; | 207 | 0 | ++int_part_number; | 208 | 0 | } | 209 | 17 | } | 210 | 13.4k | } else { | 211 | 13.4k | if (actual_frac_part_count < type_scale) { | 212 | 1.94k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 1.94k | } | 214 | 13.4k | } | 215 | 13.5k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 0 | return 0; | 218 | 0 | } | 219 | | | 220 | 13.5k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 13.5k | *result = StringParser::PARSE_SUCCESS; | 222 | 13.5k | return is_negative ? T(-value) : T(value); | 223 | 13.5k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 116k | ParseResult* result) { | 47 | 116k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 116k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 116k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 116k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 116k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 116k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 116k | bool is_negative = false; | 56 | 116k | if (len > 0) { | 57 | 116k | switch (*s) { | 58 | 17.3k | case '-': | 59 | 17.3k | is_negative = true; | 60 | 17.3k | [[fallthrough]]; | 61 | 23.9k | case '+': | 62 | 23.9k | ++s; | 63 | 23.9k | --len; | 64 | 116k | } | 65 | 116k | } | 66 | | // Ignore leading zeros. | 67 | 116k | bool found_value = false; | 68 | 210k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 94.0k | found_value = true; | 70 | 94.0k | ++s; | 71 | 94.0k | --len; | 72 | 94.0k | } | 73 | | | 74 | 116k | int found_dot = 0; | 75 | 116k | if (len > 0 && *s == '.') { | 76 | 15.8k | found_dot = 1; | 77 | 15.8k | ++s; | 78 | 15.8k | --len; | 79 | 15.8k | } | 80 | 116k | int int_part_count = 0; | 81 | 116k | std::vector<unsigned char> digits; | 82 | 116k | if (len > 0) { | 83 | 114k | digits.resize(len); | 84 | 114k | } | 85 | 116k | int total_digit_count = 0; | 86 | 116k | int i = 0; | 87 | 3.91M | for (; i != len; ++i) { | 88 | 3.87M | const char& c = s[i]; | 89 | 3.87M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 3.69M | found_value = true; | 91 | 3.69M | digits[total_digit_count++] = c - '0'; | 92 | 3.69M | if (!found_dot) { | 93 | 991k | ++int_part_count; | 94 | 991k | } | 95 | 3.69M | } else if (c == '.') { | 96 | 94.5k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 94.5k | found_dot = 1; | 101 | 94.5k | } else { | 102 | 76.9k | break; | 103 | 76.9k | } | 104 | 3.87M | } | 105 | 116k | if (!found_value) { | 106 | | // '', '.' | 107 | 92 | *result = StringParser::PARSE_FAILURE; | 108 | 92 | return 0; | 109 | 92 | } | 110 | | // parse exponent if any | 111 | 116k | int64_t exponent = 0; | 112 | 116k | if (i != len) { | 113 | 76.9k | bool negative_exponent = false; | 114 | 76.9k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 76.8k | ++i; | 116 | 76.8k | if (i != len) { | 117 | 76.8k | switch (s[i]) { | 118 | 1.53k | case '-': | 119 | 1.53k | negative_exponent = true; | 120 | 1.53k | [[fallthrough]]; | 121 | 69.1k | case '+': | 122 | 69.1k | ++i; | 123 | 76.8k | } | 124 | 76.8k | } | 125 | 76.8k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 229k | for (; i != len; ++i) { | 131 | 152k | const char& c = s[i]; | 132 | 152k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 152k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 152k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 152k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 10 | *result = StringParser::PARSE_FAILURE; | 145 | 10 | return 0; | 146 | 10 | } | 147 | 152k | } | 148 | 76.8k | if (negative_exponent) { | 149 | 1.53k | exponent = -exponent; | 150 | 1.53k | } | 151 | 76.8k | } else { | 152 | 20 | *result = StringParser::PARSE_FAILURE; | 153 | 20 | return 0; | 154 | 20 | } | 155 | 76.9k | } | 156 | 116k | T int_part_number = 0; | 157 | 116k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 116k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 116k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 116k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 116k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 116k | int actual_frac_part_count = 0; | 171 | 116k | int digit_index = 0; | 172 | 116k | if (actual_int_part_count >= 0) { | 173 | 116k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 332k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 216k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 116k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 112 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 112 | return 0; | 182 | 112 | } | 183 | | // get int part number | 184 | 2.11M | for (; digit_index != max_index; ++digit_index) { | 185 | 1.99M | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 1.99M | } | 187 | 116k | if (digit_index != actual_int_part_count) { | 188 | 65.5k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 65.5k | } | 190 | 116k | } else { | 191 | | // leading zeros of fraction part | 192 | 48 | actual_frac_part_count = -actual_int_part_count; | 193 | 48 | } | 194 | | // get fraction part number | 195 | 1.49M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 1.37M | ++digit_index, ++actual_frac_part_count) { | 197 | 1.37M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 1.37M | } | 199 | 116k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 116k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 18.8k | if (digits[digit_index] >= 5) { | 204 | 7.94k | ++frac_part_number; | 205 | 7.94k | if (frac_part_number == type_scale_multiplier) { | 206 | 836 | frac_part_number = 0; | 207 | 836 | ++int_part_number; | 208 | 836 | } | 209 | 7.94k | } | 210 | 97.6k | } else { | 211 | 97.6k | if (actual_frac_part_count < type_scale) { | 212 | 88.2k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 88.2k | } | 214 | 97.6k | } | 215 | 116k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 16 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 16 | return 0; | 218 | 16 | } | 219 | | | 220 | 116k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 116k | *result = StringParser::PARSE_SUCCESS; | 222 | 116k | return is_negative ? T(-value) : T(value); | 223 | 116k | } |
|
224 | | template vectorized::Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
225 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
226 | | ParseResult* result); |
227 | | template vectorized::Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
228 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
229 | | ParseResult* result); |
230 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
231 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
232 | | ParseResult* result); |
233 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
234 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
235 | | ParseResult* result); |
236 | | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
237 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
238 | | ParseResult* result); |
239 | | } // end namespace doris |
240 | | #include "common/compile_check_avoid_end.h" |