/root/doris/be/src/util/string_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/string_parser.hpp" |
19 | | |
20 | | #include <limits> |
21 | | |
22 | | #include "vec/core/extended_types.h" |
23 | | namespace doris { |
24 | | // Supported decimal number format: |
25 | | // <decimal> ::= <whitespace>* <value> <whitespace>* |
26 | | // |
27 | | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
28 | | // |
29 | | // <value> ::= <sign>? <significand> <exponent>? |
30 | | // |
31 | | // <sign> ::= "+" | "-" |
32 | | // |
33 | | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
34 | | // |
35 | | // <digits> ::= <digit>+ |
36 | | // |
37 | | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
38 | | // |
39 | | // <exponent> ::= <e_marker> <sign>? <digits> |
40 | | // |
41 | | // <e_marker> ::= "e" | "E" |
42 | | template <PrimitiveType P> |
43 | | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
44 | | const char* __restrict s, int len, int type_precision, int type_scale, |
45 | 352k | ParseResult* result) { |
46 | 352k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
47 | 352k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
48 | 352k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
49 | 352k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
50 | 352k | "wide::Int256."); |
51 | | // Ignore leading and trailing spaces. |
52 | 352k | s = skip_ascii_whitespaces(s, len); |
53 | | |
54 | 352k | bool is_negative = false; |
55 | 352k | if (len > 0) { |
56 | 352k | switch (*s) { |
57 | 92.4k | case '-': |
58 | 92.4k | is_negative = true; |
59 | 92.4k | [[fallthrough]]; |
60 | 119k | case '+': |
61 | 119k | ++s; |
62 | 119k | --len; |
63 | 352k | } |
64 | 352k | } |
65 | | // Ignore leading zeros. |
66 | 352k | bool found_value = false; |
67 | 689k | while (len > 0 && UNLIKELY(*s == '0')) { |
68 | 337k | found_value = true; |
69 | 337k | ++s; |
70 | 337k | --len; |
71 | 337k | } |
72 | | |
73 | 352k | int found_dot = 0; |
74 | 352k | if (len > 0 && *s == '.') { |
75 | 84.4k | found_dot = 1; |
76 | 84.4k | ++s; |
77 | 84.4k | --len; |
78 | 84.4k | } |
79 | 352k | int int_part_count = 0; |
80 | 352k | std::vector<unsigned char> digits; |
81 | 352k | if (len > 0) { |
82 | 343k | digits.resize(len); |
83 | 343k | } |
84 | 352k | int total_digit_count = 0; |
85 | 352k | int i = 0; |
86 | 8.11M | for (; i != len; ++i) { |
87 | 7.87M | const char& c = s[i]; |
88 | 7.87M | if (LIKELY('0' <= c && c <= '9')) { |
89 | 7.53M | found_value = true; |
90 | 7.53M | digits[total_digit_count++] = c - '0'; |
91 | 7.53M | if (!found_dot) { |
92 | 2.33M | ++int_part_count; |
93 | 2.33M | } |
94 | 7.53M | } else if (c == '.') { |
95 | 222k | if (found_dot) { |
96 | 2 | *result = StringParser::PARSE_FAILURE; |
97 | 2 | return 0; |
98 | 2 | } |
99 | 222k | found_dot = 1; |
100 | 222k | } else { |
101 | 111k | break; |
102 | 111k | } |
103 | 7.87M | } |
104 | 352k | if (!found_value) { |
105 | | // '', '.' |
106 | 344 | *result = StringParser::PARSE_FAILURE; |
107 | 344 | return 0; |
108 | 344 | } |
109 | | // parse exponent if any |
110 | 351k | int64_t exponent = 0; |
111 | 351k | if (i != len) { |
112 | 111k | bool negative_exponent = false; |
113 | 111k | if (s[i] == 'e' || s[i] == 'E') { |
114 | 111k | ++i; |
115 | 111k | if (i != len) { |
116 | 111k | switch (s[i]) { |
117 | 11.6k | case '-': |
118 | 11.6k | negative_exponent = true; |
119 | 11.6k | [[fallthrough]]; |
120 | 80.2k | case '+': |
121 | 80.2k | ++i; |
122 | 111k | } |
123 | 111k | } |
124 | 111k | if (i == len) { |
125 | | // '123e', '123e+', '123e-' |
126 | 6 | *result = StringParser::PARSE_FAILURE; |
127 | 6 | return 0; |
128 | 6 | } |
129 | 325k | for (; i != len; ++i) { |
130 | 213k | const char& c = s[i]; |
131 | 213k | if (LIKELY('0' <= c && c <= '9')) { |
132 | 213k | exponent = exponent * 10 + (c - '0'); |
133 | | // max string len is config::string_type_length_soft_limit_bytes, |
134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
135 | | // just check overflow of int32_t to simplify the logic |
136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 |
137 | 213k | if (exponent > std::numeric_limits<int32_t>::max()) { |
138 | 0 | *result = StringParser::PARSE_OVERFLOW; |
139 | 0 | return 0; |
140 | 0 | } |
141 | 213k | } else { |
142 | | // '123e12abc', '123e1.2' |
143 | 22 | *result = StringParser::PARSE_FAILURE; |
144 | 22 | return 0; |
145 | 22 | } |
146 | 213k | } |
147 | 111k | if (negative_exponent) { |
148 | 11.6k | exponent = -exponent; |
149 | 11.6k | } |
150 | 111k | } else { |
151 | 115 | *result = StringParser::PARSE_FAILURE; |
152 | 115 | return 0; |
153 | 115 | } |
154 | 111k | } |
155 | 351k | T int_part_number = 0; |
156 | 351k | T frac_part_number = 0; |
157 | | // TODO: check limit values of exponent and add UT |
158 | | // max string len is config::string_type_length_soft_limit_bytes, |
159 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
160 | | // so int_part_count will be in range of int32_t, |
161 | | // and int_part_count + exponent will be in range of int64_t |
162 | 351k | int64_t tmp_actual_int_part_count = int_part_count + exponent; |
163 | 351k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |
164 | 351k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |
165 | 0 | *result = StringParser::PARSE_OVERFLOW; |
166 | 0 | return 0; |
167 | 0 | } |
168 | 351k | int actual_int_part_count = tmp_actual_int_part_count; |
169 | 351k | int actual_frac_part_count = 0; |
170 | 351k | int digit_index = 0; |
171 | 351k | if (actual_int_part_count >= 0) { |
172 | 345k | int max_index = std::min(actual_int_part_count, total_digit_count); |
173 | | // skip zero number |
174 | 1.20M | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |
175 | 855k | } |
176 | | // test 0.00, .00, 0.{00...}e2147483647 |
177 | | // 0.00000e2147483647 |
178 | 345k | if (max_index - digit_index > type_precision - type_scale) { |
179 | 12.0k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
180 | 12.0k | return 0; |
181 | 12.0k | } |
182 | | // get int part number |
183 | 3.39M | for (; digit_index != max_index; ++digit_index) { |
184 | 3.06M | int_part_number = int_part_number * 10 + digits[digit_index]; |
185 | 3.06M | } |
186 | 333k | if (digit_index != actual_int_part_count) { |
187 | 66.7k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |
188 | 66.7k | } |
189 | 333k | } else { |
190 | | // leading zeros of fraction part |
191 | 5.73k | actual_frac_part_count = -actual_int_part_count; |
192 | 5.73k | } |
193 | | // get fraction part number |
194 | 3.40M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |
195 | 3.06M | ++digit_index, ++actual_frac_part_count) { |
196 | 3.06M | frac_part_number = frac_part_number * 10 + digits[digit_index]; |
197 | 3.06M | } |
198 | 339k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
199 | | // there are still extra fraction digits left, check rounding |
200 | 339k | if (digit_index != total_digit_count) { |
201 | | // example: test 1.5 -> decimal(1, 0) |
202 | 81.4k | if (digits[digit_index] >= 5) { |
203 | 32.9k | ++frac_part_number; |
204 | 32.9k | if (frac_part_number == type_scale_multiplier) { |
205 | 3.43k | frac_part_number = 0; |
206 | 3.43k | ++int_part_number; |
207 | 3.43k | } |
208 | 32.9k | } |
209 | 258k | } else { |
210 | 258k | if (actual_frac_part_count < type_scale) { |
211 | 197k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
212 | 197k | } |
213 | 258k | } |
214 | 339k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
215 | 73 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
216 | 73 | return 0; |
217 | 73 | } |
218 | | |
219 | 339k | T value = int_part_number * type_scale_multiplier + frac_part_number; |
220 | 339k | *result = StringParser::PARSE_SUCCESS; |
221 | 339k | return is_negative ? T(-value) : T(value); |
222 | 339k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKciiiPNS0_11ParseResultE Line | Count | Source | 45 | 54.2k | ParseResult* result) { | 46 | 54.2k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 47 | 54.2k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 48 | 54.2k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 49 | 54.2k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 50 | 54.2k | "wide::Int256."); | 51 | | // Ignore leading and trailing spaces. | 52 | 54.2k | s = skip_ascii_whitespaces(s, len); | 53 | | | 54 | 54.2k | bool is_negative = false; | 55 | 54.2k | if (len > 0) { | 56 | 54.2k | switch (*s) { | 57 | 24.9k | case '-': | 58 | 24.9k | is_negative = true; | 59 | 24.9k | [[fallthrough]]; | 60 | 32.2k | case '+': | 61 | 32.2k | ++s; | 62 | 32.2k | --len; | 63 | 54.2k | } | 64 | 54.2k | } | 65 | | // Ignore leading zeros. 
| 66 | 54.2k | bool found_value = false; | 67 | 107k | while (len > 0 && UNLIKELY(*s == '0')) { | 68 | 53.6k | found_value = true; | 69 | 53.6k | ++s; | 70 | 53.6k | --len; | 71 | 53.6k | } | 72 | | | 73 | 54.2k | int found_dot = 0; | 74 | 54.2k | if (len > 0 && *s == '.') { | 75 | 17.9k | found_dot = 1; | 76 | 17.9k | ++s; | 77 | 17.9k | --len; | 78 | 17.9k | } | 79 | 54.2k | int int_part_count = 0; | 80 | 54.2k | std::vector<unsigned char> digits; | 81 | 54.2k | if (len > 0) { | 82 | 52.1k | digits.resize(len); | 83 | 52.1k | } | 84 | 54.2k | int total_digit_count = 0; | 85 | 54.2k | int i = 0; | 86 | 575k | for (; i != len; ++i) { | 87 | 530k | const char& c = s[i]; | 88 | 530k | if (LIKELY('0' <= c && c <= '9')) { | 89 | 506k | found_value = true; | 90 | 506k | digits[total_digit_count++] = c - '0'; | 91 | 506k | if (!found_dot) { | 92 | 159k | ++int_part_count; | 93 | 159k | } | 94 | 506k | } else if (c == '.') { | 95 | 14.2k | if (found_dot) { | 96 | 2 | *result = StringParser::PARSE_FAILURE; | 97 | 2 | return 0; | 98 | 2 | } | 99 | 14.2k | found_dot = 1; | 100 | 14.2k | } else { | 101 | 9.52k | break; | 102 | 9.52k | } | 103 | 530k | } | 104 | 54.2k | if (!found_value) { | 105 | | // '', '.' 
| 106 | 146 | *result = StringParser::PARSE_FAILURE; | 107 | 146 | return 0; | 108 | 146 | } | 109 | | // parse exponent if any | 110 | 54.1k | int64_t exponent = 0; | 111 | 54.1k | if (i != len) { | 112 | 9.39k | bool negative_exponent = false; | 113 | 9.39k | if (s[i] == 'e' || s[i] == 'E') { | 114 | 9.33k | ++i; | 115 | 9.33k | if (i != len) { | 116 | 9.33k | switch (s[i]) { | 117 | 1.54k | case '-': | 118 | 1.54k | negative_exponent = true; | 119 | 1.54k | [[fallthrough]]; | 120 | 1.54k | case '+': | 121 | 1.54k | ++i; | 122 | 9.33k | } | 123 | 9.33k | } | 124 | 9.33k | if (i == len) { | 125 | | // '123e', '123e+', '123e-' | 126 | 6 | *result = StringParser::PARSE_FAILURE; | 127 | 6 | return 0; | 128 | 6 | } | 129 | 24.6k | for (; i != len; ++i) { | 130 | 15.3k | const char& c = s[i]; | 131 | 15.3k | if (LIKELY('0' <= c && c <= '9')) { | 132 | 15.3k | exponent = exponent * 10 + (c - '0'); | 133 | | // max string len is config::string_type_length_soft_limit_bytes, | 134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 135 | | // just check overflow of int32_t to simplify the logic | 136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 137 | 15.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 138 | 0 | *result = StringParser::PARSE_OVERFLOW; | 139 | 0 | return 0; | 140 | 0 | } | 141 | 15.3k | } else { | 142 | | // '123e12abc', '123e1.2' | 143 | 12 | *result = StringParser::PARSE_FAILURE; | 144 | 12 | return 0; | 145 | 12 | } | 146 | 15.3k | } | 147 | 9.31k | if (negative_exponent) { | 148 | 1.53k | exponent = -exponent; | 149 | 1.53k | } | 150 | 9.31k | } else { | 151 | 60 | *result = StringParser::PARSE_FAILURE; | 152 | 60 | return 0; | 153 | 60 | } | 154 | 9.39k | } | 155 | 54.0k | T int_part_number = 0; | 156 | 54.0k | T frac_part_number = 0; | 157 | | // TODO: check limit values of exponent and add UT | 158 | | // max string len is config::string_type_length_soft_limit_bytes, | 159 | | // whose max value is 
std::numeric_limits<int32_t>::max() - 4, | 160 | | // so int_part_count will be in range of int32_t, | 161 | | // and int_part_count + exponent will be in range of int64_t | 162 | 54.0k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 163 | 54.0k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 164 | 54.0k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 165 | 0 | *result = StringParser::PARSE_OVERFLOW; | 166 | 0 | return 0; | 167 | 0 | } | 168 | 54.0k | int actual_int_part_count = tmp_actual_int_part_count; | 169 | 54.0k | int actual_frac_part_count = 0; | 170 | 54.0k | int digit_index = 0; | 171 | 54.0k | if (actual_int_part_count >= 0) { | 172 | 54.0k | int max_index = std::min(actual_int_part_count, total_digit_count); | 173 | | // skip zero number | 174 | 266k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 175 | 212k | } | 176 | | // test 0.00, .00, 0.{00...}e2147483647 | 177 | | // 0.00000e2147483647 | 178 | 54.0k | if (max_index - digit_index > type_precision - type_scale) { | 179 | 1.33k | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 180 | 1.33k | return 0; | 181 | 1.33k | } | 182 | | // get int part number | 183 | 153k | for (; digit_index != max_index; ++digit_index) { | 184 | 100k | int_part_number = int_part_number * 10 + digits[digit_index]; | 185 | 100k | } | 186 | 52.6k | if (digit_index != actual_int_part_count) { | 187 | 100 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 188 | 100 | } | 189 | 52.6k | } else { | 190 | | // leading zeros of fraction part | 191 | 48 | actual_frac_part_count = -actual_int_part_count; | 192 | 48 | } | 193 | | // get fraction part number | 194 | 156k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 195 | 103k | ++digit_index, ++actual_frac_part_count) { | 196 | 103k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 197 | 103k | } | 198 | 52.7k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 199 | | // there are still extra fraction digits left, check rounding | 200 | 52.7k | if (digit_index != total_digit_count) { | 201 | | // example: test 1.5 -> decimal(1, 0) | 202 | 21.1k | if (digits[digit_index] >= 5) { | 203 | 8.96k | ++frac_part_number; | 204 | 8.96k | if (frac_part_number == type_scale_multiplier) { | 205 | 856 | frac_part_number = 0; | 206 | 856 | ++int_part_number; | 207 | 856 | } | 208 | 8.96k | } | 209 | 31.6k | } else { | 210 | 31.6k | if (actual_frac_part_count < type_scale) { | 211 | 28.3k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 212 | 28.3k | } | 213 | 31.6k | } | 214 | 52.7k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 215 | 24 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 216 | 24 | return 0; | 217 | 24 | } | 218 | | | 219 | 52.7k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 220 | 52.7k | *result = StringParser::PARSE_SUCCESS; | 221 | 52.7k | return is_negative ? T(-value) : T(value); | 222 | 52.7k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKciiiPNS0_11ParseResultE Line | Count | Source | 45 | 86.0k | ParseResult* result) { | 46 | 86.0k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 47 | 86.0k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 48 | 86.0k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 49 | 86.0k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 50 | 86.0k | "wide::Int256."); | 51 | | // Ignore leading and trailing spaces. | 52 | 86.0k | s = skip_ascii_whitespaces(s, len); | 53 | | | 54 | 86.0k | bool is_negative = false; | 55 | 86.0k | if (len > 0) { | 56 | 86.0k | switch (*s) { | 57 | 21.7k | case '-': | 58 | 21.7k | is_negative = true; | 59 | 21.7k | [[fallthrough]]; | 60 | 28.3k | case '+': | 61 | 28.3k | ++s; | 62 | 28.3k | --len; | 63 | 86.0k | } | 64 | 86.0k | } | 65 | | // Ignore leading zeros. 
| 66 | 86.0k | bool found_value = false; | 67 | 162k | while (len > 0 && UNLIKELY(*s == '0')) { | 68 | 75.9k | found_value = true; | 69 | 75.9k | ++s; | 70 | 75.9k | --len; | 71 | 75.9k | } | 72 | | | 73 | 86.0k | int found_dot = 0; | 74 | 86.0k | if (len > 0 && *s == '.') { | 75 | 23.8k | found_dot = 1; | 76 | 23.8k | ++s; | 77 | 23.8k | --len; | 78 | 23.8k | } | 79 | 86.0k | int int_part_count = 0; | 80 | 86.0k | std::vector<unsigned char> digits; | 81 | 86.0k | if (len > 0) { | 82 | 83.9k | digits.resize(len); | 83 | 83.9k | } | 84 | 86.0k | int total_digit_count = 0; | 85 | 86.0k | int i = 0; | 86 | 1.40M | for (; i != len; ++i) { | 87 | 1.32M | const char& c = s[i]; | 88 | 1.32M | if (LIKELY('0' <= c && c <= '9')) { | 89 | 1.26M | found_value = true; | 90 | 1.26M | digits[total_digit_count++] = c - '0'; | 91 | 1.26M | if (!found_dot) { | 92 | 521k | ++int_part_count; | 93 | 521k | } | 94 | 1.26M | } else if (c == '.') { | 95 | 53.5k | if (found_dot) { | 96 | 0 | *result = StringParser::PARSE_FAILURE; | 97 | 0 | return 0; | 98 | 0 | } | 99 | 53.5k | found_dot = 1; | 100 | 53.5k | } else { | 101 | 11.7k | break; | 102 | 11.7k | } | 103 | 1.32M | } | 104 | 86.0k | if (!found_value) { | 105 | | // '', '.' 
| 106 | 69 | *result = StringParser::PARSE_FAILURE; | 107 | 69 | return 0; | 108 | 69 | } | 109 | | // parse exponent if any | 110 | 85.9k | int64_t exponent = 0; | 111 | 85.9k | if (i != len) { | 112 | 11.6k | bool negative_exponent = false; | 113 | 11.6k | if (s[i] == 'e' || s[i] == 'E') { | 114 | 11.6k | ++i; | 115 | 11.6k | if (i != len) { | 116 | 11.6k | switch (s[i]) { | 117 | 3.91k | case '-': | 118 | 3.91k | negative_exponent = true; | 119 | 3.91k | [[fallthrough]]; | 120 | 3.91k | case '+': | 121 | 3.91k | ++i; | 122 | 11.6k | } | 123 | 11.6k | } | 124 | 11.6k | if (i == len) { | 125 | | // '123e', '123e+', '123e-' | 126 | 0 | *result = StringParser::PARSE_FAILURE; | 127 | 0 | return 0; | 128 | 0 | } | 129 | 32.5k | for (; i != len; ++i) { | 130 | 20.8k | const char& c = s[i]; | 131 | 20.8k | if (LIKELY('0' <= c && c <= '9')) { | 132 | 20.8k | exponent = exponent * 10 + (c - '0'); | 133 | | // max string len is config::string_type_length_soft_limit_bytes, | 134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 135 | | // just check overflow of int32_t to simplify the logic | 136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 137 | 20.8k | if (exponent > std::numeric_limits<int32_t>::max()) { | 138 | 0 | *result = StringParser::PARSE_OVERFLOW; | 139 | 0 | return 0; | 140 | 0 | } | 141 | 20.8k | } else { | 142 | | // '123e12abc', '123e1.2' | 143 | 0 | *result = StringParser::PARSE_FAILURE; | 144 | 0 | return 0; | 145 | 0 | } | 146 | 20.8k | } | 147 | 11.6k | if (negative_exponent) { | 148 | 3.91k | exponent = -exponent; | 149 | 3.91k | } | 150 | 11.6k | } else { | 151 | 23 | *result = StringParser::PARSE_FAILURE; | 152 | 23 | return 0; | 153 | 23 | } | 154 | 11.6k | } | 155 | 85.9k | T int_part_number = 0; | 156 | 85.9k | T frac_part_number = 0; | 157 | | // TODO: check limit values of exponent and add UT | 158 | | // max string len is config::string_type_length_soft_limit_bytes, | 159 | | // whose max value is 
std::numeric_limits<int32_t>::max() - 4, | 160 | | // so int_part_count will be in range of int32_t, | 161 | | // and int_part_count + exponent will be in range of int64_t | 162 | 85.9k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 163 | 85.9k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 164 | 85.9k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 165 | 0 | *result = StringParser::PARSE_OVERFLOW; | 166 | 0 | return 0; | 167 | 0 | } | 168 | 85.9k | int actual_int_part_count = tmp_actual_int_part_count; | 169 | 85.9k | int actual_frac_part_count = 0; | 170 | 85.9k | int digit_index = 0; | 171 | 85.9k | if (actual_int_part_count >= 0) { | 172 | 83.5k | int max_index = std::min(actual_int_part_count, total_digit_count); | 173 | | // skip zero number | 174 | 296k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 175 | 212k | } | 176 | | // test 0.00, .00, 0.{00...}e2147483647 | 177 | | // 0.00000e2147483647 | 178 | 83.5k | if (max_index - digit_index > type_precision - type_scale) { | 179 | 10.4k | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 180 | 10.4k | return 0; | 181 | 10.4k | } | 182 | | // get int part number | 183 | 407k | for (; digit_index != max_index; ++digit_index) { | 184 | 334k | int_part_number = int_part_number * 10 + digits[digit_index]; | 185 | 334k | } | 186 | 73.1k | if (digit_index != actual_int_part_count) { | 187 | 77 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 188 | 77 | } | 189 | 73.1k | } else { | 190 | | // leading zeros of fraction part | 191 | 2.42k | actual_frac_part_count = -actual_int_part_count; | 192 | 2.42k | } | 193 | | // get fraction part number | 194 | 545k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 195 | 470k | ++digit_index, ++actual_frac_part_count) { | 196 | 470k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 197 | 470k | } | 198 | 75.5k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 199 | | // there are still extra fraction digits left, check rounding | 200 | 75.5k | if (digit_index != total_digit_count) { | 201 | | // example: test 1.5 -> decimal(1, 0) | 202 | 19.8k | if (digits[digit_index] >= 5) { | 203 | 7.95k | ++frac_part_number; | 204 | 7.95k | if (frac_part_number == type_scale_multiplier) { | 205 | 836 | frac_part_number = 0; | 206 | 836 | ++int_part_number; | 207 | 836 | } | 208 | 7.95k | } | 209 | 55.6k | } else { | 210 | 55.6k | if (actual_frac_part_count < type_scale) { | 211 | 32.0k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 212 | 32.0k | } | 213 | 55.6k | } | 214 | 75.5k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 215 | 17 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 216 | 17 | return 0; | 217 | 17 | } | 218 | | | 219 | 75.5k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 220 | 75.5k | *result = StringParser::PARSE_SUCCESS; | 221 | 75.5k | return is_negative ? T(-value) : T(value); | 222 | 75.5k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKciiiPNS0_11ParseResultE Line | Count | Source | 45 | 82.4k | ParseResult* result) { | 46 | 82.4k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 47 | 82.4k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 48 | 82.4k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 49 | 82.4k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 50 | 82.4k | "wide::Int256."); | 51 | | // Ignore leading and trailing spaces. | 52 | 82.4k | s = skip_ascii_whitespaces(s, len); | 53 | | | 54 | 82.4k | bool is_negative = false; | 55 | 82.4k | if (len > 0) { | 56 | 82.4k | switch (*s) { | 57 | 21.7k | case '-': | 58 | 21.7k | is_negative = true; | 59 | 21.7k | [[fallthrough]]; | 60 | 28.3k | case '+': | 61 | 28.3k | ++s; | 62 | 28.3k | --len; | 63 | 82.4k | } | 64 | 82.4k | } | 65 | | // Ignore leading zeros. 
| 66 | 82.4k | bool found_value = false; | 67 | 157k | while (len > 0 && UNLIKELY(*s == '0')) { | 68 | 74.6k | found_value = true; | 69 | 74.6k | ++s; | 70 | 74.6k | --len; | 71 | 74.6k | } | 72 | | | 73 | 82.4k | int found_dot = 0; | 74 | 82.4k | if (len > 0 && *s == '.') { | 75 | 24.8k | found_dot = 1; | 76 | 24.8k | ++s; | 77 | 24.8k | --len; | 78 | 24.8k | } | 79 | 82.4k | int int_part_count = 0; | 80 | 82.4k | std::vector<unsigned char> digits; | 81 | 82.4k | if (len > 0) { | 82 | 80.3k | digits.resize(len); | 83 | 80.3k | } | 84 | 82.4k | int total_digit_count = 0; | 85 | 82.4k | int i = 0; | 86 | 2.04M | for (; i != len; ++i) { | 87 | 1.97M | const char& c = s[i]; | 88 | 1.97M | if (LIKELY('0' <= c && c <= '9')) { | 89 | 1.91M | found_value = true; | 90 | 1.91M | digits[total_digit_count++] = c - '0'; | 91 | 1.91M | if (!found_dot) { | 92 | 558k | ++int_part_count; | 93 | 558k | } | 94 | 1.91M | } else if (c == '.') { | 95 | 49.6k | if (found_dot) { | 96 | 0 | *result = StringParser::PARSE_FAILURE; | 97 | 0 | return 0; | 98 | 0 | } | 99 | 49.6k | found_dot = 1; | 100 | 49.6k | } else { | 101 | 12.5k | break; | 102 | 12.5k | } | 103 | 1.97M | } | 104 | 82.4k | if (!found_value) { | 105 | | // '', '.' 
| 106 | 53 | *result = StringParser::PARSE_FAILURE; | 107 | 53 | return 0; | 108 | 53 | } | 109 | | // parse exponent if any | 110 | 82.3k | int64_t exponent = 0; | 111 | 82.3k | if (i != len) { | 112 | 12.4k | bool negative_exponent = false; | 113 | 12.4k | if (s[i] == 'e' || s[i] == 'E') { | 114 | 12.4k | ++i; | 115 | 12.4k | if (i != len) { | 116 | 12.4k | switch (s[i]) { | 117 | 4.70k | case '-': | 118 | 4.70k | negative_exponent = true; | 119 | 4.70k | [[fallthrough]]; | 120 | 4.70k | case '+': | 121 | 4.70k | ++i; | 122 | 12.4k | } | 123 | 12.4k | } | 124 | 12.4k | if (i == len) { | 125 | | // '123e', '123e+', '123e-' | 126 | 0 | *result = StringParser::PARSE_FAILURE; | 127 | 0 | return 0; | 128 | 0 | } | 129 | 35.9k | for (; i != len; ++i) { | 130 | 23.4k | const char& c = s[i]; | 131 | 23.4k | if (LIKELY('0' <= c && c <= '9')) { | 132 | 23.4k | exponent = exponent * 10 + (c - '0'); | 133 | | // max string len is config::string_type_length_soft_limit_bytes, | 134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 135 | | // just check overflow of int32_t to simplify the logic | 136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 137 | 23.4k | if (exponent > std::numeric_limits<int32_t>::max()) { | 138 | 0 | *result = StringParser::PARSE_OVERFLOW; | 139 | 0 | return 0; | 140 | 0 | } | 141 | 23.4k | } else { | 142 | | // '123e12abc', '123e1.2' | 143 | 0 | *result = StringParser::PARSE_FAILURE; | 144 | 0 | return 0; | 145 | 0 | } | 146 | 23.4k | } | 147 | 12.4k | if (negative_exponent) { | 148 | 4.70k | exponent = -exponent; | 149 | 4.70k | } | 150 | 12.4k | } else { | 151 | 12 | *result = StringParser::PARSE_FAILURE; | 152 | 12 | return 0; | 153 | 12 | } | 154 | 12.4k | } | 155 | 82.3k | T int_part_number = 0; | 156 | 82.3k | T frac_part_number = 0; | 157 | | // TODO: check limit values of exponent and add UT | 158 | | // max string len is config::string_type_length_soft_limit_bytes, | 159 | | // whose max value is 
std::numeric_limits<int32_t>::max() - 4, | 160 | | // so int_part_count will be in range of int32_t, | 161 | | // and int_part_count + exponent will be in range of int64_t | 162 | 82.3k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 163 | 82.3k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 164 | 82.3k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 165 | 0 | *result = StringParser::PARSE_OVERFLOW; | 166 | 0 | return 0; | 167 | 0 | } | 168 | 82.3k | int actual_int_part_count = tmp_actual_int_part_count; | 169 | 82.3k | int actual_frac_part_count = 0; | 170 | 82.3k | int digit_index = 0; | 171 | 82.3k | if (actual_int_part_count >= 0) { | 172 | 79.1k | int max_index = std::min(actual_int_part_count, total_digit_count); | 173 | | // skip zero number | 174 | 293k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 175 | 213k | } | 176 | | // test 0.00, .00, 0.{00...}e2147483647 | 177 | | // 0.00000e2147483647 | 178 | 79.1k | if (max_index - digit_index > type_precision - type_scale) { | 179 | 140 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 180 | 140 | return 0; | 181 | 140 | } | 182 | | // get int part number | 183 | 596k | for (; digit_index != max_index; ++digit_index) { | 184 | 517k | int_part_number = int_part_number * 10 + digits[digit_index]; | 185 | 517k | } | 186 | 79.0k | if (digit_index != actual_int_part_count) { | 187 | 76 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 188 | 76 | } | 189 | 79.0k | } else { | 190 | | // leading zeros of fraction part | 191 | 3.21k | actual_frac_part_count = -actual_int_part_count; | 192 | 3.21k | } | 193 | | // get fraction part number | 194 | 1.15M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 195 | 1.07M | ++digit_index, ++actual_frac_part_count) { | 196 | 1.07M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 197 | 1.07M | } | 198 | 82.2k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 199 | | // there are still extra fraction digits left, check rounding | 200 | 82.2k | if (digit_index != total_digit_count) { | 201 | | // example: test 1.5 -> decimal(1, 0) | 202 | 21.5k | if (digits[digit_index] >= 5) { | 203 | 8.03k | ++frac_part_number; | 204 | 8.03k | if (frac_part_number == type_scale_multiplier) { | 205 | 908 | frac_part_number = 0; | 206 | 908 | ++int_part_number; | 207 | 908 | } | 208 | 8.03k | } | 209 | 60.6k | } else { | 210 | 60.6k | if (actual_frac_part_count < type_scale) { | 211 | 45.5k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 212 | 45.5k | } | 213 | 60.6k | } | 214 | 82.2k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 215 | 16 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 216 | 16 | return 0; | 217 | 16 | } | 218 | | | 219 | 82.2k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 220 | 82.2k | *result = StringParser::PARSE_SUCCESS; | 221 | 82.2k | return is_negative ? T(-value) : T(value); | 222 | 82.2k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKciiiPNS0_11ParseResultE Line | Count | Source | 45 | 13.5k | ParseResult* result) { | 46 | 13.5k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 47 | 13.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 48 | 13.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 49 | 13.5k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 50 | 13.5k | "wide::Int256."); | 51 | | // Ignore leading and trailing spaces. | 52 | 13.5k | s = skip_ascii_whitespaces(s, len); | 53 | | | 54 | 13.5k | bool is_negative = false; | 55 | 13.5k | if (len > 0) { | 56 | 13.5k | switch (*s) { | 57 | 6.68k | case '-': | 58 | 6.68k | is_negative = true; | 59 | 6.68k | [[fallthrough]]; | 60 | 6.68k | case '+': | 61 | 6.68k | ++s; | 62 | 6.68k | --len; | 63 | 13.5k | } | 64 | 13.5k | } | 65 | | // Ignore leading zeros. 
| 66 | 13.5k | bool found_value = false; | 67 | 52.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 68 | 38.8k | found_value = true; | 69 | 38.8k | ++s; | 70 | 38.8k | --len; | 71 | 38.8k | } | 72 | | | 73 | 13.5k | int found_dot = 0; | 74 | 13.5k | if (len > 0 && *s == '.') { | 75 | 2.00k | found_dot = 1; | 76 | 2.00k | ++s; | 77 | 2.00k | --len; | 78 | 2.00k | } | 79 | 13.5k | int int_part_count = 0; | 80 | 13.5k | std::vector<unsigned char> digits; | 81 | 13.5k | if (len > 0) { | 82 | 13.5k | digits.resize(len); | 83 | 13.5k | } | 84 | 13.5k | int total_digit_count = 0; | 85 | 13.5k | int i = 0; | 86 | 279k | for (; i != len; ++i) { | 87 | 266k | const char& c = s[i]; | 88 | 266k | if (LIKELY('0' <= c && c <= '9')) { | 89 | 254k | found_value = true; | 90 | 254k | digits[total_digit_count++] = c - '0'; | 91 | 254k | if (!found_dot) { | 92 | 136k | ++int_part_count; | 93 | 136k | } | 94 | 254k | } else if (c == '.') { | 95 | 11.4k | if (found_dot) { | 96 | 0 | *result = StringParser::PARSE_FAILURE; | 97 | 0 | return 0; | 98 | 0 | } | 99 | 11.4k | found_dot = 1; | 100 | 11.4k | } else { | 101 | 16 | break; | 102 | 16 | } | 103 | 266k | } | 104 | 13.5k | if (!found_value) { | 105 | | // '', '.' 
| 106 | 10 | *result = StringParser::PARSE_FAILURE; | 107 | 10 | return 0; | 108 | 10 | } | 109 | | // parse exponent if any | 110 | 13.5k | int64_t exponent = 0; | 111 | 13.5k | if (i != len) { | 112 | 6 | bool negative_exponent = false; | 113 | 6 | if (s[i] == 'e' || s[i] == 'E') { | 114 | 0 | ++i; | 115 | 0 | if (i != len) { | 116 | 0 | switch (s[i]) { | 117 | 0 | case '-': | 118 | 0 | negative_exponent = true; | 119 | 0 | [[fallthrough]]; | 120 | 0 | case '+': | 121 | 0 | ++i; | 122 | 0 | } | 123 | 0 | } | 124 | 0 | if (i == len) { | 125 | | // '123e', '123e+', '123e-' | 126 | 0 | *result = StringParser::PARSE_FAILURE; | 127 | 0 | return 0; | 128 | 0 | } | 129 | 0 | for (; i != len; ++i) { | 130 | 0 | const char& c = s[i]; | 131 | 0 | if (LIKELY('0' <= c && c <= '9')) { | 132 | 0 | exponent = exponent * 10 + (c - '0'); | 133 | | // max string len is config::string_type_length_soft_limit_bytes, | 134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 135 | | // just check overflow of int32_t to simplify the logic | 136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 137 | 0 | if (exponent > std::numeric_limits<int32_t>::max()) { | 138 | 0 | *result = StringParser::PARSE_OVERFLOW; | 139 | 0 | return 0; | 140 | 0 | } | 141 | 0 | } else { | 142 | | // '123e12abc', '123e1.2' | 143 | 0 | *result = StringParser::PARSE_FAILURE; | 144 | 0 | return 0; | 145 | 0 | } | 146 | 0 | } | 147 | 0 | if (negative_exponent) { | 148 | 0 | exponent = -exponent; | 149 | 0 | } | 150 | 6 | } else { | 151 | 6 | *result = StringParser::PARSE_FAILURE; | 152 | 6 | return 0; | 153 | 6 | } | 154 | 6 | } | 155 | 13.5k | T int_part_number = 0; | 156 | 13.5k | T frac_part_number = 0; | 157 | | // TODO: check limit values of exponent and add UT | 158 | | // max string len is config::string_type_length_soft_limit_bytes, | 159 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 160 | | // so int_part_count will be in range of int32_t, | 161 | | // 
and int_part_count + exponent will be in range of int64_t | 162 | 13.5k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 163 | 13.5k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 164 | 13.5k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 165 | 0 | *result = StringParser::PARSE_OVERFLOW; | 166 | 0 | return 0; | 167 | 0 | } | 168 | 13.5k | int actual_int_part_count = tmp_actual_int_part_count; | 169 | 13.5k | int actual_frac_part_count = 0; | 170 | 13.5k | int digit_index = 0; | 171 | 13.5k | if (actual_int_part_count >= 0) { | 172 | 13.5k | int max_index = std::min(actual_int_part_count, total_digit_count); | 173 | | // skip zero number | 174 | 13.5k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 175 | 0 | } | 176 | | // test 0.00, .00, 0.{00...}e2147483647 | 177 | | // 0.00000e2147483647 | 178 | 13.5k | if (max_index - digit_index > type_precision - type_scale) { | 179 | 8 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 180 | 8 | return 0; | 181 | 8 | } | 182 | | // get int part number | 183 | 149k | for (; digit_index != max_index; ++digit_index) { | 184 | 136k | int_part_number = int_part_number * 10 + digits[digit_index]; | 185 | 136k | } | 186 | 13.5k | if (digit_index != actual_int_part_count) { | 187 | 0 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 188 | 0 | } | 189 | 13.5k | } else { | 190 | | // leading zeros of fraction part | 191 | 0 | actual_frac_part_count = -actual_int_part_count; | 192 | 0 | } | 193 | | // get fraction part number | 194 | 131k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 195 | 118k | ++digit_index, ++actual_frac_part_count) { | 196 | 118k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 197 | 118k | } | 198 | 13.5k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 199 | | // there are still extra fraction digits left, check rounding | 200 | 13.5k | if (digit_index != total_digit_count) { | 201 | | // example: test 1.5 -> decimal(1, 0) | 202 | 17 | if (digits[digit_index] >= 5) { | 203 | 17 | ++frac_part_number; | 204 | 17 | if (frac_part_number == type_scale_multiplier) { | 205 | 0 | frac_part_number = 0; | 206 | 0 | ++int_part_number; | 207 | 0 | } | 208 | 17 | } | 209 | 13.5k | } else { | 210 | 13.5k | if (actual_frac_part_count < type_scale) { | 211 | 1.95k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 212 | 1.95k | } | 213 | 13.5k | } | 214 | 13.5k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 215 | 0 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 216 | 0 | return 0; | 217 | 0 | } | 218 | | | 219 | 13.5k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 220 | 13.5k | *result = StringParser::PARSE_SUCCESS; | 221 | 13.5k | return is_negative ? T(-value) : T(value); | 222 | 13.5k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKciiiPNS0_11ParseResultE Line | Count | Source | 45 | 115k | ParseResult* result) { | 46 | 115k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 47 | 115k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 48 | 115k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 49 | 115k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 50 | 115k | "wide::Int256."); | 51 | | // Ignore leading and trailing spaces. | 52 | 115k | s = skip_ascii_whitespaces(s, len); | 53 | | | 54 | 115k | bool is_negative = false; | 55 | 115k | if (len > 0) { | 56 | 115k | switch (*s) { | 57 | 17.3k | case '-': | 58 | 17.3k | is_negative = true; | 59 | 17.3k | [[fallthrough]]; | 60 | 23.9k | case '+': | 61 | 23.9k | ++s; | 62 | 23.9k | --len; | 63 | 115k | } | 64 | 115k | } | 65 | | // Ignore leading zeros. 
| 66 | 115k | bool found_value = false; | 67 | 209k | while (len > 0 && UNLIKELY(*s == '0')) { | 68 | 94.0k | found_value = true; | 69 | 94.0k | ++s; | 70 | 94.0k | --len; | 71 | 94.0k | } | 72 | | | 73 | 115k | int found_dot = 0; | 74 | 115k | if (len > 0 && *s == '.') { | 75 | 15.8k | found_dot = 1; | 76 | 15.8k | ++s; | 77 | 15.8k | --len; | 78 | 15.8k | } | 79 | 115k | int int_part_count = 0; | 80 | 115k | std::vector<unsigned char> digits; | 81 | 115k | if (len > 0) { | 82 | 113k | digits.resize(len); | 83 | 113k | } | 84 | 115k | int total_digit_count = 0; | 85 | 115k | int i = 0; | 86 | 3.81M | for (; i != len; ++i) { | 87 | 3.77M | const char& c = s[i]; | 88 | 3.77M | if (LIKELY('0' <= c && c <= '9')) { | 89 | 3.60M | found_value = true; | 90 | 3.60M | digits[total_digit_count++] = c - '0'; | 91 | 3.60M | if (!found_dot) { | 92 | 957k | ++int_part_count; | 93 | 957k | } | 94 | 3.60M | } else if (c == '.') { | 95 | 93.6k | if (found_dot) { | 96 | 0 | *result = StringParser::PARSE_FAILURE; | 97 | 0 | return 0; | 98 | 0 | } | 99 | 93.6k | found_dot = 1; | 100 | 93.6k | } else { | 101 | 77.9k | break; | 102 | 77.9k | } | 103 | 3.77M | } | 104 | 115k | if (!found_value) { | 105 | | // '', '.' 
| 106 | 66 | *result = StringParser::PARSE_FAILURE; | 107 | 66 | return 0; | 108 | 66 | } | 109 | | // parse exponent if any | 110 | 115k | int64_t exponent = 0; | 111 | 115k | if (i != len) { | 112 | 77.8k | bool negative_exponent = false; | 113 | 77.8k | if (s[i] == 'e' || s[i] == 'E') { | 114 | 77.8k | ++i; | 115 | 77.8k | if (i != len) { | 116 | 77.8k | switch (s[i]) { | 117 | 1.53k | case '-': | 118 | 1.53k | negative_exponent = true; | 119 | 1.53k | [[fallthrough]]; | 120 | 70.0k | case '+': | 121 | 70.0k | ++i; | 122 | 77.8k | } | 123 | 77.8k | } | 124 | 77.8k | if (i == len) { | 125 | | // '123e', '123e+', '123e-' | 126 | 0 | *result = StringParser::PARSE_FAILURE; | 127 | 0 | return 0; | 128 | 0 | } | 129 | 232k | for (; i != len; ++i) { | 130 | 154k | const char& c = s[i]; | 131 | 154k | if (LIKELY('0' <= c && c <= '9')) { | 132 | 154k | exponent = exponent * 10 + (c - '0'); | 133 | | // max string len is config::string_type_length_soft_limit_bytes, | 134 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 135 | | // just check overflow of int32_t to simplify the logic | 136 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 137 | 154k | if (exponent > std::numeric_limits<int32_t>::max()) { | 138 | 0 | *result = StringParser::PARSE_OVERFLOW; | 139 | 0 | return 0; | 140 | 0 | } | 141 | 154k | } else { | 142 | | // '123e12abc', '123e1.2' | 143 | 10 | *result = StringParser::PARSE_FAILURE; | 144 | 10 | return 0; | 145 | 10 | } | 146 | 154k | } | 147 | 77.8k | if (negative_exponent) { | 148 | 1.53k | exponent = -exponent; | 149 | 1.53k | } | 150 | 77.8k | } else { | 151 | 14 | *result = StringParser::PARSE_FAILURE; | 152 | 14 | return 0; | 153 | 14 | } | 154 | 77.8k | } | 155 | 115k | T int_part_number = 0; | 156 | 115k | T frac_part_number = 0; | 157 | | // TODO: check limit values of exponent and add UT | 158 | | // max string len is config::string_type_length_soft_limit_bytes, | 159 | | // whose max value is 
std::numeric_limits<int32_t>::max() - 4, | 160 | | // so int_part_count will be in range of int32_t, | 161 | | // and int_part_count + exponent will be in range of int64_t | 162 | 115k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 163 | 115k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 164 | 115k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 165 | 0 | *result = StringParser::PARSE_OVERFLOW; | 166 | 0 | return 0; | 167 | 0 | } | 168 | 115k | int actual_int_part_count = tmp_actual_int_part_count; | 169 | 115k | int actual_frac_part_count = 0; | 170 | 115k | int digit_index = 0; | 171 | 115k | if (actual_int_part_count >= 0) { | 172 | 115k | int max_index = std::min(actual_int_part_count, total_digit_count); | 173 | | // skip zero number | 174 | 332k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 175 | 216k | } | 176 | | // test 0.00, .00, 0.{00...}e2147483647 | 177 | | // 0.00000e2147483647 | 178 | 115k | if (max_index - digit_index > type_precision - type_scale) { | 179 | 112 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 180 | 112 | return 0; | 181 | 112 | } | 182 | | // get int part number | 183 | 2.09M | for (; digit_index != max_index; ++digit_index) { | 184 | 1.97M | int_part_number = int_part_number * 10 + digits[digit_index]; | 185 | 1.97M | } | 186 | 115k | if (digit_index != actual_int_part_count) { | 187 | 66.4k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 188 | 66.4k | } | 189 | 115k | } else { | 190 | | // leading zeros of fraction part | 191 | 48 | actual_frac_part_count = -actual_int_part_count; | 192 | 48 | } | 193 | | // get fraction part number | 194 | 1.41M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 195 | 1.29M | ++digit_index, ++actual_frac_part_count) { | 196 | 1.29M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 197 | 1.29M | } | 198 | 115k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 199 | | // there are still extra fraction digits left, check rounding | 200 | 115k | if (digit_index != total_digit_count) { | 201 | | // example: test 1.5 -> decimal(1, 0) | 202 | 18.8k | if (digits[digit_index] >= 5) { | 203 | 7.94k | ++frac_part_number; | 204 | 7.94k | if (frac_part_number == type_scale_multiplier) { | 205 | 836 | frac_part_number = 0; | 206 | 836 | ++int_part_number; | 207 | 836 | } | 208 | 7.94k | } | 209 | 96.7k | } else { | 210 | 96.7k | if (actual_frac_part_count < type_scale) { | 211 | 89.3k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 212 | 89.3k | } | 213 | 96.7k | } | 214 | 115k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 215 | 16 | *result = is_negative ? 
StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 216 | 16 | return 0; | 217 | 16 | } | 218 | | | 219 | 115k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 220 | 115k | *result = StringParser::PARSE_SUCCESS; | 221 | 115k | return is_negative ? T(-value) : T(value); | 222 | 115k | } |
|
// Explicit instantiations of StringParser::string_to_decimal, one per decimal
// primitive type named below (DECIMAL32, DECIMAL64, DECIMAL128I, DECIMALV2 and
// DECIMAL256). Each one spells out the concrete return type for that PrimitiveType.
// NOTE(review): presumably needed because the template body is defined in this .cpp
// file, so other translation units can only link against these instantiations -
// confirm against the declaration in util/string_parser.hpp.
223 | | template vectorized::Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
224 | | const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
225 | | template vectorized::Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
226 | | const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
227 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
228 | | const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
229 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
230 | | const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
231 | | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
232 | | const char* __restrict s, int len, int type_precision, int type_scale, ParseResult* result); |
233 | | } // end namespace doris |