/root/doris/be/src/util/string_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "util/string_parser.hpp" |
19 | | |
20 | | #include <limits> |
21 | | |
22 | | #include "vec/core/extended_types.h" |
23 | | namespace doris { |
24 | | #include "common/compile_check_avoid_begin.h" |
25 | | // Supported decimal number format: |
26 | | // <decimal> ::= <whitespace>* <value> <whitespace>* |
27 | | // |
28 | | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" |
29 | | // |
30 | | // <value> ::= <sign>? <significand> <exponent>? |
31 | | // |
32 | | // <sign> ::= "+" | "-" |
33 | | // |
34 | | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> |
35 | | // |
36 | | // <digits> ::= <digit>+ |
37 | | // |
38 | | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
39 | | // |
40 | | // <exponent> ::= <e_marker> <sign>? <digits> |
41 | | // |
42 | | // <e_marker> ::= "e" | "E" |
43 | | template <PrimitiveType P> |
44 | | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( |
45 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
46 | 356k | ParseResult* result) { |
47 | 356k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |
48 | 356k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
49 | 356k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
50 | 356k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
51 | 356k | "wide::Int256."); |
52 | | // Ignore leading and trailing spaces. |
53 | 356k | s = skip_ascii_whitespaces(s, len); |
54 | | |
55 | 356k | bool is_negative = false; |
56 | 356k | if (len > 0) { |
57 | 356k | switch (*s) { |
58 | 92.6k | case '-': |
59 | 92.6k | is_negative = true; |
60 | 92.6k | [[fallthrough]]; |
61 | 119k | case '+': |
62 | 119k | ++s; |
63 | 119k | --len; |
64 | 356k | } |
65 | 356k | } |
66 | | // Ignore leading zeros. |
67 | 356k | bool found_value = false; |
68 | 694k | while (len > 0 && UNLIKELY(*s == '0')) { |
69 | 337k | found_value = true; |
70 | 337k | ++s; |
71 | 337k | --len; |
72 | 337k | } |
73 | | |
74 | 356k | int found_dot = 0; |
75 | 356k | if (len > 0 && *s == '.') { |
76 | 84.9k | found_dot = 1; |
77 | 84.9k | ++s; |
78 | 84.9k | --len; |
79 | 84.9k | } |
80 | 356k | int int_part_count = 0; |
81 | 356k | std::vector<unsigned char> digits; |
82 | 356k | if (len > 0) { |
83 | 348k | digits.resize(len); |
84 | 348k | } |
85 | 356k | int total_digit_count = 0; |
86 | 356k | int i = 0; |
87 | 8.27M | for (; i != len; ++i) { |
88 | 8.03M | const char& c = s[i]; |
89 | 8.03M | if (LIKELY('0' <= c && c <= '9')) { |
90 | 7.69M | found_value = true; |
91 | 7.69M | digits[total_digit_count++] = c - '0'; |
92 | 7.69M | if (!found_dot) { |
93 | 2.37M | ++int_part_count; |
94 | 2.37M | } |
95 | 7.69M | } else if (c == '.') { |
96 | 226k | if (found_dot) { |
97 | 2 | *result = StringParser::PARSE_FAILURE; |
98 | 2 | return 0; |
99 | 2 | } |
100 | 226k | found_dot = 1; |
101 | 226k | } else { |
102 | 112k | break; |
103 | 112k | } |
104 | 8.03M | } |
105 | 356k | if (!found_value) { |
106 | | // '', '.' |
107 | 750 | *result = StringParser::PARSE_FAILURE; |
108 | 750 | return 0; |
109 | 750 | } |
110 | | // parse exponent if any |
111 | 356k | int64_t exponent = 0; |
112 | 356k | if (i != len) { |
113 | 111k | bool negative_exponent = false; |
114 | 111k | if (s[i] == 'e' || s[i] == 'E') { |
115 | 111k | ++i; |
116 | 111k | if (i != len) { |
117 | 111k | switch (s[i]) { |
118 | 11.6k | case '-': |
119 | 11.6k | negative_exponent = true; |
120 | 11.6k | [[fallthrough]]; |
121 | 80.2k | case '+': |
122 | 80.2k | ++i; |
123 | 111k | } |
124 | 111k | } |
125 | 111k | if (i == len) { |
126 | | // '123e', '123e+', '123e-' |
127 | 6 | *result = StringParser::PARSE_FAILURE; |
128 | 6 | return 0; |
129 | 6 | } |
130 | 325k | for (; i != len; ++i) { |
131 | 213k | const char& c = s[i]; |
132 | 213k | if (LIKELY('0' <= c && c <= '9')) { |
133 | 213k | exponent = exponent * 10 + (c - '0'); |
134 | | // max string len is config::string_type_length_soft_limit_bytes, |
135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
136 | | // just check overflow of int32_t to simplify the logic |
137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 |
138 | 213k | if (exponent > std::numeric_limits<int32_t>::max()) { |
139 | 0 | *result = StringParser::PARSE_OVERFLOW; |
140 | 0 | return 0; |
141 | 0 | } |
142 | 213k | } else { |
143 | | // '123e12abc', '123e1.2' |
144 | 22 | *result = StringParser::PARSE_FAILURE; |
145 | 22 | return 0; |
146 | 22 | } |
147 | 213k | } |
148 | 111k | if (negative_exponent) { |
149 | 11.6k | exponent = -exponent; |
150 | 11.6k | } |
151 | 111k | } else { |
152 | 117 | *result = StringParser::PARSE_FAILURE; |
153 | 117 | return 0; |
154 | 117 | } |
155 | 111k | } |
156 | 355k | T int_part_number = 0; |
157 | 355k | T frac_part_number = 0; |
158 | | // TODO: check limit values of exponent and add UT |
159 | | // max string len is config::string_type_length_soft_limit_bytes, |
160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, |
161 | | // so int_part_count will be in range of int32_t, |
162 | | // and int_part_count + exponent will be in range of int64_t |
163 | 355k | int64_t tmp_actual_int_part_count = int_part_count + exponent; |
164 | 355k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |
165 | 355k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |
166 | 0 | *result = StringParser::PARSE_OVERFLOW; |
167 | 0 | return 0; |
168 | 0 | } |
169 | 355k | int actual_int_part_count = tmp_actual_int_part_count; |
170 | 355k | int actual_frac_part_count = 0; |
171 | 355k | int digit_index = 0; |
172 | 355k | if (actual_int_part_count >= 0) { |
173 | 350k | int max_index = std::min(actual_int_part_count, total_digit_count); |
174 | | // skip zero number |
175 | 1.20M | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |
176 | 855k | } |
177 | | // test 0.00, .00, 0.{00...}e2147483647 |
178 | | // 0.00000e2147483647 |
179 | 350k | if (max_index - digit_index > type_precision - type_scale) { |
180 | 12.0k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
181 | 12.0k | return 0; |
182 | 12.0k | } |
183 | | // get int part number |
184 | 3.44M | for (; digit_index != max_index; ++digit_index) { |
185 | 3.10M | int_part_number = int_part_number * 10 + digits[digit_index]; |
186 | 3.10M | } |
187 | 338k | if (digit_index != actual_int_part_count) { |
188 | 66.7k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |
189 | 66.7k | } |
190 | 338k | } else { |
191 | | // leading zeros of fraction part |
192 | 5.73k | actual_frac_part_count = -actual_int_part_count; |
193 | 5.73k | } |
194 | | // get fraction part number |
195 | 3.51M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |
196 | 3.17M | ++digit_index, ++actual_frac_part_count) { |
197 | 3.17M | frac_part_number = frac_part_number * 10 + digits[digit_index]; |
198 | 3.17M | } |
199 | 343k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |
200 | | // there are still extra fraction digits left, check rounding |
201 | 343k | if (digit_index != total_digit_count) { |
202 | | // example: test 1.5 -> decimal(1, 0) |
203 | 81.4k | if (digits[digit_index] >= 5) { |
204 | 32.9k | ++frac_part_number; |
205 | 32.9k | if (frac_part_number == type_scale_multiplier) { |
206 | 3.43k | frac_part_number = 0; |
207 | 3.43k | ++int_part_number; |
208 | 3.43k | } |
209 | 32.9k | } |
210 | 262k | } else { |
211 | 262k | if (actual_frac_part_count < type_scale) { |
212 | 197k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |
213 | 197k | } |
214 | 262k | } |
215 | 343k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |
216 | 73 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |
217 | 73 | return 0; |
218 | 73 | } |
219 | | |
220 | 343k | T value = int_part_number * type_scale_multiplier + frac_part_number; |
221 | 343k | *result = StringParser::PARSE_SUCCESS; |
222 | 343k | return is_negative ? T(-value) : T(value); |
223 | 343k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 54.8k | ParseResult* result) { | 47 | 54.8k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 54.8k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 54.8k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 54.8k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 54.8k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 54.8k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 54.8k | bool is_negative = false; | 56 | 54.8k | if (len > 0) { | 57 | 54.8k | switch (*s) { | 58 | 25.1k | case '-': | 59 | 25.1k | is_negative = true; | 60 | 25.1k | [[fallthrough]]; | 61 | 32.4k | case '+': | 62 | 32.4k | ++s; | 63 | 32.4k | --len; | 64 | 54.8k | } | 65 | 54.8k | } | 66 | | // Ignore leading zeros. | 67 | 54.8k | bool found_value = false; | 68 | 108k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 53.6k | found_value = true; | 70 | 53.6k | ++s; | 71 | 53.6k | --len; | 72 | 53.6k | } | 73 | | | 74 | 54.8k | int found_dot = 0; | 75 | 54.8k | if (len > 0 && *s == '.') { | 76 | 17.9k | found_dot = 1; | 77 | 17.9k | ++s; | 78 | 17.9k | --len; | 79 | 17.9k | } | 80 | 54.8k | int int_part_count = 0; | 81 | 54.8k | std::vector<unsigned char> digits; | 82 | 54.8k | if (len > 0) { | 83 | 52.6k | digits.resize(len); | 84 | 52.6k | } | 85 | 54.8k | int total_digit_count = 0; | 86 | 54.8k | int i = 0; | 87 | 579k | for (; i != len; ++i) { | 88 | 534k | const char& c = s[i]; | 89 | 534k | if (LIKELY('0' <= c && c <= '9')) { | 90 | 509k | found_value = true; | 91 | 509k | digits[total_digit_count++] = c - '0'; | 92 | 509k | if (!found_dot) { | 93 | 159k | ++int_part_count; | 94 | 159k | } | 95 | 509k | } else if (c == '.') { | 96 | 14.7k | if (found_dot) { | 97 | 2 | *result = StringParser::PARSE_FAILURE; | 98 | 2 | return 0; | 99 | 2 | } | 100 | 14.6k | found_dot = 1; | 101 | 14.6k | } else { | 102 | 9.56k | break; | 103 | 9.56k | } | 104 | 534k | } | 105 | 54.8k | if (!found_value) { | 106 | | // '', '.' | 107 | 190 | *result = StringParser::PARSE_FAILURE; | 108 | 190 | return 0; | 109 | 190 | } | 110 | | // parse exponent if any | 111 | 54.6k | int64_t exponent = 0; | 112 | 54.6k | if (i != len) { | 113 | 9.39k | bool negative_exponent = false; | 114 | 9.39k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 9.33k | ++i; | 116 | 9.33k | if (i != len) { | 117 | 9.33k | switch (s[i]) { | 118 | 1.54k | case '-': | 119 | 1.54k | negative_exponent = true; | 120 | 1.54k | [[fallthrough]]; | 121 | 1.54k | case '+': | 122 | 1.54k | ++i; | 123 | 9.33k | } | 124 | 9.33k | } | 125 | 9.33k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 6 | *result = StringParser::PARSE_FAILURE; | 128 | 6 | return 0; | 129 | 6 | } | 130 | 24.6k | for (; i != len; ++i) { | 131 | 15.3k | const char& c = s[i]; | 132 | 15.3k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 15.3k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 15.3k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 15.3k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 12 | *result = StringParser::PARSE_FAILURE; | 145 | 12 | return 0; | 146 | 12 | } | 147 | 15.3k | } | 148 | 9.31k | if (negative_exponent) { | 149 | 1.53k | exponent = -exponent; | 150 | 1.53k | } | 151 | 9.31k | } else { | 152 | 60 | *result = StringParser::PARSE_FAILURE; | 153 | 60 | return 0; | 154 | 60 | } | 155 | 9.39k | } | 156 | 54.5k | T int_part_number = 0; | 157 | 54.5k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 54.5k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 54.5k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 54.5k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 54.5k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 54.5k | int actual_frac_part_count = 0; | 171 | 54.5k | int digit_index = 0; | 172 | 54.5k | if (actual_int_part_count >= 0) { | 173 | 54.5k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 267k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 212k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 54.5k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 1.33k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 1.33k | return 0; | 182 | 1.33k | } | 183 | | // get int part number | 184 | 154k | for (; digit_index != max_index; ++digit_index) { | 185 | 100k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 100k | } | 187 | 53.1k | if (digit_index != actual_int_part_count) { | 188 | 100 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 100 | } | 190 | 53.1k | } else { | 191 | | // leading zeros of fraction part | 192 | 48 | actual_frac_part_count = -actual_int_part_count; | 193 | 48 | } | 194 | | // get fraction part number | 195 | 159k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 106k | ++digit_index, ++actual_frac_part_count) { | 197 | 106k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 106k | } | 199 | 53.2k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 53.2k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 21.1k | if (digits[digit_index] >= 5) { | 204 | 8.96k | ++frac_part_number; | 205 | 8.96k | if (frac_part_number == type_scale_multiplier) { | 206 | 856 | frac_part_number = 0; | 207 | 856 | ++int_part_number; | 208 | 856 | } | 209 | 8.96k | } | 210 | 32.0k | } else { | 211 | 32.0k | if (actual_frac_part_count < type_scale) { | 212 | 28.3k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 28.3k | } | 214 | 32.0k | } | 215 | 53.2k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 24 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 24 | return 0; | 218 | 24 | } | 219 | | | 220 | 53.1k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 53.1k | *result = StringParser::PARSE_SUCCESS; | 222 | 53.1k | return is_negative ? T(-value) : T(value); | 223 | 53.2k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 87.2k | ParseResult* result) { | 47 | 87.2k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 87.2k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 87.2k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 87.2k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 87.2k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 87.2k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 87.2k | bool is_negative = false; | 56 | 87.2k | if (len > 0) { | 57 | 87.2k | switch (*s) { | 58 | 21.7k | case '-': | 59 | 21.7k | is_negative = true; | 60 | 21.7k | [[fallthrough]]; | 61 | 28.3k | case '+': | 62 | 28.3k | ++s; | 63 | 28.3k | --len; | 64 | 87.2k | } | 65 | 87.2k | } | 66 | | // Ignore leading zeros. | 67 | 87.2k | bool found_value = false; | 68 | 163k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 76.2k | found_value = true; | 70 | 76.2k | ++s; | 71 | 76.2k | --len; | 72 | 76.2k | } | 73 | | | 74 | 87.2k | int found_dot = 0; | 75 | 87.2k | if (len > 0 && *s == '.') { | 76 | 24.0k | found_dot = 1; | 77 | 24.0k | ++s; | 78 | 24.0k | --len; | 79 | 24.0k | } | 80 | 87.2k | int int_part_count = 0; | 81 | 87.2k | std::vector<unsigned char> digits; | 82 | 87.2k | if (len > 0) { | 83 | 85.1k | digits.resize(len); | 84 | 85.1k | } | 85 | 87.2k | int total_digit_count = 0; | 86 | 87.2k | int i = 0; | 87 | 1.41M | for (; i != len; ++i) { | 88 | 1.34M | const char& c = s[i]; | 89 | 1.34M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 1.27M | found_value = true; | 91 | 1.27M | digits[total_digit_count++] = c - '0'; | 92 | 1.27M | if (!found_dot) { | 93 | 524k | ++int_part_count; | 94 | 524k | } | 95 | 1.27M | } else if (c == '.') { | 96 | 54.1k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 54.1k | found_dot = 1; | 101 | 54.1k | } else { | 102 | 12.0k | break; | 103 | 12.0k | } | 104 | 1.34M | } | 105 | 87.2k | if (!found_value) { | 106 | | // '', '.' | 107 | 379 | *result = StringParser::PARSE_FAILURE; | 108 | 379 | return 0; | 109 | 379 | } | 110 | | // parse exponent if any | 111 | 86.8k | int64_t exponent = 0; | 112 | 86.8k | if (i != len) { | 113 | 11.6k | bool negative_exponent = false; | 114 | 11.6k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 11.6k | ++i; | 116 | 11.6k | if (i != len) { | 117 | 11.6k | switch (s[i]) { | 118 | 3.91k | case '-': | 119 | 3.91k | negative_exponent = true; | 120 | 3.91k | [[fallthrough]]; | 121 | 3.91k | case '+': | 122 | 3.91k | ++i; | 123 | 11.6k | } | 124 | 11.6k | } | 125 | 11.6k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 32.5k | for (; i != len; ++i) { | 131 | 20.8k | const char& c = s[i]; | 132 | 20.8k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 20.8k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 20.8k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 20.8k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 20.8k | } | 148 | 11.6k | if (negative_exponent) { | 149 | 3.91k | exponent = -exponent; | 150 | 3.91k | } | 151 | 11.6k | } else { | 152 | 23 | *result = StringParser::PARSE_FAILURE; | 153 | 23 | return 0; | 154 | 23 | } | 155 | 11.6k | } | 156 | 86.8k | T int_part_number = 0; | 157 | 86.8k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 86.8k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 86.8k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 86.8k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 86.8k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 86.8k | int actual_frac_part_count = 0; | 171 | 86.8k | int digit_index = 0; | 172 | 86.8k | if (actual_int_part_count >= 0) { | 173 | 84.3k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 296k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 212k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 84.3k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 10.4k | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 10.4k | return 0; | 182 | 10.4k | } | 183 | | // get int part number | 184 | 411k | for (; digit_index != max_index; ++digit_index) { | 185 | 337k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 337k | } | 187 | 73.9k | if (digit_index != actual_int_part_count) { | 188 | 77 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 77 | } | 190 | 73.9k | } else { | 191 | | // leading zeros of fraction part | 192 | 2.42k | actual_frac_part_count = -actual_int_part_count; | 193 | 2.42k | } | 194 | | // get fraction part number | 195 | 554k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 477k | ++digit_index, ++actual_frac_part_count) { | 197 | 477k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 477k | } | 199 | 76.3k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 76.3k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 19.8k | if (digits[digit_index] >= 5) { | 204 | 7.95k | ++frac_part_number; | 205 | 7.95k | if (frac_part_number == type_scale_multiplier) { | 206 | 836 | frac_part_number = 0; | 207 | 836 | ++int_part_number; | 208 | 836 | } | 209 | 7.95k | } | 210 | 56.5k | } else { | 211 | 56.5k | if (actual_frac_part_count < type_scale) { | 212 | 32.0k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 32.0k | } | 214 | 56.5k | } | 215 | 76.3k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 17 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 17 | return 0; | 218 | 17 | } | 219 | | | 220 | 76.3k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 76.3k | *result = StringParser::PARSE_SUCCESS; | 222 | 76.3k | return is_negative ? T(-value) : T(value); | 223 | 76.3k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 83.4k | ParseResult* result) { | 47 | 83.4k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 83.4k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 83.4k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 83.4k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 83.4k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 83.4k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 83.4k | bool is_negative = false; | 56 | 83.4k | if (len > 0) { | 57 | 83.4k | switch (*s) { | 58 | 21.7k | case '-': | 59 | 21.7k | is_negative = true; | 60 | 21.7k | [[fallthrough]]; | 61 | 28.3k | case '+': | 62 | 28.3k | ++s; | 63 | 28.3k | --len; | 64 | 83.4k | } | 65 | 83.4k | } | 66 | | // Ignore leading zeros. | 67 | 83.4k | bool found_value = false; | 68 | 158k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 74.9k | found_value = true; | 70 | 74.9k | ++s; | 71 | 74.9k | --len; | 72 | 74.9k | } | 73 | | | 74 | 83.4k | int found_dot = 0; | 75 | 83.4k | if (len > 0 && *s == '.') { | 76 | 25.1k | found_dot = 1; | 77 | 25.1k | ++s; | 78 | 25.1k | --len; | 79 | 25.1k | } | 80 | 83.4k | int int_part_count = 0; | 81 | 83.4k | std::vector<unsigned char> digits; | 82 | 83.4k | if (len > 0) { | 83 | 81.3k | digits.resize(len); | 84 | 81.3k | } | 85 | 83.4k | int total_digit_count = 0; | 86 | 83.4k | int i = 0; | 87 | 2.06M | for (; i != len; ++i) { | 88 | 1.99M | const char& c = s[i]; | 89 | 1.99M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 1.93M | found_value = true; | 91 | 1.93M | digits[total_digit_count++] = c - '0'; | 92 | 1.93M | if (!found_dot) { | 93 | 562k | ++int_part_count; | 94 | 562k | } | 95 | 1.93M | } else if (c == '.') { | 96 | 50.3k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 50.3k | found_dot = 1; | 101 | 50.3k | } else { | 102 | 12.5k | break; | 103 | 12.5k | } | 104 | 1.99M | } | 105 | 83.4k | if (!found_value) { | 106 | | // '', '.' | 107 | 79 | *result = StringParser::PARSE_FAILURE; | 108 | 79 | return 0; | 109 | 79 | } | 110 | | // parse exponent if any | 111 | 83.3k | int64_t exponent = 0; | 112 | 83.3k | if (i != len) { | 113 | 12.4k | bool negative_exponent = false; | 114 | 12.4k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 12.4k | ++i; | 116 | 12.4k | if (i != len) { | 117 | 12.4k | switch (s[i]) { | 118 | 4.70k | case '-': | 119 | 4.70k | negative_exponent = true; | 120 | 4.70k | [[fallthrough]]; | 121 | 4.70k | case '+': | 122 | 4.70k | ++i; | 123 | 12.4k | } | 124 | 12.4k | } | 125 | 12.4k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 35.9k | for (; i != len; ++i) { | 131 | 23.4k | const char& c = s[i]; | 132 | 23.4k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 23.4k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 23.4k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 23.4k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 23.4k | } | 148 | 12.4k | if (negative_exponent) { | 149 | 4.70k | exponent = -exponent; | 150 | 4.70k | } | 151 | 12.4k | } else { | 152 | 12 | *result = StringParser::PARSE_FAILURE; | 153 | 12 | return 0; | 154 | 12 | } | 155 | 12.4k | } | 156 | 83.3k | T int_part_number = 0; | 157 | 83.3k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 83.3k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 83.3k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 83.3k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 83.3k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 83.3k | int actual_frac_part_count = 0; | 171 | 83.3k | int digit_index = 0; | 172 | 83.3k | if (actual_int_part_count >= 0) { | 173 | 80.1k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 293k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 213k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 80.1k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 140 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 140 | return 0; | 182 | 140 | } | 183 | | // get int part number | 184 | 601k | for (; digit_index != max_index; ++digit_index) { | 185 | 521k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 521k | } | 187 | 79.9k | if (digit_index != actual_int_part_count) { | 188 | 76 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 76 | } | 190 | 79.9k | } else { | 191 | | // leading zeros of fraction part | 192 | 3.21k | actual_frac_part_count = -actual_int_part_count; | 193 | 3.21k | } | 194 | | // get fraction part number | 195 | 1.17M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 1.09M | ++digit_index, ++actual_frac_part_count) { | 197 | 1.09M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 1.09M | } | 199 | 83.1k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 83.1k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 21.5k | if (digits[digit_index] >= 5) { | 204 | 8.03k | ++frac_part_number; | 205 | 8.03k | if (frac_part_number == type_scale_multiplier) { | 206 | 908 | frac_part_number = 0; | 207 | 908 | ++int_part_number; | 208 | 908 | } | 209 | 8.03k | } | 210 | 61.6k | } else { | 211 | 61.6k | if (actual_frac_part_count < type_scale) { | 212 | 45.5k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 45.5k | } | 214 | 61.6k | } | 215 | 83.1k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 16 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 16 | return 0; | 218 | 16 | } | 219 | | | 220 | 83.1k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 83.1k | *result = StringParser::PARSE_SUCCESS; | 222 | 83.1k | return is_negative ? T(-value) : T(value); | 223 | 83.1k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 13.5k | ParseResult* result) { | 47 | 13.5k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 13.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 13.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 13.5k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 13.5k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 13.5k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 13.5k | bool is_negative = false; | 56 | 13.5k | if (len > 0) { | 57 | 13.5k | switch (*s) { | 58 | 6.68k | case '-': | 59 | 6.68k | is_negative = true; | 60 | 6.68k | [[fallthrough]]; | 61 | 6.68k | case '+': | 62 | 6.68k | ++s; | 63 | 6.68k | --len; | 64 | 13.5k | } | 65 | 13.5k | } | 66 | | // Ignore leading zeros. | 67 | 13.5k | bool found_value = false; | 68 | 52.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 38.8k | found_value = true; | 70 | 38.8k | ++s; | 71 | 38.8k | --len; | 72 | 38.8k | } | 73 | | | 74 | 13.5k | int found_dot = 0; | 75 | 13.5k | if (len > 0 && *s == '.') { | 76 | 2.00k | found_dot = 1; | 77 | 2.00k | ++s; | 78 | 2.00k | --len; | 79 | 2.00k | } | 80 | 13.5k | int int_part_count = 0; | 81 | 13.5k | std::vector<unsigned char> digits; | 82 | 13.5k | if (len > 0) { | 83 | 13.5k | digits.resize(len); | 84 | 13.5k | } | 85 | 13.5k | int total_digit_count = 0; | 86 | 13.5k | int i = 0; | 87 | 279k | for (; i != len; ++i) { | 88 | 266k | const char& c = s[i]; | 89 | 266k | if (LIKELY('0' <= c && c <= '9')) { | 90 | 254k | found_value = true; | 91 | 254k | digits[total_digit_count++] = c - '0'; | 92 | 254k | if (!found_dot) { | 93 | 136k | ++int_part_count; | 94 | 136k | } | 95 | 254k | } else if (c == '.') { | 96 | 11.4k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 11.4k | found_dot = 1; | 101 | 11.4k | } else { | 102 | 12 | break; | 103 | 12 | } | 104 | 266k | } | 105 | 13.5k | if (!found_value) { | 106 | | // '', '.' | 107 | 10 | *result = StringParser::PARSE_FAILURE; | 108 | 10 | return 0; | 109 | 10 | } | 110 | | // parse exponent if any | 111 | 13.5k | int64_t exponent = 0; | 112 | 13.5k | if (i != len) { | 113 | 2 | bool negative_exponent = false; | 114 | 2 | if (s[i] == 'e' || s[i] == 'E') { | 115 | 0 | ++i; | 116 | 0 | if (i != len) { | 117 | 0 | switch (s[i]) { | 118 | 0 | case '-': | 119 | 0 | negative_exponent = true; | 120 | 0 | [[fallthrough]]; | 121 | 0 | case '+': | 122 | 0 | ++i; | 123 | 0 | } | 124 | 0 | } | 125 | 0 | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 0 | for (; i != len; ++i) { | 131 | 0 | const char& c = s[i]; | 132 | 0 | if (LIKELY('0' <= c && c <= '9')) { | 133 | 0 | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 0 | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 0 | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 0 | *result = StringParser::PARSE_FAILURE; | 145 | 0 | return 0; | 146 | 0 | } | 147 | 0 | } | 148 | 0 | if (negative_exponent) { | 149 | 0 | exponent = -exponent; | 150 | 0 | } | 151 | 2 | } else { | 152 | 2 | *result = StringParser::PARSE_FAILURE; | 153 | 2 | return 0; | 154 | 2 | } | 155 | 2 | } | 156 | 13.5k | T int_part_number = 0; | 157 | 13.5k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 13.5k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 13.5k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 13.5k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 13.5k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 13.5k | int actual_frac_part_count = 0; | 171 | 13.5k | int digit_index = 0; | 172 | 13.5k | if (actual_int_part_count >= 0) { | 173 | 13.5k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 13.5k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 0 | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 13.5k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 8 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 8 | return 0; | 182 | 8 | } | 183 | | // get int part number | 184 | 149k | for (; digit_index != max_index; ++digit_index) { | 185 | 136k | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 136k | } | 187 | 13.5k | if (digit_index != actual_int_part_count) { | 188 | 0 | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 0 | } | 190 | 13.5k | } else { | 191 | | // leading zeros of fraction part | 192 | 0 | actual_frac_part_count = -actual_int_part_count; | 193 | 0 | } | 194 | | // get fraction part number | 195 | 131k | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 118k | ++digit_index, ++actual_frac_part_count) { | 197 | 118k | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 118k | } | 199 | 13.5k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 13.5k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 17 | if (digits[digit_index] >= 5) { | 204 | 17 | ++frac_part_number; | 205 | 17 | if (frac_part_number == type_scale_multiplier) { | 206 | 0 | frac_part_number = 0; | 207 | 0 | ++int_part_number; | 208 | 0 | } | 209 | 17 | } | 210 | 13.5k | } else { | 211 | 13.5k | if (actual_frac_part_count < type_scale) { | 212 | 1.95k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 1.95k | } | 214 | 13.5k | } | 215 | 13.5k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 0 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 0 | return 0; | 218 | 0 | } | 219 | | | 220 | 13.5k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 13.5k | *result = StringParser::PARSE_SUCCESS; | 222 | 13.5k | return is_negative ? T(-value) : T(value); | 223 | 13.5k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE Line | Count | Source | 46 | 117k | ParseResult* result) { | 47 | 117k | using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 48 | 117k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 49 | 117k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 50 | 117k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 51 | 117k | "wide::Int256."); | 52 | | // Ignore leading and trailing spaces. | 53 | 117k | s = skip_ascii_whitespaces(s, len); | 54 | | | 55 | 117k | bool is_negative = false; | 56 | 117k | if (len > 0) { | 57 | 117k | switch (*s) { | 58 | 17.3k | case '-': | 59 | 17.3k | is_negative = true; | 60 | 17.3k | [[fallthrough]]; | 61 | 23.9k | case '+': | 62 | 23.9k | ++s; | 63 | 23.9k | --len; | 64 | 117k | } | 65 | 117k | } | 66 | | // Ignore leading zeros. | 67 | 117k | bool found_value = false; | 68 | 211k | while (len > 0 && UNLIKELY(*s == '0')) { | 69 | 94.0k | found_value = true; | 70 | 94.0k | ++s; | 71 | 94.0k | --len; | 72 | 94.0k | } | 73 | | | 74 | 117k | int found_dot = 0; | 75 | 117k | if (len > 0 && *s == '.') { | 76 | 15.8k | found_dot = 1; | 77 | 15.8k | ++s; | 78 | 15.8k | --len; | 79 | 15.8k | } | 80 | 117k | int int_part_count = 0; | 81 | 117k | std::vector<unsigned char> digits; | 82 | 117k | if (len > 0) { | 83 | 115k | digits.resize(len); | 84 | 115k | } | 85 | 117k | int total_digit_count = 0; | 86 | 117k | int i = 0; | 87 | 3.93M | for (; i != len; ++i) { | 88 | 3.89M | const char& c = s[i]; | 89 | 3.89M | if (LIKELY('0' <= c && c <= '9')) { | 90 | 3.72M | found_value = true; | 91 | 3.72M | digits[total_digit_count++] = c - '0'; | 92 | 3.72M | if (!found_dot) { | 93 | 994k | ++int_part_count; | 94 | 994k | } | 95 | 3.72M | } else if (c == '.') { | 96 | 95.6k | if (found_dot) { | 97 | 0 | *result = StringParser::PARSE_FAILURE; | 98 | 0 | return 0; | 99 | 0 | } | 100 | 95.6k | found_dot = 1; | 101 | 95.6k | } else { | 102 | 77.9k | break; | 103 | 77.9k | } | 104 | 3.89M | } | 105 | 117k | if (!found_value) { | 106 | | // '', '.' | 107 | 92 | *result = StringParser::PARSE_FAILURE; | 108 | 92 | return 0; | 109 | 92 | } | 110 | | // parse exponent if any | 111 | 117k | int64_t exponent = 0; | 112 | 117k | if (i != len) { | 113 | 77.8k | bool negative_exponent = false; | 114 | 77.8k | if (s[i] == 'e' || s[i] == 'E') { | 115 | 77.8k | ++i; | 116 | 77.8k | if (i != len) { | 117 | 77.8k | switch (s[i]) { | 118 | 1.53k | case '-': | 119 | 1.53k | negative_exponent = true; | 120 | 1.53k | [[fallthrough]]; | 121 | 70.0k | case '+': | 122 | 70.0k | ++i; | 123 | 77.8k | } | 124 | 77.8k | } | 125 | 77.8k | if (i == len) { | 126 | | // '123e', '123e+', '123e-' | 127 | 0 | *result = StringParser::PARSE_FAILURE; | 128 | 0 | return 0; | 129 | 0 | } | 130 | 232k | for (; i != len; ++i) { | 131 | 154k | const char& c = s[i]; | 132 | 154k | if (LIKELY('0' <= c && c <= '9')) { | 133 | 154k | exponent = exponent * 10 + (c - '0'); | 134 | | // max string len is config::string_type_length_soft_limit_bytes, | 135 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 136 | | // just check overflow of int32_t to simplify the logic | 137 | | // For edge cases like 0.{2147483647 zeros}e+2147483647 | 138 | 154k | if (exponent > std::numeric_limits<int32_t>::max()) { | 139 | 0 | *result = StringParser::PARSE_OVERFLOW; | 140 | 0 | return 0; | 141 | 0 | } | 142 | 154k | } else { | 143 | | // '123e12abc', '123e1.2' | 144 | 10 | *result = StringParser::PARSE_FAILURE; | 145 | 10 | return 0; | 146 | 10 | } | 147 | 154k | } | 148 | 77.8k | if (negative_exponent) { | 149 | 1.53k | exponent = -exponent; | 150 | 1.53k | } | 151 | 77.8k | } else { | 152 | 20 | *result = StringParser::PARSE_FAILURE; | 153 | 20 | return 0; | 154 | 20 | } | 155 | 77.8k | } | 156 | 117k | T int_part_number = 0; | 157 | 117k | T frac_part_number = 0; | 158 | | // TODO: check limit values of exponent and add UT | 159 | | // max string len is config::string_type_length_soft_limit_bytes, | 160 | | // whose max value is std::numeric_limits<int32_t>::max() - 4, | 161 | | // so int_part_count will be in range of int32_t, | 162 | | // and int_part_count + exponent will be in range of int64_t | 163 | 117k | int64_t tmp_actual_int_part_count = int_part_count + exponent; | 164 | 117k | if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 165 | 117k | tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 166 | 0 | *result = StringParser::PARSE_OVERFLOW; | 167 | 0 | return 0; | 168 | 0 | } | 169 | 117k | int actual_int_part_count = tmp_actual_int_part_count; | 170 | 117k | int actual_frac_part_count = 0; | 171 | 117k | int digit_index = 0; | 172 | 117k | if (actual_int_part_count >= 0) { | 173 | 117k | int max_index = std::min(actual_int_part_count, total_digit_count); | 174 | | // skip zero number | 175 | 334k | for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 176 | 216k | } | 177 | | // test 0.00, .00, 0.{00...}e2147483647 | 178 | | // 0.00000e2147483647 | 179 | 117k | if (max_index - digit_index > type_precision - type_scale) { | 180 | 112 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 181 | 112 | return 0; | 182 | 112 | } | 183 | | // get int part number | 184 | 2.13M | for (; digit_index != max_index; ++digit_index) { | 185 | 2.01M | int_part_number = int_part_number * 10 + digits[digit_index]; | 186 | 2.01M | } | 187 | 117k | if (digit_index != actual_int_part_count) { | 188 | 66.4k | int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 189 | 66.4k | } | 190 | 117k | } else { | 191 | | // leading zeros of fraction part | 192 | 48 | actual_frac_part_count = -actual_int_part_count; | 193 | 48 | } | 194 | | // get fraction part number | 195 | 1.49M | for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 196 | 1.37M | ++digit_index, ++actual_frac_part_count) { | 197 | 1.37M | frac_part_number = frac_part_number * 10 + digits[digit_index]; | 198 | 1.37M | } | 199 | 117k | auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 200 | | // there are still extra fraction digits left, check rounding | 201 | 117k | if (digit_index != total_digit_count) { | 202 | | // example: test 1.5 -> decimal(1, 0) | 203 | 18.8k | if (digits[digit_index] >= 5) { | 204 | 7.94k | ++frac_part_number; | 205 | 7.94k | if (frac_part_number == type_scale_multiplier) { | 206 | 836 | frac_part_number = 0; | 207 | 836 | ++int_part_number; | 208 | 836 | } | 209 | 7.94k | } | 210 | 98.7k | } else { | 211 | 98.7k | if (actual_frac_part_count < type_scale) { | 212 | 89.3k | frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 213 | 89.3k | } | 214 | 98.7k | } | 215 | 117k | if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 216 | 16 | *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 217 | 16 | return 0; | 218 | 16 | } | 219 | | | 220 | 117k | T value = int_part_number * type_scale_multiplier + frac_part_number; | 221 | 117k | *result = StringParser::PARSE_SUCCESS; | 222 | 117k | return is_negative ? T(-value) : T(value); | 223 | 117k | } |
|
224 | | template vectorized::Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( |
225 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
226 | | ParseResult* result); |
227 | | template vectorized::Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( |
228 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
229 | | ParseResult* result); |
230 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( |
231 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
232 | | ParseResult* result); |
233 | | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( |
234 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
235 | | ParseResult* result); |
236 | | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( |
237 | | const char* __restrict s, size_t len, int type_precision, int type_scale, |
238 | | ParseResult* result); |
239 | | } // end namespace doris |
240 | | #include "common/compile_check_avoid_end.h" |