/root/doris/be/src/util/string_parser.cpp
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #include "util/string_parser.hpp" | 
| 19 |  |  | 
| 20 |  | #include <limits> | 
| 21 |  |  | 
| 22 |  | #include "vec/core/extended_types.h" | 
| 23 |  | namespace doris { | 
| 24 |  | #include "common/compile_check_avoid_begin.h" | 
| 25 |  | // Supported decimal number format: | 
| 26 |  | // <decimal> ::= <whitespace>* <value> <whitespace>* | 
| 27 |  | // | 
| 28 |  | // <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v" | 
| 29 |  | // | 
| 30 |  | // <value> ::= <sign>? <significand> <exponent>? | 
| 31 |  | // | 
| 32 |  | // <sign> ::= "+" | "-" | 
| 33 |  | // | 
| 34 |  | // <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits> | 
| 35 |  | // | 
| 36 |  | // <digits> ::= <digit>+ | 
| 37 |  | // | 
| 38 |  | // <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | 
| 39 |  | // | 
| 40 |  | // <exponent> ::= <e_marker> <sign>? <digits> | 
| 41 |  | // | 
| 42 |  | // <e_marker> ::= "e" | "E" | 
| 43 |  | template <PrimitiveType P> | 
| 44 |  | typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal( | 
| 45 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 46 | 353k |         ParseResult* result) { | 
| 47 | 353k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; | 
| 48 | 353k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 
| 49 | 353k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 
| 50 | 353k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 
| 51 | 353k |                   "wide::Int256."); | 
| 52 |  |     // Ignore leading and trailing spaces. | 
| 53 | 353k |     s = skip_ascii_whitespaces(s, len); | 
| 54 |  |  | 
| 55 | 353k |     bool is_negative = false; | 
| 56 | 353k |     if (len > 0) { | 
| 57 | 353k |         switch (*s) { | 
| 58 | 92.4k |         case '-': | 
| 59 | 92.4k |             is_negative = true; | 
| 60 | 92.4k |             [[fallthrough]]; | 
| 61 | 119k |         case '+': | 
| 62 | 119k |             ++s; | 
| 63 | 119k |             --len; | 
| 64 | 353k |         } | 
| 65 | 353k |     } | 
| 66 |  |     // Ignore leading zeros. | 
| 67 | 353k |     bool found_value = false; | 
| 68 | 691k |     while (len > 0 && UNLIKELY(*s == '0')) { | 
| 69 | 337k |         found_value = true; | 
| 70 | 337k |         ++s; | 
| 71 | 337k |         --len; | 
| 72 | 337k |     } | 
| 73 |  |  | 
| 74 | 353k |     int found_dot = 0; | 
| 75 | 353k |     if (len > 0 && *s == '.') { | 
| 76 | 84.7k |         found_dot = 1; | 
| 77 | 84.7k |         ++s; | 
| 78 | 84.7k |         --len; | 
| 79 | 84.7k |     } | 
| 80 | 353k |     int int_part_count = 0; | 
| 81 | 353k |     std::vector<unsigned char> digits; | 
| 82 | 353k |     if (len > 0) { | 
| 83 | 345k |         digits.resize(len); | 
| 84 | 345k |     } | 
| 85 | 353k |     int total_digit_count = 0; | 
| 86 | 353k |     int i = 0; | 
| 87 | 8.23M |     for (; i != len; ++i) { | 
| 88 | 7.99M |         const char& c = s[i]; | 
| 89 | 7.99M |         if (LIKELY('0' <= c && c <= '9')) { | 
| 90 | 7.65M |             found_value = true; | 
| 91 | 7.65M |             digits[total_digit_count++] = c - '0'; | 
| 92 | 7.65M |             if (!found_dot) { | 
| 93 | 2.36M |                 ++int_part_count; | 
| 94 | 2.36M |             } | 
| 95 | 7.65M |         } else if (c == '.') { | 
| 96 | 224k |             if (found_dot) { | 
| 97 | 2 |                 *result = StringParser::PARSE_FAILURE; | 
| 98 | 2 |                 return 0; | 
| 99 | 2 |             } | 
| 100 | 224k |             found_dot = 1; | 
| 101 | 224k |         } else { | 
| 102 | 110k |             break; | 
| 103 | 110k |         } | 
| 104 | 7.99M |     } | 
| 105 | 353k |     if (!found_value) { | 
| 106 |  |         // '', '.' | 
| 107 | 346 |         *result = StringParser::PARSE_FAILURE; | 
| 108 | 346 |         return 0; | 
| 109 | 346 |     } | 
| 110 |  |     // parse exponent if any | 
| 111 | 353k |     int64_t exponent = 0; | 
| 112 | 353k |     if (i != len) { | 
| 113 | 110k |         bool negative_exponent = false; | 
| 114 | 110k |         if (s[i] == 'e' || s[i] == 'E') { | 
| 115 | 110k |             ++i; | 
| 116 | 110k |             if (i != len) { | 
| 117 | 110k |                 switch (s[i]) { | 
| 118 | 11.6k |                 case '-': | 
| 119 | 11.6k |                     negative_exponent = true; | 
| 120 | 11.6k |                     [[fallthrough]]; | 
| 121 | 79.2k |                 case '+': | 
| 122 | 79.2k |                     ++i; | 
| 123 | 110k |                 } | 
| 124 | 110k |             } | 
| 125 | 110k |             if (i == len) { | 
| 126 |  |                 // '123e', '123e+', '123e-' | 
| 127 | 6 |                 *result = StringParser::PARSE_FAILURE; | 
| 128 | 6 |                 return 0; | 
| 129 | 6 |             } | 
| 130 | 322k |             for (; i != len; ++i) { | 
| 131 | 211k |                 const char& c = s[i]; | 
| 132 | 211k |                 if (LIKELY('0' <= c && c <= '9')) { | 
| 133 | 211k |                     exponent = exponent * 10 + (c - '0'); | 
| 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, | 
| 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, | 
| 136 |  |                     // just check overflow of int32_t to simplify the logic | 
| 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 | 
| 138 | 211k |                     if (exponent > std::numeric_limits<int32_t>::max()) { | 
| 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; | 
| 140 | 0 |                         return 0; | 
| 141 | 0 |                     } | 
| 142 | 211k |                 } else { | 
| 143 |  |                     // '123e12abc', '123e1.2' | 
| 144 | 22 |                     *result = StringParser::PARSE_FAILURE; | 
| 145 | 22 |                     return 0; | 
| 146 | 22 |                 } | 
| 147 | 211k |             } | 
| 148 | 110k |             if (negative_exponent) { | 
| 149 | 11.6k |                 exponent = -exponent; | 
| 150 | 11.6k |             } | 
| 151 | 110k |         } else { | 
| 152 | 116 |             *result = StringParser::PARSE_FAILURE; | 
| 153 | 116 |             return 0; | 
| 154 | 116 |         } | 
| 155 | 110k |     } | 
| 156 | 353k |     T int_part_number = 0; | 
| 157 | 353k |     T frac_part_number = 0; | 
| 158 |  |     // TODO: check limit values of exponent and add UT | 
| 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, | 
| 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, | 
| 161 |  |     // so int_part_count will be in range of int32_t, | 
| 162 |  |     // and int_part_count + exponent will be in range of int64_t | 
| 163 | 353k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; | 
| 164 | 353k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || | 
| 165 | 353k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { | 
| 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; | 
| 167 | 0 |         return 0; | 
| 168 | 0 |     } | 
| 169 | 353k |     int actual_int_part_count = tmp_actual_int_part_count; | 
| 170 | 353k |     int actual_frac_part_count = 0; | 
| 171 | 353k |     int digit_index = 0; | 
| 172 | 353k |     if (actual_int_part_count >= 0) { | 
| 173 | 347k |         int max_index = std::min(actual_int_part_count, total_digit_count); | 
| 174 |  |         // skip zero number | 
| 175 | 1.20M |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { | 
| 176 | 855k |         } | 
| 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 | 
| 178 |  |         // 0.00000e2147483647 | 
| 179 | 347k |         if (max_index - digit_index > type_precision - type_scale) { | 
| 180 | 11.8k |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 
| 181 | 11.8k |             return 0; | 
| 182 | 11.8k |         } | 
| 183 |  |         // get int part number | 
| 184 | 3.42M |         for (; digit_index != max_index; ++digit_index) { | 
| 185 | 3.08M |             int_part_number = int_part_number * 10 + digits[digit_index]; | 
| 186 | 3.08M |         } | 
| 187 | 335k |         if (digit_index != actual_int_part_count) { | 
| 188 | 65.7k |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); | 
| 189 | 65.7k |         } | 
| 190 | 335k |     } else { | 
| 191 |  |         // leading zeros of fraction part | 
| 192 | 5.65k |         actual_frac_part_count = -actual_int_part_count; | 
| 193 | 5.65k |     } | 
| 194 |  |     // get fraction part number | 
| 195 | 3.50M |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; | 
| 196 | 3.16M |          ++digit_index, ++actual_frac_part_count) { | 
| 197 | 3.16M |         frac_part_number = frac_part_number * 10 + digits[digit_index]; | 
| 198 | 3.16M |     } | 
| 199 | 341k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); | 
| 200 |  |     // there are still extra fraction digits left, check rounding | 
| 201 | 341k |     if (digit_index != total_digit_count) { | 
| 202 |  |         // example: test 1.5 -> decimal(1, 0) | 
| 203 | 81.3k |         if (digits[digit_index] >= 5) { | 
| 204 | 32.9k |             ++frac_part_number; | 
| 205 | 32.9k |             if (frac_part_number == type_scale_multiplier) { | 
| 206 | 3.43k |                 frac_part_number = 0; | 
| 207 | 3.43k |                 ++int_part_number; | 
| 208 | 3.43k |             } | 
| 209 | 32.9k |         } | 
| 210 | 260k |     } else { | 
| 211 | 260k |         if (actual_frac_part_count < type_scale) { | 
| 212 | 194k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); | 
| 213 | 194k |         } | 
| 214 | 260k |     } | 
| 215 | 341k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { | 
| 216 | 73 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; | 
| 217 | 73 |         return 0; | 
| 218 | 73 |     } | 
| 219 |  |  | 
| 220 | 341k |     T value = int_part_number * type_scale_multiplier + frac_part_number; | 
| 221 | 341k |     *result = StringParser::PARSE_SUCCESS; | 
| 222 | 341k |     return is_negative ? T(-value) : T(value); | 
| 223 | 341k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE| Line | Count | Source |  | 46 | 54.4k |         ParseResult* result) { |  | 47 | 54.4k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |  | 48 | 54.4k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |  | 49 | 54.4k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |  | 50 | 54.4k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |  | 51 | 54.4k |                   "wide::Int256."); |  | 52 |  |     // Ignore leading and trailing spaces. |  | 53 | 54.4k |     s = skip_ascii_whitespaces(s, len); |  | 54 |  |  |  | 55 | 54.4k |     bool is_negative = false; |  | 56 | 54.4k |     if (len > 0) { |  | 57 | 54.4k |         switch (*s) { |  | 58 | 25.0k |         case '-': |  | 59 | 25.0k |             is_negative = true; |  | 60 | 25.0k |             [[fallthrough]]; |  | 61 | 32.3k |         case '+': |  | 62 | 32.3k |             ++s; |  | 63 | 32.3k |             --len; |  | 64 | 54.4k |         } |  | 65 | 54.4k |     } |  | 66 |  |     // Ignore leading zeros. |  | 67 | 54.4k |     bool found_value = false; |  | 68 | 108k |     while (len > 0 && UNLIKELY(*s == '0')) { |  | 69 | 53.6k |         found_value = true; |  | 70 | 53.6k |         ++s; |  | 71 | 53.6k |         --len; |  | 72 | 53.6k |     } |  | 73 |  |  |  | 74 | 54.4k |     int found_dot = 0; |  | 75 | 54.4k |     if (len > 0 && *s == '.') { |  | 76 | 17.9k |         found_dot = 1; |  | 77 | 17.9k |         ++s; |  | 78 | 17.9k |         --len; |  | 79 | 17.9k |     } |  | 80 | 54.4k |     int int_part_count = 0; |  | 81 | 54.4k |     std::vector<unsigned char> digits; |  | 82 | 54.4k |     if (len > 0) { |  | 83 | 52.3k |         digits.resize(len); |  | 84 | 52.3k |     } |  | 85 | 54.4k |     int total_digit_count = 0; |  | 86 | 54.4k |     int i = 0; |  | 87 | 578k |     for (; i != len; ++i) { |  | 88 | 533k |         const char& c = s[i]; |  | 89 | 533k |         if (LIKELY('0' <= c && c <= '9')) { |  | 90 | 509k |             found_value = true; |  | 91 | 509k |             digits[total_digit_count++] = c - '0'; |  | 92 | 509k |             if (!found_dot) { |  | 93 | 159k |                 ++int_part_count; |  | 94 | 159k |             } |  | 95 | 509k |         } else if (c == '.') { |  | 96 | 14.6k |             if (found_dot) { |  | 97 | 2 |                 *result = StringParser::PARSE_FAILURE; |  | 98 | 2 |                 return 0; |  | 99 | 2 |             } |  | 100 | 14.6k |             found_dot = 1; |  | 101 | 14.6k |         } else { |  | 102 | 9.52k |             break; |  | 103 | 9.52k |         } |  | 104 | 533k |     } |  | 105 | 54.4k |     if (!found_value) { |  | 106 |  |         // '', '.' |  | 107 | 146 |         *result = StringParser::PARSE_FAILURE; |  | 108 | 146 |         return 0; |  | 109 | 146 |     } |  | 110 |  |     // parse exponent if any |  | 111 | 54.3k |     int64_t exponent = 0; |  | 112 | 54.3k |     if (i != len) { |  | 113 | 9.39k |         bool negative_exponent = false; |  | 114 | 9.39k |         if (s[i] == 'e' || s[i] == 'E') { |  | 115 | 9.33k |             ++i; |  | 116 | 9.33k |             if (i != len) { |  | 117 | 9.33k |                 switch (s[i]) { |  | 118 | 1.54k |                 case '-': |  | 119 | 1.54k |                     negative_exponent = true; |  | 120 | 1.54k |                     [[fallthrough]]; |  | 121 | 1.54k |                 case '+': |  | 122 | 1.54k |                     ++i; |  | 123 | 9.33k |                 } |  | 124 | 9.33k |             } |  | 125 | 9.33k |             if (i == len) { |  | 126 |  |                 // '123e', '123e+', '123e-' |  | 127 | 6 |                 *result = StringParser::PARSE_FAILURE; |  | 128 | 6 |                 return 0; |  | 129 | 6 |             } |  | 130 | 24.6k |             for (; i != len; ++i) { |  | 131 | 15.3k |                 const char& c = s[i]; |  | 132 | 15.3k |                 if (LIKELY('0' <= c && c <= '9')) { |  | 133 | 15.3k |                     exponent = exponent * 10 + (c - '0'); |  | 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, |  | 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 136 |  |                     // just check overflow of int32_t to simplify the logic |  | 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 |  | 138 | 15.3k |                     if (exponent > std::numeric_limits<int32_t>::max()) { |  | 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; |  | 140 | 0 |                         return 0; |  | 141 | 0 |                     } |  | 142 | 15.3k |                 } else { |  | 143 |  |                     // '123e12abc', '123e1.2' |  | 144 | 12 |                     *result = StringParser::PARSE_FAILURE; |  | 145 | 12 |                     return 0; |  | 146 | 12 |                 } |  | 147 | 15.3k |             } |  | 148 | 9.31k |             if (negative_exponent) { |  | 149 | 1.53k |                 exponent = -exponent; |  | 150 | 1.53k |             } |  | 151 | 9.31k |         } else { |  | 152 | 60 |             *result = StringParser::PARSE_FAILURE; |  | 153 | 60 |             return 0; |  | 154 | 60 |         } |  | 155 | 9.39k |     } |  | 156 | 54.2k |     T int_part_number = 0; |  | 157 | 54.2k |     T frac_part_number = 0; |  | 158 |  |     // TODO: check limit values of exponent and add UT |  | 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, |  | 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 161 |  |     // so int_part_count will be in range of int32_t, |  | 162 |  |     // and int_part_count + exponent will be in range of int64_t |  | 163 | 54.2k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; |  | 164 | 54.2k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |  | 165 | 54.2k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |  | 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; |  | 167 | 0 |         return 0; |  | 168 | 0 |     } |  | 169 | 54.2k |     int actual_int_part_count = tmp_actual_int_part_count; |  | 170 | 54.2k |     int actual_frac_part_count = 0; |  | 171 | 54.2k |     int digit_index = 0; |  | 172 | 54.2k |     if (actual_int_part_count >= 0) { |  | 173 | 54.1k |         int max_index = std::min(actual_int_part_count, total_digit_count); |  | 174 |  |         // skip zero number |  | 175 | 266k |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |  | 176 | 212k |         } |  | 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 |  | 178 |  |         // 0.00000e2147483647 |  | 179 | 54.1k |         if (max_index - digit_index > type_precision - type_scale) { |  | 180 | 1.31k |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 181 | 1.31k |             return 0; |  | 182 | 1.31k |         } |  | 183 |  |         // get int part number |  | 184 | 153k |         for (; digit_index != max_index; ++digit_index) { |  | 185 | 100k |             int_part_number = int_part_number * 10 + digits[digit_index]; |  | 186 | 100k |         } |  | 187 | 52.8k |         if (digit_index != actual_int_part_count) { |  | 188 | 100 |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |  | 189 | 100 |         } |  | 190 | 52.8k |     } else { |  | 191 |  |         // leading zeros of fraction part |  | 192 | 48 |         actual_frac_part_count = -actual_int_part_count; |  | 193 | 48 |     } |  | 194 |  |     // get fraction part number |  | 195 | 159k |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |  | 196 | 106k |          ++digit_index, ++actual_frac_part_count) { |  | 197 | 106k |         frac_part_number = frac_part_number * 10 + digits[digit_index]; |  | 198 | 106k |     } |  | 199 | 52.9k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |  | 200 |  |     // there are still extra fraction digits left, check rounding |  | 201 | 52.9k |     if (digit_index != total_digit_count) { |  | 202 |  |         // example: test 1.5 -> decimal(1, 0) |  | 203 | 21.1k |         if (digits[digit_index] >= 5) { |  | 204 | 8.96k |             ++frac_part_number; |  | 205 | 8.96k |             if (frac_part_number == type_scale_multiplier) { |  | 206 | 856 |                 frac_part_number = 0; |  | 207 | 856 |                 ++int_part_number; |  | 208 | 856 |             } |  | 209 | 8.96k |         } |  | 210 | 31.8k |     } else { |  | 211 | 31.8k |         if (actual_frac_part_count < type_scale) { |  | 212 | 28.0k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |  | 213 | 28.0k |         } |  | 214 | 31.8k |     } |  | 215 | 52.9k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |  | 216 | 24 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 217 | 24 |         return 0; |  | 218 | 24 |     } |  | 219 |  |  |  | 220 | 52.9k |     T value = int_part_number * type_scale_multiplier + frac_part_number; |  | 221 | 52.9k |     *result = StringParser::PARSE_SUCCESS; |  | 222 | 52.9k |     return is_negative ? T(-value) : T(value); |  | 223 | 52.9k | } | 
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE| Line | Count | Source |  | 46 | 86.3k |         ParseResult* result) { |  | 47 | 86.3k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |  | 48 | 86.3k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |  | 49 | 86.3k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |  | 50 | 86.3k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |  | 51 | 86.3k |                   "wide::Int256."); |  | 52 |  |     // Ignore leading and trailing spaces. |  | 53 | 86.3k |     s = skip_ascii_whitespaces(s, len); |  | 54 |  |  |  | 55 | 86.3k |     bool is_negative = false; |  | 56 | 86.3k |     if (len > 0) { |  | 57 | 86.3k |         switch (*s) { |  | 58 | 21.7k |         case '-': |  | 59 | 21.7k |             is_negative = true; |  | 60 | 21.7k |             [[fallthrough]]; |  | 61 | 28.3k |         case '+': |  | 62 | 28.3k |             ++s; |  | 63 | 28.3k |             --len; |  | 64 | 86.3k |         } |  | 65 | 86.3k |     } |  | 66 |  |     // Ignore leading zeros. |  | 67 | 86.3k |     bool found_value = false; |  | 68 | 162k |     while (len > 0 && UNLIKELY(*s == '0')) { |  | 69 | 76.1k |         found_value = true; |  | 70 | 76.1k |         ++s; |  | 71 | 76.1k |         --len; |  | 72 | 76.1k |     } |  | 73 |  |  |  | 74 | 86.3k |     int found_dot = 0; |  | 75 | 86.3k |     if (len > 0 && *s == '.') { |  | 76 | 23.9k |         found_dot = 1; |  | 77 | 23.9k |         ++s; |  | 78 | 23.9k |         --len; |  | 79 | 23.9k |     } |  | 80 | 86.3k |     int int_part_count = 0; |  | 81 | 86.3k |     std::vector<unsigned char> digits; |  | 82 | 86.3k |     if (len > 0) { |  | 83 | 84.2k |         digits.resize(len); |  | 84 | 84.2k |     } |  | 85 | 86.3k |     int total_digit_count = 0; |  | 86 | 86.3k |     int i = 0; |  | 87 | 1.40M |     for (; i != len; ++i) { |  | 88 | 1.33M |         const char& c = s[i]; |  | 89 | 1.33M |         if (LIKELY('0' <= c && c <= '9')) { |  | 90 | 1.26M |             found_value = true; |  | 91 | 1.26M |             digits[total_digit_count++] = c - '0'; |  | 92 | 1.26M |             if (!found_dot) { |  | 93 | 521k |                 ++int_part_count; |  | 94 | 521k |             } |  | 95 | 1.26M |         } else if (c == '.') { |  | 96 | 53.6k |             if (found_dot) { |  | 97 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 98 | 0 |                 return 0; |  | 99 | 0 |             } |  | 100 | 53.6k |             found_dot = 1; |  | 101 | 53.6k |         } else { |  | 102 | 11.7k |             break; |  | 103 | 11.7k |         } |  | 104 | 1.33M |     } |  | 105 | 86.3k |     if (!found_value) { |  | 106 |  |         // '', '.' |  | 107 | 69 |         *result = StringParser::PARSE_FAILURE; |  | 108 | 69 |         return 0; |  | 109 | 69 |     } |  | 110 |  |     // parse exponent if any |  | 111 | 86.2k |     int64_t exponent = 0; |  | 112 | 86.2k |     if (i != len) { |  | 113 | 11.6k |         bool negative_exponent = false; |  | 114 | 11.6k |         if (s[i] == 'e' || s[i] == 'E') { |  | 115 | 11.6k |             ++i; |  | 116 | 11.6k |             if (i != len) { |  | 117 | 11.6k |                 switch (s[i]) { |  | 118 | 3.87k |                 case '-': |  | 119 | 3.87k |                     negative_exponent = true; |  | 120 | 3.87k |                     [[fallthrough]]; |  | 121 | 3.87k |                 case '+': |  | 122 | 3.87k |                     ++i; |  | 123 | 11.6k |                 } |  | 124 | 11.6k |             } |  | 125 | 11.6k |             if (i == len) { |  | 126 |  |                 // '123e', '123e+', '123e-' |  | 127 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 128 | 0 |                 return 0; |  | 129 | 0 |             } |  | 130 | 32.4k |             for (; i != len; ++i) { |  | 131 | 20.8k |                 const char& c = s[i]; |  | 132 | 20.8k |                 if (LIKELY('0' <= c && c <= '9')) { |  | 133 | 20.8k |                     exponent = exponent * 10 + (c - '0'); |  | 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, |  | 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 136 |  |                     // just check overflow of int32_t to simplify the logic |  | 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 |  | 138 | 20.8k |                     if (exponent > std::numeric_limits<int32_t>::max()) { |  | 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; |  | 140 | 0 |                         return 0; |  | 141 | 0 |                     } |  | 142 | 20.8k |                 } else { |  | 143 |  |                     // '123e12abc', '123e1.2' |  | 144 | 0 |                     *result = StringParser::PARSE_FAILURE; |  | 145 | 0 |                     return 0; |  | 146 | 0 |                 } |  | 147 | 20.8k |             } |  | 148 | 11.6k |             if (negative_exponent) { |  | 149 | 3.87k |                 exponent = -exponent; |  | 150 | 3.87k |             } |  | 151 | 11.6k |         } else { |  | 152 | 23 |             *result = StringParser::PARSE_FAILURE; |  | 153 | 23 |             return 0; |  | 154 | 23 |         } |  | 155 | 11.6k |     } |  | 156 | 86.2k |     T int_part_number = 0; |  | 157 | 86.2k |     T frac_part_number = 0; |  | 158 |  |     // TODO: check limit values of exponent and add UT |  | 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, |  | 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 161 |  |     // so int_part_count will be in range of int32_t, |  | 162 |  |     // and int_part_count + exponent will be in range of int64_t |  | 163 | 86.2k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; |  | 164 | 86.2k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |  | 165 | 86.2k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |  | 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; |  | 167 | 0 |         return 0; |  | 168 | 0 |     } |  | 169 | 86.2k |     int actual_int_part_count = tmp_actual_int_part_count; |  | 170 | 86.2k |     int actual_frac_part_count = 0; |  | 171 | 86.2k |     int digit_index = 0; |  | 172 | 86.2k |     if (actual_int_part_count >= 0) { |  | 173 | 83.8k |         int max_index = std::min(actual_int_part_count, total_digit_count); |  | 174 |  |         // skip zero number |  | 175 | 296k |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |  | 176 | 212k |         } |  | 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 |  | 178 |  |         // 0.00000e2147483647 |  | 179 | 83.8k |         if (max_index - digit_index > type_precision - type_scale) { |  | 180 | 10.2k |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 181 | 10.2k |             return 0; |  | 182 | 10.2k |         } |  | 183 |  |         // get int part number |  | 184 | 409k |         for (; digit_index != max_index; ++digit_index) { |  | 185 | 335k |             int_part_number = int_part_number * 10 + digits[digit_index]; |  | 186 | 335k |         } |  | 187 | 73.5k |         if (digit_index != actual_int_part_count) { |  | 188 | 77 |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |  | 189 | 77 |         } |  | 190 | 73.5k |     } else { |  | 191 |  |         // leading zeros of fraction part |  | 192 | 2.39k |         actual_frac_part_count = -actual_int_part_count; |  | 193 | 2.39k |     } |  | 194 |  |     // get fraction part number |  | 195 | 551k |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |  | 196 | 475k |          ++digit_index, ++actual_frac_part_count) { |  | 197 | 475k |         frac_part_number = frac_part_number * 10 + digits[digit_index]; |  | 198 | 475k |     } |  | 199 | 75.9k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |  | 200 |  |     // there are still extra fraction digits left, check rounding |  | 201 | 75.9k |     if (digit_index != total_digit_count) { |  | 202 |  |         // example: test 1.5 -> decimal(1, 0) |  | 203 | 19.8k |         if (digits[digit_index] >= 5) { |  | 204 | 7.95k |             ++frac_part_number; |  | 205 | 7.95k |             if (frac_part_number == type_scale_multiplier) { |  | 206 | 836 |                 frac_part_number = 0; |  | 207 | 836 |                 ++int_part_number; |  | 208 | 836 |             } |  | 209 | 7.95k |         } |  | 210 | 56.0k |     } else { |  | 211 | 56.0k |         if (actual_frac_part_count < type_scale) { |  | 212 | 31.7k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |  | 213 | 31.7k |         } |  | 214 | 56.0k |     } |  | 215 | 75.9k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |  | 216 | 17 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 217 | 17 |         return 0; |  | 218 | 17 |     } |  | 219 |  |  |  | 220 | 75.9k |     T value = int_part_number * type_scale_multiplier + frac_part_number; |  | 221 | 75.9k |     *result = StringParser::PARSE_SUCCESS; |  | 222 | 75.9k |     return is_negative ? T(-value) : T(value); |  | 223 | 75.9k | } | 
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE| Line | Count | Source |  | 46 | 82.8k |         ParseResult* result) { |  | 47 | 82.8k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |  | 48 | 82.8k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |  | 49 | 82.8k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |  | 50 | 82.8k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |  | 51 | 82.8k |                   "wide::Int256."); |  | 52 |  |     // Ignore leading and trailing spaces. |  | 53 | 82.8k |     s = skip_ascii_whitespaces(s, len); |  | 54 |  |  |  | 55 | 82.8k |     bool is_negative = false; |  | 56 | 82.8k |     if (len > 0) { |  | 57 | 82.8k |         switch (*s) { |  | 58 | 21.7k |         case '-': |  | 59 | 21.7k |             is_negative = true; |  | 60 | 21.7k |             [[fallthrough]]; |  | 61 | 28.3k |         case '+': |  | 62 | 28.3k |             ++s; |  | 63 | 28.3k |             --len; |  | 64 | 82.8k |         } |  | 65 | 82.8k |     } |  | 66 |  |     // Ignore leading zeros. |  | 67 | 82.8k |     bool found_value = false; |  | 68 | 157k |     while (len > 0 && UNLIKELY(*s == '0')) { |  | 69 | 74.7k |         found_value = true; |  | 70 | 74.7k |         ++s; |  | 71 | 74.7k |         --len; |  | 72 | 74.7k |     } |  | 73 |  |  |  | 74 | 82.8k |     int found_dot = 0; |  | 75 | 82.8k |     if (len > 0 && *s == '.') { |  | 76 | 25.0k |         found_dot = 1; |  | 77 | 25.0k |         ++s; |  | 78 | 25.0k |         --len; |  | 79 | 25.0k |     } |  | 80 | 82.8k |     int int_part_count = 0; |  | 81 | 82.8k |     std::vector<unsigned char> digits; |  | 82 | 82.8k |     if (len > 0) { |  | 83 | 80.7k |         digits.resize(len); |  | 84 | 80.7k |     } |  | 85 | 82.8k |     int total_digit_count = 0; |  | 86 | 82.8k |     int i = 0; |  | 87 | 2.06M |     for (; i != len; ++i) { |  | 88 | 1.99M |         const char& c = s[i]; |  | 89 | 1.99M |         if (LIKELY('0' <= c && c <= '9')) { |  | 90 | 1.92M |             found_value = true; |  | 91 | 1.92M |             digits[total_digit_count++] = c - '0'; |  | 92 | 1.92M |             if (!found_dot) { |  | 93 | 560k |                 ++int_part_count; |  | 94 | 560k |             } |  | 95 | 1.92M |         } else if (c == '.') { |  | 96 | 49.9k |             if (found_dot) { |  | 97 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 98 | 0 |                 return 0; |  | 99 | 0 |             } |  | 100 | 49.9k |             found_dot = 1; |  | 101 | 49.9k |         } else { |  | 102 | 12.4k |             break; |  | 103 | 12.4k |         } |  | 104 | 1.99M |     } |  | 105 | 82.8k |     if (!found_value) { |  | 106 |  |         // '', '.' |  | 107 | 55 |         *result = StringParser::PARSE_FAILURE; |  | 108 | 55 |         return 0; |  | 109 | 55 |     } |  | 110 |  |     // parse exponent if any |  | 111 | 82.8k |     int64_t exponent = 0; |  | 112 | 82.8k |     if (i != len) { |  | 113 | 12.4k |         bool negative_exponent = false; |  | 114 | 12.4k |         if (s[i] == 'e' || s[i] == 'E') { |  | 115 | 12.4k |             ++i; |  | 116 | 12.4k |             if (i != len) { |  | 117 | 12.4k |                 switch (s[i]) { |  | 118 | 4.66k |                 case '-': |  | 119 | 4.66k |                     negative_exponent = true; |  | 120 | 4.66k |                     [[fallthrough]]; |  | 121 | 4.66k |                 case '+': |  | 122 | 4.66k |                     ++i; |  | 123 | 12.4k |                 } |  | 124 | 12.4k |             } |  | 125 | 12.4k |             if (i == len) { |  | 126 |  |                 // '123e', '123e+', '123e-' |  | 127 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 128 | 0 |                 return 0; |  | 129 | 0 |             } |  | 130 | 35.8k |             for (; i != len; ++i) { |  | 131 | 23.3k |                 const char& c = s[i]; |  | 132 | 23.3k |                 if (LIKELY('0' <= c && c <= '9')) { |  | 133 | 23.3k |                     exponent = exponent * 10 + (c - '0'); |  | 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, |  | 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 136 |  |                     // just check overflow of int32_t to simplify the logic |  | 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 |  | 138 | 23.3k |                     if (exponent > std::numeric_limits<int32_t>::max()) { |  | 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; |  | 140 | 0 |                         return 0; |  | 141 | 0 |                     } |  | 142 | 23.3k |                 } else { |  | 143 |  |                     // '123e12abc', '123e1.2' |  | 144 | 0 |                     *result = StringParser::PARSE_FAILURE; |  | 145 | 0 |                     return 0; |  | 146 | 0 |                 } |  | 147 | 23.3k |             } |  | 148 | 12.4k |             if (negative_exponent) { |  | 149 | 4.66k |                 exponent = -exponent; |  | 150 | 4.66k |             } |  | 151 | 12.4k |         } else { |  | 152 | 12 |             *result = StringParser::PARSE_FAILURE; |  | 153 | 12 |             return 0; |  | 154 | 12 |         } |  | 155 | 12.4k |     } |  | 156 | 82.7k |     T int_part_number = 0; |  | 157 | 82.7k |     T frac_part_number = 0; |  | 158 |  |     // TODO: check limit values of exponent and add UT |  | 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, |  | 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 161 |  |     // so int_part_count will be in range of int32_t, |  | 162 |  |     // and int_part_count + exponent will be in range of int64_t |  | 163 | 82.7k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; |  | 164 | 82.7k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |  | 165 | 82.7k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |  | 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; |  | 167 | 0 |         return 0; |  | 168 | 0 |     } |  | 169 | 82.7k |     int actual_int_part_count = tmp_actual_int_part_count; |  | 170 | 82.7k |     int actual_frac_part_count = 0; |  | 171 | 82.7k |     int digit_index = 0; |  | 172 | 82.7k |     if (actual_int_part_count >= 0) { |  | 173 | 79.6k |         int max_index = std::min(actual_int_part_count, total_digit_count); |  | 174 |  |         // skip zero number |  | 175 | 293k |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |  | 176 | 213k |         } |  | 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 |  | 178 |  |         // 0.00000e2147483647 |  | 179 | 79.6k |         if (max_index - digit_index > type_precision - type_scale) { |  | 180 | 140 |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 181 | 140 |             return 0; |  | 182 | 140 |         } |  | 183 |  |         // get int part number |  | 184 | 598k |         for (; digit_index != max_index; ++digit_index) { |  | 185 | 519k |             int_part_number = int_part_number * 10 + digits[digit_index]; |  | 186 | 519k |         } |  | 187 | 79.4k |         if (digit_index != actual_int_part_count) { |  | 188 | 76 |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |  | 189 | 76 |         } |  | 190 | 79.4k |     } else { |  | 191 |  |         // leading zeros of fraction part |  | 192 | 3.17k |         actual_frac_part_count = -actual_int_part_count; |  | 193 | 3.17k |     } |  | 194 |  |     // get fraction part number |  | 195 | 1.17M |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |  | 196 | 1.08M |          ++digit_index, ++actual_frac_part_count) { |  | 197 | 1.08M |         frac_part_number = frac_part_number * 10 + digits[digit_index]; |  | 198 | 1.08M |     } |  | 199 | 82.6k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |  | 200 |  |     // there are still extra fraction digits left, check rounding |  | 201 | 82.6k |     if (digit_index != total_digit_count) { |  | 202 |  |         // example: test 1.5 -> decimal(1, 0) |  | 203 | 21.5k |         if (digits[digit_index] >= 5) { |  | 204 | 8.03k |             ++frac_part_number; |  | 205 | 8.03k |             if (frac_part_number == type_scale_multiplier) { |  | 206 | 907 |                 frac_part_number = 0; |  | 207 | 907 |                 ++int_part_number; |  | 208 | 907 |             } |  | 209 | 8.03k |         } |  | 210 | 61.1k |     } else { |  | 211 | 61.1k |         if (actual_frac_part_count < type_scale) { |  | 212 | 45.0k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |  | 213 | 45.0k |         } |  | 214 | 61.1k |     } |  | 215 | 82.6k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |  | 216 | 16 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 217 | 16 |         return 0; |  | 218 | 16 |     } |  | 219 |  |  |  | 220 | 82.6k |     T value = int_part_number * type_scale_multiplier + frac_part_number; |  | 221 | 82.6k |     *result = StringParser::PARSE_SUCCESS; |  | 222 | 82.6k |     return is_negative ? T(-value) : T(value); |  | 223 | 82.6k | } | 
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE| Line | Count | Source |  | 46 | 13.5k |         ParseResult* result) { |  | 47 | 13.5k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |  | 48 | 13.5k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |  | 49 | 13.5k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |  | 50 | 13.5k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |  | 51 | 13.5k |                   "wide::Int256."); |  | 52 |  |     // Ignore leading and trailing spaces. |  | 53 | 13.5k |     s = skip_ascii_whitespaces(s, len); |  | 54 |  |  |  | 55 | 13.5k |     bool is_negative = false; |  | 56 | 13.5k |     if (len > 0) { |  | 57 | 13.5k |         switch (*s) { |  | 58 | 6.68k |         case '-': |  | 59 | 6.68k |             is_negative = true; |  | 60 | 6.68k |             [[fallthrough]]; |  | 61 | 6.68k |         case '+': |  | 62 | 6.68k |             ++s; |  | 63 | 6.68k |             --len; |  | 64 | 13.5k |         } |  | 65 | 13.5k |     } |  | 66 |  |     // Ignore leading zeros. |  | 67 | 13.5k |     bool found_value = false; |  | 68 | 52.3k |     while (len > 0 && UNLIKELY(*s == '0')) { |  | 69 | 38.8k |         found_value = true; |  | 70 | 38.8k |         ++s; |  | 71 | 38.8k |         --len; |  | 72 | 38.8k |     } |  | 73 |  |  |  | 74 | 13.5k |     int found_dot = 0; |  | 75 | 13.5k |     if (len > 0 && *s == '.') { |  | 76 | 2.00k |         found_dot = 1; |  | 77 | 2.00k |         ++s; |  | 78 | 2.00k |         --len; |  | 79 | 2.00k |     } |  | 80 | 13.5k |     int int_part_count = 0; |  | 81 | 13.5k |     std::vector<unsigned char> digits; |  | 82 | 13.5k |     if (len > 0) { |  | 83 | 13.5k |         digits.resize(len); |  | 84 | 13.5k |     } |  | 85 | 13.5k |     int total_digit_count = 0; |  | 86 | 13.5k |     int i = 0; |  | 87 | 279k |     for (; i != len; ++i) { |  | 88 | 266k |         const char& c = s[i]; |  | 89 | 266k |         if (LIKELY('0' <= c && c <= '9')) { |  | 90 | 254k |             found_value = true; |  | 91 | 254k |             digits[total_digit_count++] = c - '0'; |  | 92 | 254k |             if (!found_dot) { |  | 93 | 136k |                 ++int_part_count; |  | 94 | 136k |             } |  | 95 | 254k |         } else if (c == '.') { |  | 96 | 11.4k |             if (found_dot) { |  | 97 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 98 | 0 |                 return 0; |  | 99 | 0 |             } |  | 100 | 11.4k |             found_dot = 1; |  | 101 | 11.4k |         } else { |  | 102 | 11 |             break; |  | 103 | 11 |         } |  | 104 | 266k |     } |  | 105 | 13.5k |     if (!found_value) { |  | 106 |  |         // '', '.' |  | 107 | 10 |         *result = StringParser::PARSE_FAILURE; |  | 108 | 10 |         return 0; |  | 109 | 10 |     } |  | 110 |  |     // parse exponent if any |  | 111 | 13.5k |     int64_t exponent = 0; |  | 112 | 13.5k |     if (i != len) { |  | 113 | 1 |         bool negative_exponent = false; |  | 114 | 1 |         if (s[i] == 'e' || s[i] == 'E') { |  | 115 | 0 |             ++i; |  | 116 | 0 |             if (i != len) { |  | 117 | 0 |                 switch (s[i]) { |  | 118 | 0 |                 case '-': |  | 119 | 0 |                     negative_exponent = true; |  | 120 | 0 |                     [[fallthrough]]; |  | 121 | 0 |                 case '+': |  | 122 | 0 |                     ++i; |  | 123 | 0 |                 } |  | 124 | 0 |             } |  | 125 | 0 |             if (i == len) { |  | 126 |  |                 // '123e', '123e+', '123e-' |  | 127 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 128 | 0 |                 return 0; |  | 129 | 0 |             } |  | 130 | 0 |             for (; i != len; ++i) { |  | 131 | 0 |                 const char& c = s[i]; |  | 132 | 0 |                 if (LIKELY('0' <= c && c <= '9')) { |  | 133 | 0 |                     exponent = exponent * 10 + (c - '0'); |  | 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, |  | 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 136 |  |                     // just check overflow of int32_t to simplify the logic |  | 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 |  | 138 | 0 |                     if (exponent > std::numeric_limits<int32_t>::max()) { |  | 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; |  | 140 | 0 |                         return 0; |  | 141 | 0 |                     } |  | 142 | 0 |                 } else { |  | 143 |  |                     // '123e12abc', '123e1.2' |  | 144 | 0 |                     *result = StringParser::PARSE_FAILURE; |  | 145 | 0 |                     return 0; |  | 146 | 0 |                 } |  | 147 | 0 |             } |  | 148 | 0 |             if (negative_exponent) { |  | 149 | 0 |                 exponent = -exponent; |  | 150 | 0 |             } |  | 151 | 1 |         } else { |  | 152 | 1 |             *result = StringParser::PARSE_FAILURE; |  | 153 | 1 |             return 0; |  | 154 | 1 |         } |  | 155 | 1 |     } |  | 156 | 13.5k |     T int_part_number = 0; |  | 157 | 13.5k |     T frac_part_number = 0; |  | 158 |  |     // TODO: check limit values of exponent and add UT |  | 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, |  | 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 161 |  |     // so int_part_count will be in range of int32_t, |  | 162 |  |     // and int_part_count + exponent will be in range of int64_t |  | 163 | 13.5k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; |  | 164 | 13.5k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |  | 165 | 13.5k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |  | 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; |  | 167 | 0 |         return 0; |  | 168 | 0 |     } |  | 169 | 13.5k |     int actual_int_part_count = tmp_actual_int_part_count; |  | 170 | 13.5k |     int actual_frac_part_count = 0; |  | 171 | 13.5k |     int digit_index = 0; |  | 172 | 13.5k |     if (actual_int_part_count >= 0) { |  | 173 | 13.5k |         int max_index = std::min(actual_int_part_count, total_digit_count); |  | 174 |  |         // skip zero number |  | 175 | 13.5k |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |  | 176 | 0 |         } |  | 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 |  | 178 |  |         // 0.00000e2147483647 |  | 179 | 13.5k |         if (max_index - digit_index > type_precision - type_scale) { |  | 180 | 8 |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 181 | 8 |             return 0; |  | 182 | 8 |         } |  | 183 |  |         // get int part number |  | 184 | 149k |         for (; digit_index != max_index; ++digit_index) { |  | 185 | 136k |             int_part_number = int_part_number * 10 + digits[digit_index]; |  | 186 | 136k |         } |  | 187 | 13.5k |         if (digit_index != actual_int_part_count) { |  | 188 | 0 |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |  | 189 | 0 |         } |  | 190 | 13.5k |     } else { |  | 191 |  |         // leading zeros of fraction part |  | 192 | 0 |         actual_frac_part_count = -actual_int_part_count; |  | 193 | 0 |     } |  | 194 |  |     // get fraction part number |  | 195 | 131k |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |  | 196 | 118k |          ++digit_index, ++actual_frac_part_count) { |  | 197 | 118k |         frac_part_number = frac_part_number * 10 + digits[digit_index]; |  | 198 | 118k |     } |  | 199 | 13.5k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |  | 200 |  |     // there are still extra fraction digits left, check rounding |  | 201 | 13.5k |     if (digit_index != total_digit_count) { |  | 202 |  |         // example: test 1.5 -> decimal(1, 0) |  | 203 | 17 |         if (digits[digit_index] >= 5) { |  | 204 | 17 |             ++frac_part_number; |  | 205 | 17 |             if (frac_part_number == type_scale_multiplier) { |  | 206 | 0 |                 frac_part_number = 0; |  | 207 | 0 |                 ++int_part_number; |  | 208 | 0 |             } |  | 209 | 17 |         } |  | 210 | 13.4k |     } else { |  | 211 | 13.4k |         if (actual_frac_part_count < type_scale) { |  | 212 | 1.94k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |  | 213 | 1.94k |         } |  | 214 | 13.4k |     } |  | 215 | 13.5k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |  | 216 | 0 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 217 | 0 |         return 0; |  | 218 | 0 |     } |  | 219 |  |  |  | 220 | 13.5k |     T value = int_part_number * type_scale_multiplier + frac_part_number; |  | 221 | 13.5k |     *result = StringParser::PARSE_SUCCESS; |  | 222 | 13.5k |     return is_negative ? T(-value) : T(value); |  | 223 | 13.5k | } | 
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE| Line | Count | Source |  | 46 | 116k |         ParseResult* result) { |  | 47 | 116k |     using T = typename PrimitiveTypeTraits<P>::CppType::NativeType; |  | 48 | 116k |     static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |  | 49 | 116k |                           std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |  | 50 | 116k |                   "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |  | 51 | 116k |                   "wide::Int256."); |  | 52 |  |     // Ignore leading and trailing spaces. |  | 53 | 116k |     s = skip_ascii_whitespaces(s, len); |  | 54 |  |  |  | 55 | 116k |     bool is_negative = false; |  | 56 | 116k |     if (len > 0) { |  | 57 | 116k |         switch (*s) { |  | 58 | 17.3k |         case '-': |  | 59 | 17.3k |             is_negative = true; |  | 60 | 17.3k |             [[fallthrough]]; |  | 61 | 23.9k |         case '+': |  | 62 | 23.9k |             ++s; |  | 63 | 23.9k |             --len; |  | 64 | 116k |         } |  | 65 | 116k |     } |  | 66 |  |     // Ignore leading zeros. |  | 67 | 116k |     bool found_value = false; |  | 68 | 210k |     while (len > 0 && UNLIKELY(*s == '0')) { |  | 69 | 94.0k |         found_value = true; |  | 70 | 94.0k |         ++s; |  | 71 | 94.0k |         --len; |  | 72 | 94.0k |     } |  | 73 |  |  |  | 74 | 116k |     int found_dot = 0; |  | 75 | 116k |     if (len > 0 && *s == '.') { |  | 76 | 15.8k |         found_dot = 1; |  | 77 | 15.8k |         ++s; |  | 78 | 15.8k |         --len; |  | 79 | 15.8k |     } |  | 80 | 116k |     int int_part_count = 0; |  | 81 | 116k |     std::vector<unsigned char> digits; |  | 82 | 116k |     if (len > 0) { |  | 83 | 114k |         digits.resize(len); |  | 84 | 114k |     } |  | 85 | 116k |     int total_digit_count = 0; |  | 86 | 116k |     int i = 0; |  | 87 | 3.91M |     for (; i != len; ++i) { |  | 88 | 3.87M |         const char& c = s[i]; |  | 89 | 3.87M |         if (LIKELY('0' <= c && c <= '9')) { |  | 90 | 3.69M |             found_value = true; |  | 91 | 3.69M |             digits[total_digit_count++] = c - '0'; |  | 92 | 3.69M |             if (!found_dot) { |  | 93 | 991k |                 ++int_part_count; |  | 94 | 991k |             } |  | 95 | 3.69M |         } else if (c == '.') { |  | 96 | 94.5k |             if (found_dot) { |  | 97 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 98 | 0 |                 return 0; |  | 99 | 0 |             } |  | 100 | 94.5k |             found_dot = 1; |  | 101 | 94.5k |         } else { |  | 102 | 76.9k |             break; |  | 103 | 76.9k |         } |  | 104 | 3.87M |     } |  | 105 | 116k |     if (!found_value) { |  | 106 |  |         // '', '.' |  | 107 | 66 |         *result = StringParser::PARSE_FAILURE; |  | 108 | 66 |         return 0; |  | 109 | 66 |     } |  | 110 |  |     // parse exponent if any |  | 111 | 116k |     int64_t exponent = 0; |  | 112 | 116k |     if (i != len) { |  | 113 | 76.9k |         bool negative_exponent = false; |  | 114 | 76.9k |         if (s[i] == 'e' || s[i] == 'E') { |  | 115 | 76.8k |             ++i; |  | 116 | 76.8k |             if (i != len) { |  | 117 | 76.8k |                 switch (s[i]) { |  | 118 | 1.53k |                 case '-': |  | 119 | 1.53k |                     negative_exponent = true; |  | 120 | 1.53k |                     [[fallthrough]]; |  | 121 | 69.1k |                 case '+': |  | 122 | 69.1k |                     ++i; |  | 123 | 76.8k |                 } |  | 124 | 76.8k |             } |  | 125 | 76.8k |             if (i == len) { |  | 126 |  |                 // '123e', '123e+', '123e-' |  | 127 | 0 |                 *result = StringParser::PARSE_FAILURE; |  | 128 | 0 |                 return 0; |  | 129 | 0 |             } |  | 130 | 229k |             for (; i != len; ++i) { |  | 131 | 152k |                 const char& c = s[i]; |  | 132 | 152k |                 if (LIKELY('0' <= c && c <= '9')) { |  | 133 | 152k |                     exponent = exponent * 10 + (c - '0'); |  | 134 |  |                     // max string len is config::string_type_length_soft_limit_bytes, |  | 135 |  |                     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 136 |  |                     // just check overflow of int32_t to simplify the logic |  | 137 |  |                     // For edge cases like 0.{2147483647 zeros}e+2147483647 |  | 138 | 152k |                     if (exponent > std::numeric_limits<int32_t>::max()) { |  | 139 | 0 |                         *result = StringParser::PARSE_OVERFLOW; |  | 140 | 0 |                         return 0; |  | 141 | 0 |                     } |  | 142 | 152k |                 } else { |  | 143 |  |                     // '123e12abc', '123e1.2' |  | 144 | 10 |                     *result = StringParser::PARSE_FAILURE; |  | 145 | 10 |                     return 0; |  | 146 | 10 |                 } |  | 147 | 152k |             } |  | 148 | 76.8k |             if (negative_exponent) { |  | 149 | 1.53k |                 exponent = -exponent; |  | 150 | 1.53k |             } |  | 151 | 76.8k |         } else { |  | 152 | 20 |             *result = StringParser::PARSE_FAILURE; |  | 153 | 20 |             return 0; |  | 154 | 20 |         } |  | 155 | 76.9k |     } |  | 156 | 116k |     T int_part_number = 0; |  | 157 | 116k |     T frac_part_number = 0; |  | 158 |  |     // TODO: check limit values of exponent and add UT |  | 159 |  |     // max string len is config::string_type_length_soft_limit_bytes, |  | 160 |  |     // whose max value is std::numeric_limits<int32_t>::max() - 4, |  | 161 |  |     // so int_part_count will be in range of int32_t, |  | 162 |  |     // and int_part_count + exponent will be in range of int64_t |  | 163 | 116k |     int64_t tmp_actual_int_part_count = int_part_count + exponent; |  | 164 | 116k |     if (tmp_actual_int_part_count > std::numeric_limits<int>::max() || |  | 165 | 116k |         tmp_actual_int_part_count < std::numeric_limits<int>::min()) { |  | 166 | 0 |         *result = StringParser::PARSE_OVERFLOW; |  | 167 | 0 |         return 0; |  | 168 | 0 |     } |  | 169 | 116k |     int actual_int_part_count = tmp_actual_int_part_count; |  | 170 | 116k |     int actual_frac_part_count = 0; |  | 171 | 116k |     int digit_index = 0; |  | 172 | 116k |     if (actual_int_part_count >= 0) { |  | 173 | 116k |         int max_index = std::min(actual_int_part_count, total_digit_count); |  | 174 |  |         // skip zero number |  | 175 | 332k |         for (; digit_index != max_index && digits[digit_index] == 0; ++digit_index) { |  | 176 | 216k |         } |  | 177 |  |         // test 0.00, .00, 0.{00...}e2147483647 |  | 178 |  |         // 0.00000e2147483647 |  | 179 | 116k |         if (max_index - digit_index > type_precision - type_scale) { |  | 180 | 112 |             *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 181 | 112 |             return 0; |  | 182 | 112 |         } |  | 183 |  |         // get int part number |  | 184 | 2.11M |         for (; digit_index != max_index; ++digit_index) { |  | 185 | 1.99M |             int_part_number = int_part_number * 10 + digits[digit_index]; |  | 186 | 1.99M |         } |  | 187 | 116k |         if (digit_index != actual_int_part_count) { |  | 188 | 65.5k |             int_part_number *= get_scale_multiplier<T>(actual_int_part_count - digit_index); |  | 189 | 65.5k |         } |  | 190 | 116k |     } else { |  | 191 |  |         // leading zeros of fraction part |  | 192 | 48 |         actual_frac_part_count = -actual_int_part_count; |  | 193 | 48 |     } |  | 194 |  |     // get fraction part number |  | 195 | 1.49M |     for (; digit_index != total_digit_count && actual_frac_part_count < type_scale; |  | 196 | 1.37M |          ++digit_index, ++actual_frac_part_count) { |  | 197 | 1.37M |         frac_part_number = frac_part_number * 10 + digits[digit_index]; |  | 198 | 1.37M |     } |  | 199 | 116k |     auto type_scale_multiplier = get_scale_multiplier<T>(type_scale); |  | 200 |  |     // there are still extra fraction digits left, check rounding |  | 201 | 116k |     if (digit_index != total_digit_count) { |  | 202 |  |         // example: test 1.5 -> decimal(1, 0) |  | 203 | 18.8k |         if (digits[digit_index] >= 5) { |  | 204 | 7.94k |             ++frac_part_number; |  | 205 | 7.94k |             if (frac_part_number == type_scale_multiplier) { |  | 206 | 836 |                 frac_part_number = 0; |  | 207 | 836 |                 ++int_part_number; |  | 208 | 836 |             } |  | 209 | 7.94k |         } |  | 210 | 97.6k |     } else { |  | 211 | 97.6k |         if (actual_frac_part_count < type_scale) { |  | 212 | 88.2k |             frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count); |  | 213 | 88.2k |         } |  | 214 | 97.6k |     } |  | 215 | 116k |     if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) { |  | 216 | 16 |         *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW; |  | 217 | 16 |         return 0; |  | 218 | 16 |     } |  | 219 |  |  |  | 220 | 116k |     T value = int_part_number * type_scale_multiplier + frac_part_number; |  | 221 | 116k |     *result = StringParser::PARSE_SUCCESS; |  | 222 | 116k |     return is_negative ? T(-value) : T(value); |  | 223 | 116k | } | 
 | 
| 224 |  | template vectorized::Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>( | 
| 225 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 226 |  |         ParseResult* result); | 
| 227 |  | template vectorized::Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>( | 
| 228 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 229 |  |         ParseResult* result); | 
| 230 |  | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>( | 
| 231 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 232 |  |         ParseResult* result); | 
| 233 |  | template vectorized::Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>( | 
| 234 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 235 |  |         ParseResult* result); | 
| 236 |  | template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>( | 
| 237 |  |         const char* __restrict s, size_t len, int type_precision, int type_scale, | 
| 238 |  |         ParseResult* result); | 
| 239 |  | } // end namespace doris | 
| 240 |  | #include "common/compile_check_avoid_end.h" |