/root/doris/be/src/util/string_parser.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | |
27 | | #include <cstdlib> |
28 | | // IWYU pragma: no_include <bits/std_abs.h> |
29 | | #include <cmath> // IWYU pragma: keep |
30 | | #include <cstdint> |
31 | | #include <limits> |
32 | | #include <map> |
33 | | #include <string> |
34 | | #include <system_error> |
35 | | #include <type_traits> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/compiler_util.h" // IWYU pragma: keep |
39 | | #include "common/status.h" |
40 | | #include "runtime/large_int_value.h" |
41 | | #include "runtime/primitive_type.h" |
42 | | #include "vec/common/int_exp.h" |
43 | | #include "vec/common/string_utils/string_utils.h" |
44 | | #include "vec/core/extended_types.h" |
45 | | #include "vec/core/wide_integer.h" |
46 | | #include "vec/data_types/data_type_decimal.h" |
47 | | #include "vec/data_types/number_traits.h" |
48 | | |
49 | | namespace doris { |
50 | | namespace vectorized { |
51 | | template <DecimalNativeTypeConcept T> |
52 | | struct Decimal; |
53 | | } // namespace vectorized |
54 | | |
55 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
56 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
57 | | // |
58 | | // Strings with leading and trailing whitespaces are accepted. |
59 | | // Branching is heavily optimized for the non-whitespace successful case. |
60 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
61 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
62 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
63 | | // |
64 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
65 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
66 | | // and inf/-inf for float types. |
67 | | // |
68 | | // Things we tried that did not work: |
69 | | // - lookup table for converting character to digit |
70 | | // Improvements (TODO): |
71 | | // - Validate input using _sidd_compare_ranges |
72 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
73 | | class StringParser { |
74 | | public: |
75 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
76 | | |
77 | | template <typename T> |
78 | 568k | static T numeric_limits(bool negative) { |
79 | 568k | if constexpr (std::is_same_v<T, __int128>) { |
80 | 521k | return negative ? MIN_INT128 : MAX_INT128; |
81 | 521k | } else { |
82 | 521k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
83 | 521k | } |
84 | 568k | } _ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 78 | 268k | static T numeric_limits(bool negative) { | 79 | 268k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 268k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 268k | } else { | 82 | 268k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 268k | } | 84 | 268k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 78 | 88.6k | static T numeric_limits(bool negative) { | 79 | 88.6k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 88.6k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 88.6k | } else { | 82 | 88.6k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 88.6k | } | 84 | 88.6k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 78 | 77.3k | static T numeric_limits(bool negative) { | 79 | 77.3k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 77.3k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 77.3k | } else { | 82 | 77.3k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 77.3k | } | 84 | 77.3k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 78 | 65.9k | static T numeric_limits(bool negative) { | 79 | 65.9k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 65.9k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 65.9k | } else { | 82 | 65.9k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 65.9k | } | 84 | 65.9k | } |
_ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 78 | 46.8k | static T numeric_limits(bool negative) { | 79 | 46.8k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 46.8k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 46.8k | } else { | 82 | 46.8k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 46.8k | } | 84 | 46.8k | } |
_ZN5doris12StringParser14numeric_limitsIhEET_b Line | Count | Source | 78 | 19.9k | static T numeric_limits(bool negative) { | 79 | 19.9k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 19.9k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 19.9k | } else { | 82 | 19.9k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 19.9k | } | 84 | 19.9k | } |
_ZN5doris12StringParser14numeric_limitsItEET_b Line | Count | Source | 78 | 672 | static T numeric_limits(bool negative) { | 79 | 672 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 672 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 672 | } else { | 82 | 672 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 672 | } | 84 | 672 | } |
_ZN5doris12StringParser14numeric_limitsIjEET_b Line | Count | Source | 78 | 780 | static T numeric_limits(bool negative) { | 79 | 780 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 780 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 780 | } else { | 82 | 780 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 780 | } | 84 | 780 | } |
_ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 78 | 729 | static T numeric_limits(bool negative) { | 79 | 729 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 729 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 729 | } else { | 82 | 729 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 729 | } | 84 | 729 | } |
_ZN5doris12StringParser14numeric_limitsIoEET_b Line | Count | Source | 78 | 4 | static T numeric_limits(bool negative) { | 79 | 4 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 4 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 4 | } else { | 82 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 4 | } | 84 | 4 | } |
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Line | Count | Source | 78 | 4 | static T numeric_limits(bool negative) { | 79 | 4 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 4 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 4 | } else { | 82 | 4 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 4 | } | 84 | 4 | } |
|
85 | | |
86 | | template <typename T> |
87 | 247k | static T get_scale_multiplier(int scale) { |
88 | 247k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
89 | 247k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
90 | 247k | "You can only instantiate as int32_t, int64_t, __int128."); |
91 | 247k | if constexpr (std::is_same_v<T, int32_t>) { |
92 | 225k | return common::exp10_i32(scale); |
93 | 225k | } else if constexpr (std::is_same_v<T, int64_t>) { |
94 | 198k | return common::exp10_i64(scale); |
95 | 198k | } else if constexpr (std::is_same_v<T, __int128>) { |
96 | 154k | return common::exp10_i128(scale); |
97 | 154k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
98 | 154k | return common::exp10_i256(scale); |
99 | 154k | } |
100 | 247k | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 87 | 21.6k | static T get_scale_multiplier(int scale) { | 88 | 21.6k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 21.6k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 21.6k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 21.6k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 21.6k | return common::exp10_i32(scale); | 93 | 21.6k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 21.6k | return common::exp10_i64(scale); | 95 | 21.6k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 21.6k | return common::exp10_i128(scale); | 97 | 21.6k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 21.6k | return common::exp10_i256(scale); | 99 | 21.6k | } | 100 | 21.6k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 87 | 26.6k | static T get_scale_multiplier(int scale) { | 88 | 26.6k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 26.6k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 26.6k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 26.6k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 26.6k | return common::exp10_i32(scale); | 93 | 26.6k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 26.6k | return common::exp10_i64(scale); | 95 | 26.6k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 26.6k | return common::exp10_i128(scale); | 97 | 26.6k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 26.6k | return common::exp10_i256(scale); | 99 | 26.6k | } | 100 | 26.6k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 87 | 44.3k | static T get_scale_multiplier(int scale) { | 88 | 44.3k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 44.3k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 44.3k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 44.3k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 44.3k | return common::exp10_i32(scale); | 93 | 44.3k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 44.3k | return common::exp10_i64(scale); | 95 | 44.3k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 44.3k | return common::exp10_i128(scale); | 97 | 44.3k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 44.3k | return common::exp10_i256(scale); | 99 | 44.3k | } | 100 | 44.3k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 87 | 154k | static T get_scale_multiplier(int scale) { | 88 | 154k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 154k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 154k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 154k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 154k | return common::exp10_i32(scale); | 93 | 154k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 154k | return common::exp10_i64(scale); | 95 | 154k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 154k | return common::exp10_i128(scale); | 97 | 154k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 154k | return common::exp10_i256(scale); | 99 | 154k | } | 100 | 154k | } |
|
101 | | |
102 | | // This is considerably faster than glibc's implementation (25x). |
103 | | // In the case of overflow, the max/min value for the data type will be returned. |
104 | | // Assumes s represents a decimal number. |
105 | | template <typename T> |
106 | 370k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
107 | 370k | T ans = string_to_int_internal<T>(s, len, result); |
108 | 370k | if (LIKELY(*result == PARSE_SUCCESS)) { |
109 | 317k | return ans; |
110 | 317k | } |
111 | | |
112 | 53.1k | int i = skip_leading_whitespace(s, len); |
113 | 53.1k | return string_to_int_internal<T>(s + i, len - i, result); |
114 | 370k | } _ZN5doris12StringParser13string_to_intIlEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 83.5k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 83.5k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 83.5k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 79.6k | return ans; | 110 | 79.6k | } | 111 | | | 112 | 3.97k | int i = skip_leading_whitespace(s, len); | 113 | 3.97k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 83.5k | } |
_ZN5doris12StringParser13string_to_intIaEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 97.5k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 97.5k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 97.5k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 68.8k | return ans; | 110 | 68.8k | } | 111 | | | 112 | 28.7k | int i = skip_leading_whitespace(s, len); | 113 | 28.7k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 97.5k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 68.3k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 68.3k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 68.3k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 60.4k | return ans; | 110 | 60.4k | } | 111 | | | 112 | 7.84k | int i = skip_leading_whitespace(s, len); | 113 | 7.84k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 68.3k | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 60.9k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 60.9k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 60.9k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 57.0k | return ans; | 110 | 57.0k | } | 111 | | | 112 | 3.91k | int i = skip_leading_whitespace(s, len); | 113 | 3.91k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 60.9k | } |
_ZN5doris12StringParser13string_to_intInEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 46.2k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 46.2k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 46.2k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 45.5k | return ans; | 110 | 45.5k | } | 111 | | | 112 | 663 | int i = skip_leading_whitespace(s, len); | 113 | 663 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 46.2k | } |
_ZN5doris12StringParser13string_to_intIhEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 11.9k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 11.9k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 11.9k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 3.89k | return ans; | 110 | 3.89k | } | 111 | | | 112 | 8.02k | int i = skip_leading_whitespace(s, len); | 113 | 8.02k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 11.9k | } |
_ZN5doris12StringParser13string_to_intItEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 669 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 669 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 669 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 666 | return ans; | 110 | 666 | } | 111 | | | 112 | 3 | int i = skip_leading_whitespace(s, len); | 113 | 3 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 669 | } |
_ZN5doris12StringParser13string_to_intIjEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 777 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 777 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 777 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 774 | return ans; | 110 | 774 | } | 111 | | | 112 | 3 | int i = skip_leading_whitespace(s, len); | 113 | 3 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 777 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 725 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 725 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 725 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 722 | return ans; | 110 | 722 | } | 111 | | | 112 | 3 | int i = skip_leading_whitespace(s, len); | 113 | 3 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 725 | } |
_ZN5doris12StringParser13string_to_intIoEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 4 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 4 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 4 | return ans; | 110 | 4 | } | 111 | | | 112 | 0 | int i = skip_leading_whitespace(s, len); | 113 | 0 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 4 | } |
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEEEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 4 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 4 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 4 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 4 | return ans; | 110 | 4 | } | 111 | | | 112 | 0 | int i = skip_leading_whitespace(s, len); | 113 | 0 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 4 | } |
|
115 | | |
116 | | // This is considerably faster than glibc's implementation. |
117 | | // In the case of overflow, the max/min value for the data type will be returned. |
118 | | // Assumes s represents a decimal number. |
119 | | template <typename T> |
120 | 1.38k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
121 | 1.38k | T ans = string_to_unsigned_int_internal<T>(s, len, result); |
122 | 1.38k | if (LIKELY(*result == PARSE_SUCCESS)) { |
123 | 92 | return ans; |
124 | 92 | } |
125 | | |
126 | 1.29k | int i = skip_leading_whitespace(s, len); |
127 | 1.29k | return string_to_unsigned_int_internal<T>(s + i, len - i, result); |
128 | 1.38k | } _ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 355 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 355 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 355 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 29 | return ans; | 124 | 29 | } | 125 | | | 126 | 326 | int i = skip_leading_whitespace(s, len); | 127 | 326 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 355 | } |
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
|
129 | | |
130 | | // Convert a string s representing a number in given base into a decimal number. |
131 | | template <typename T> |
132 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
133 | 27.8k | ParseResult* result) { |
134 | 27.8k | T ans = string_to_int_internal<T>(s, len, base, result); |
135 | 27.8k | if (LIKELY(*result == PARSE_SUCCESS)) { |
136 | 2.06k | return ans; |
137 | 2.06k | } |
138 | | |
139 | 25.7k | int i = skip_leading_whitespace(s, len); |
140 | 25.7k | return string_to_int_internal<T>(s + i, len - i, base, result); |
141 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 26.4k | ParseResult* result) { | 134 | 26.4k | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 26.4k | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 1.91k | return ans; | 137 | 1.91k | } | 138 | | | 139 | 24.5k | int i = skip_leading_whitespace(s, len); | 140 | 24.5k | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 490 | ParseResult* result) { | 134 | 490 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 490 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 56 | return ans; | 137 | 56 | } | 138 | | | 139 | 434 | int i = skip_leading_whitespace(s, len); | 140 | 434 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 441 | ParseResult* result) { | 134 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 49 | return ans; | 137 | 49 | } | 138 | | | 139 | 392 | int i = skip_leading_whitespace(s, len); | 140 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 441 | ParseResult* result) { | 134 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 49 | return ans; | 137 | 49 | } | 138 | | | 139 | 392 | int i = skip_leading_whitespace(s, len); | 140 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 1 | ParseResult* result) { | 134 | 1 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 1 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 1 | return ans; | 137 | 1 | } | 138 | | | 139 | 0 | int i = skip_leading_whitespace(s, len); | 140 | 0 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 1 | } |
|
142 | | |
143 | | template <typename T> |
144 | 137k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
145 | 137k | return string_to_float_internal<T>(s, len, result); |
146 | 137k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 144 | 73.8k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 145 | 73.8k | return string_to_float_internal<T>(s, len, result); | 146 | 73.8k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 144 | 63.8k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 145 | 63.8k | return string_to_float_internal<T>(s, len, result); | 146 | 63.8k | } |
|
147 | | |
148 | | // Parses a string for 'true' or 'false', case insensitive. |
149 | 8.31k | static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) { |
150 | 8.31k | bool ans = string_to_bool_internal(s, len, result); |
151 | 8.31k | if (LIKELY(*result == PARSE_SUCCESS)) { |
152 | 7.28k | return ans; |
153 | 7.28k | } |
154 | | |
155 | 1.03k | int i = skip_leading_whitespace(s, len); |
156 | 1.03k | return string_to_bool_internal(s + i, len - i, result); |
157 | 8.31k | } |
158 | | |
159 | | template <PrimitiveType P, typename T = PrimitiveTypeTraits<P>::CppType::NativeType, |
160 | | typename DecimalType = PrimitiveTypeTraits<P>::ColumnType::value_type> |
161 | | static inline T string_to_decimal(const char* __restrict s, int len, int type_precision, |
162 | | int type_scale, ParseResult* result); |
163 | | |
164 | | template <typename T> |
165 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
166 | | const T key_value_separator, |
167 | | std::map<std::string, std::string>* result) { |
168 | | int key_pos = 0; |
169 | | int key_end; |
170 | | int val_pos; |
171 | | int val_end; |
172 | | |
173 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
174 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
175 | | std::string::npos) { |
176 | | break; |
177 | | } |
178 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
179 | | val_end = base.size(); |
180 | | } |
181 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
182 | | base.substr(val_pos, val_end - val_pos))); |
183 | | key_pos = val_end; |
184 | | if (key_pos != std::string::npos) { |
185 | | ++key_pos; |
186 | | } |
187 | | } |
188 | | |
189 | | return Status::OK(); |
190 | | } |
191 | | |
192 | | private: |
193 | | // This is considerably faster than glibc's implementation. |
194 | | // In the case of overflow, the max/min value for the data type will be returned. |
195 | | // Assumes s represents a decimal number. |
196 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
197 | | template <typename T> |
198 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
199 | | |
200 | | // This is considerably faster than glibc's implementation. |
201 | | // In the case of overflow, the max/min value for the data type will be returned. |
202 | | // Assumes s represents a decimal number. |
203 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
204 | | template <typename T> |
205 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
206 | | ParseResult* result); |
207 | | |
208 | | // Convert a string s representing a number in given base into a decimal number. |
209 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
210 | | template <typename T> |
211 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
212 | | ParseResult* result); |
213 | | |
214 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
215 | | // and the number is positive. |
216 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
217 | | template <typename T> |
218 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
219 | | ParseResult* result); |
220 | | |
221 | | // This is considerably faster than glibc's implementation (>100x why???) |
222 | | // No special case handling needs to be done for overflows, the floating point spec |
223 | | // already does it and will cap the values to -inf/inf |
224 | | // To avoid inaccurate conversions this function falls back to strtod for |
225 | | // scientific notation. |
226 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
227 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
228 | | template <typename T> |
229 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
230 | | ParseResult* result); |
231 | | |
232 | | // parses a string for 'true' or 'false', case insensitive |
233 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
234 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
235 | | ParseResult* result); |
236 | | |
237 | | // Returns true if s only contains whitespace. |
238 | 33.8k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
239 | 108k | for (int i = 0; i < len; ++i) { |
240 | 76.6k | if (!LIKELY(is_whitespace(s[i]))) { |
241 | 1.77k | return false; |
242 | 1.77k | } |
243 | 76.6k | } |
244 | 32.0k | return true; |
245 | 33.8k | } |
246 | | |
247 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
248 | 1.49k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
249 | 1.49k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
250 | 1.49k | } |
251 | | |
252 | 826 | static inline bool is_all_digit(const char* __restrict s, int len) { |
253 | 1.65k | for (int i = 0; i < len; ++i) { |
254 | 837 | if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { |
255 | 11 | return false; |
256 | 11 | } |
257 | 837 | } |
258 | 815 | return true; |
259 | 826 | } |
260 | | |
261 | | // Returns the position of the first non-whitespace character in s. |
262 | 81.2k | static inline int skip_leading_whitespace(const char* __restrict s, int len) { |
263 | 81.2k | int i = 0; |
264 | 234k | while (i < len && is_whitespace(s[i])) { |
265 | 153k | ++i; |
266 | 153k | } |
267 | 81.2k | return i; |
268 | 81.2k | } |
269 | | |
270 | | // Our own definition of "isspace" that optimize on the ' ' branch. |
271 | 1.17M | static inline bool is_whitespace(const char& c) { |
272 | 1.17M | return LIKELY(c == ' ') || |
273 | 1.17M | UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); |
274 | 1.17M | } |
275 | | |
276 | | }; // end of class StringParser |
277 | | |
278 | | template <typename T> |
279 | 501k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
280 | 501k | if (UNLIKELY(len <= 0)) { |
281 | 242 | *result = PARSE_FAILURE; |
282 | 242 | return 0; |
283 | 242 | } |
284 | | |
285 | 500k | typedef typename std::make_unsigned<T>::type UnsignedT; |
286 | 500k | UnsignedT val = 0; |
287 | 500k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
288 | 500k | bool negative = false; |
289 | 500k | int i = 0; |
290 | 500k | switch (*s) { |
291 | 107k | case '-': |
292 | 107k | negative = true; |
293 | 107k | max_val += 1; |
294 | 107k | [[fallthrough]]; |
295 | 178k | case '+': |
296 | 178k | ++i; |
297 | | // only one '+'/'-' char, so could return failure directly |
298 | 178k | if (UNLIKELY(len == 1)) { |
299 | 0 | *result = PARSE_FAILURE; |
300 | 0 | return 0; |
301 | 0 | } |
302 | 500k | } |
303 | | |
304 | | // This is the fast path where the string cannot overflow. |
305 | 500k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
306 | 327k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); |
307 | 327k | return static_cast<T>(negative ? -val : val); |
308 | 327k | } |
309 | | |
310 | 173k | const T max_div_10 = max_val / 10; |
311 | 173k | const T max_mod_10 = max_val % 10; |
312 | | |
313 | 173k | int first = i; |
314 | 1.63M | for (; i < len; ++i) { |
315 | 1.56M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
316 | 1.51M | T digit = s[i] - '0'; |
317 | | // This is a tricky check to see if adding this digit will cause an overflow. |
318 | 1.51M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
319 | 46.3k | *result = PARSE_OVERFLOW; |
320 | 46.3k | return negative ? -max_val : max_val; |
321 | 46.3k | } |
322 | 1.46M | val = val * 10 + digit; |
323 | 1.46M | } else { |
324 | 52.1k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && |
325 | 52.1k | !is_float_suffix(s + i, len - i))))) { |
326 | | // Reject the string because either the first char was not a digit, |
327 | | // or the remaining chars are not all whitespace |
328 | 40.7k | *result = PARSE_FAILURE; |
329 | 40.7k | return 0; |
330 | 40.7k | } |
331 | | // Returning here is slightly faster than breaking the loop. |
332 | 11.4k | *result = PARSE_SUCCESS; |
333 | 11.4k | return static_cast<T>(negative ? -val : val); |
334 | 52.1k | } |
335 | 1.56M | } |
336 | 74.6k | *result = PARSE_SUCCESS; |
337 | 74.6k | return static_cast<T>(negative ? -val : val); |
338 | 173k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 203k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 203k | if (UNLIKELY(len <= 0)) { | 281 | 206 | *result = PARSE_FAILURE; | 282 | 206 | return 0; | 283 | 206 | } | 284 | | | 285 | 203k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 203k | UnsignedT val = 0; | 287 | 203k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 203k | bool negative = false; | 289 | 203k | int i = 0; | 290 | 203k | switch (*s) { | 291 | 28.1k | case '-': | 292 | 28.1k | negative = true; | 293 | 28.1k | max_val += 1; | 294 | 28.1k | [[fallthrough]]; | 295 | 99.6k | case '+': | 296 | 99.6k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 99.6k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 203k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 203k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 140k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 140k | return static_cast<T>(negative ? -val : val); | 308 | 140k | } | 309 | | | 310 | 63.0k | const T max_div_10 = max_val / 10; | 311 | 63.0k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 63.0k | int first = i; | 314 | 150k | for (; i < len; ++i) { | 315 | 143k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 108k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 108k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 21.0k | *result = PARSE_OVERFLOW; | 320 | 21.0k | return negative ? -max_val : max_val; | 321 | 21.0k | } | 322 | 87.9k | val = val * 10 + digit; | 323 | 87.9k | } else { | 324 | 34.9k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 34.9k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 23.9k | *result = PARSE_FAILURE; | 329 | 23.9k | return 0; | 330 | 23.9k | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 11.0k | *result = PARSE_SUCCESS; | 333 | 11.0k | return static_cast<T>(negative ? -val : val); | 334 | 34.9k | } | 335 | 143k | } | 336 | 7.01k | *result = PARSE_SUCCESS; | 337 | 7.01k | return static_cast<T>(negative ? -val : val); | 338 | 63.0k | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 87.5k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 87.5k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 87.5k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 87.5k | UnsignedT val = 0; | 287 | 87.5k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 87.5k | bool negative = false; | 289 | 87.5k | int i = 0; | 290 | 87.5k | switch (*s) { | 291 | 51.9k | case '-': | 292 | 51.9k | negative = true; | 293 | 51.9k | max_val += 1; | 294 | 51.9k | [[fallthrough]]; | 295 | 52.0k | case '+': | 296 | 52.0k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 52.0k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 87.5k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 87.5k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 33.0k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 33.0k | return static_cast<T>(negative ? -val : val); | 308 | 33.0k | } | 309 | | | 310 | 54.5k | const T max_div_10 = max_val / 10; | 311 | 54.5k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 54.5k | int first = i; | 314 | 1.06M | for (; i < len; ++i) { | 315 | 1.01M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 1.01M | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 1.01M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 5.02k | *result = PARSE_OVERFLOW; | 320 | 5.02k | return negative ? -max_val : max_val; | 321 | 5.02k | } | 322 | 1.01M | val = val * 10 + digit; | 323 | 1.01M | } else { | 324 | 1.03k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 1.03k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 949 | *result = PARSE_FAILURE; | 329 | 949 | return 0; | 330 | 949 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 84 | *result = PARSE_SUCCESS; | 333 | 84 | return static_cast<T>(negative ? -val : val); | 334 | 1.03k | } | 335 | 1.01M | } | 336 | 48.4k | *result = PARSE_SUCCESS; | 337 | 48.4k | return static_cast<T>(negative ? -val : val); | 338 | 54.5k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 76.1k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 76.1k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 76.1k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 76.1k | UnsignedT val = 0; | 287 | 76.1k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 76.1k | bool negative = false; | 289 | 76.1k | int i = 0; | 290 | 76.1k | switch (*s) { | 291 | 12.9k | case '-': | 292 | 12.9k | negative = true; | 293 | 12.9k | max_val += 1; | 294 | 12.9k | [[fallthrough]]; | 295 | 12.9k | case '+': | 296 | 12.9k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 12.9k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 76.1k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 76.1k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 52.8k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 52.8k | return static_cast<T>(negative ? -val : val); | 308 | 52.8k | } | 309 | | | 310 | 23.3k | const T max_div_10 = max_val / 10; | 311 | 23.3k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 23.3k | int first = i; | 314 | 122k | for (; i < len; ++i) { | 315 | 113k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 112k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 112k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 13.2k | *result = PARSE_OVERFLOW; | 320 | 13.2k | return negative ? -max_val : max_val; | 321 | 13.2k | } | 322 | 99.5k | val = val * 10 + digit; | 323 | 99.5k | } else { | 324 | 975 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 975 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 765 | *result = PARSE_FAILURE; | 329 | 765 | return 0; | 330 | 765 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 210 | *result = PARSE_SUCCESS; | 333 | 210 | return static_cast<T>(negative ? -val : val); | 334 | 975 | } | 335 | 113k | } | 336 | 9.14k | *result = PARSE_SUCCESS; | 337 | 9.14k | return static_cast<T>(negative ? -val : val); | 338 | 23.3k | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 64.9k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 64.9k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 64.9k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 64.9k | UnsignedT val = 0; | 287 | 64.9k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 64.9k | bool negative = false; | 289 | 64.9k | int i = 0; | 290 | 64.9k | switch (*s) { | 291 | 10.1k | case '-': | 292 | 10.1k | negative = true; | 293 | 10.1k | max_val += 1; | 294 | 10.1k | [[fallthrough]]; | 295 | 10.2k | case '+': | 296 | 10.2k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 10.2k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 64.9k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 64.9k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 52.7k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 52.7k | return static_cast<T>(negative ? -val : val); | 308 | 52.7k | } | 309 | | | 310 | 12.1k | const T max_div_10 = max_val / 10; | 311 | 12.1k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 12.1k | int first = i; | 314 | 124k | for (; i < len; ++i) { | 315 | 118k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 117k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 117k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 5.61k | *result = PARSE_OVERFLOW; | 320 | 5.61k | return negative ? -max_val : max_val; | 321 | 5.61k | } | 322 | 112k | val = val * 10 + digit; | 323 | 112k | } else { | 324 | 509 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 509 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 383 | *result = PARSE_FAILURE; | 329 | 383 | return 0; | 330 | 383 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 126 | *result = PARSE_SUCCESS; | 333 | 126 | return static_cast<T>(negative ? -val : val); | 334 | 509 | } | 335 | 118k | } | 336 | 6.04k | *result = PARSE_SUCCESS; | 337 | 6.04k | return static_cast<T>(negative ? -val : val); | 338 | 12.1k | } |
_ZN5doris12StringParser22string_to_int_internalInEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 46.8k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 46.8k | if (UNLIKELY(len <= 0)) { | 281 | 36 | *result = PARSE_FAILURE; | 282 | 36 | return 0; | 283 | 36 | } | 284 | | | 285 | 46.8k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 46.8k | UnsignedT val = 0; | 287 | 46.8k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 46.8k | bool negative = false; | 289 | 46.8k | int i = 0; | 290 | 46.8k | switch (*s) { | 291 | 3.15k | case '-': | 292 | 3.15k | negative = true; | 293 | 3.15k | max_val += 1; | 294 | 3.15k | [[fallthrough]]; | 295 | 3.15k | case '+': | 296 | 3.15k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 3.15k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 46.8k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 46.8k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 43.0k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 43.0k | return static_cast<T>(negative ? -val : val); | 308 | 43.0k | } | 309 | | | 310 | 3.83k | const T max_div_10 = max_val / 10; | 311 | 3.83k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 3.83k | int first = i; | 314 | 150k | for (; i < len; ++i) { | 315 | 146k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 146k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 146k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 6 | *result = PARSE_OVERFLOW; | 320 | 6 | return negative ? -max_val : max_val; | 321 | 6 | } | 322 | 146k | val = val * 10 + digit; | 323 | 146k | } else { | 324 | 106 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 106 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 106 | *result = PARSE_FAILURE; | 329 | 106 | return 0; | 330 | 106 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 106 | } | 335 | 146k | } | 336 | 3.72k | *result = PARSE_SUCCESS; | 337 | 3.72k | return static_cast<T>(negative ? -val : val); | 338 | 3.83k | } |
_ZN5doris12StringParser22string_to_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 19.9k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 19.9k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 19.9k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 19.9k | UnsignedT val = 0; | 287 | 19.9k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 19.9k | bool negative = false; | 289 | 19.9k | int i = 0; | 290 | 19.9k | switch (*s) { | 291 | 862 | case '-': | 292 | 862 | negative = true; | 293 | 862 | max_val += 1; | 294 | 862 | [[fallthrough]]; | 295 | 862 | case '+': | 296 | 862 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 862 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 19.9k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 19.9k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 3.89k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 3.89k | return static_cast<T>(negative ? -val : val); | 308 | 3.89k | } | 309 | | | 310 | 16.0k | const T max_div_10 = max_val / 10; | 311 | 16.0k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 16.0k | int first = i; | 314 | 18.2k | for (; i < len; ++i) { | 315 | 18.2k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 3.69k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 3.69k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 1.46k | *result = PARSE_OVERFLOW; | 320 | 1.46k | return negative ? -max_val : max_val; | 321 | 1.46k | } | 322 | 2.23k | val = val * 10 + digit; | 323 | 14.5k | } else { | 324 | 14.5k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 14.5k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 14.5k | *result = PARSE_FAILURE; | 329 | 14.5k | return 0; | 330 | 14.5k | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 14.5k | } | 335 | 18.2k | } | 336 | 12 | *result = PARSE_SUCCESS; | 337 | 12 | return static_cast<T>(negative ? -val : val); | 338 | 16.0k | } |
_ZN5doris12StringParser22string_to_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 672 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 672 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 672 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 672 | UnsignedT val = 0; | 287 | 672 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 672 | bool negative = false; | 289 | 672 | int i = 0; | 290 | 672 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 672 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 672 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 600 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 600 | return static_cast<T>(negative ? -val : val); | 308 | 600 | } | 309 | | | 310 | 72 | const T max_div_10 = max_val / 10; | 311 | 72 | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 72 | int first = i; | 314 | 432 | for (; i < len; ++i) { | 315 | 360 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 360 | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 360 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 360 | val = val * 10 + digit; | 323 | 360 | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 360 | } | 336 | 72 | *result = PARSE_SUCCESS; | 337 | 72 | return static_cast<T>(negative ? -val : val); | 338 | 72 | } |
_ZN5doris12StringParser22string_to_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 780 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 780 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 780 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 780 | UnsignedT val = 0; | 287 | 780 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 780 | bool negative = false; | 289 | 780 | int i = 0; | 290 | 780 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 780 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 780 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 654 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 654 | return static_cast<T>(negative ? -val : val); | 308 | 654 | } | 309 | | | 310 | 126 | const T max_div_10 = max_val / 10; | 311 | 126 | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 126 | int first = i; | 314 | 1.38k | for (; i < len; ++i) { | 315 | 1.26k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 1.26k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 1.26k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 1.26k | val = val * 10 + digit; | 323 | 1.26k | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 1.26k | } | 336 | 126 | *result = PARSE_SUCCESS; | 337 | 126 | return static_cast<T>(negative ? -val : val); | 338 | 126 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 728 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 728 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 728 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 728 | UnsignedT val = 0; | 287 | 728 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 728 | bool negative = false; | 289 | 728 | int i = 0; | 290 | 728 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 728 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 728 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 692 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 692 | return static_cast<T>(negative ? -val : val); | 308 | 692 | } | 309 | | | 310 | 36 | const T max_div_10 = max_val / 10; | 311 | 36 | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 36 | int first = i; | 314 | 756 | for (; i < len; ++i) { | 315 | 720 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 720 | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 720 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 720 | val = val * 10 + digit; | 323 | 720 | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 720 | } | 336 | 36 | *result = PARSE_SUCCESS; | 337 | 36 | return static_cast<T>(negative ? -val : val); | 338 | 36 | } |
_ZN5doris12StringParser22string_to_int_internalIoEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 4 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 4 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 4 | UnsignedT val = 0; | 287 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 4 | bool negative = false; | 289 | 4 | int i = 0; | 290 | 4 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 4 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 0 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 0 | return static_cast<T>(negative ? -val : val); | 308 | 0 | } | 309 | | | 310 | 4 | const T max_div_10 = max_val / 10; | 311 | 4 | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 4 | int first = i; | 314 | 84 | for (; i < len; ++i) { | 315 | 80 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 80 | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 80 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 80 | val = val * 10 + digit; | 323 | 80 | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 80 | } | 336 | 4 | *result = PARSE_SUCCESS; | 337 | 4 | return static_cast<T>(negative ? -val : val); | 338 | 4 | } |
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 4 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 4 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 4 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 4 | UnsignedT val = 0; | 287 | 4 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 4 | bool negative = false; | 289 | 4 | int i = 0; | 290 | 4 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 4 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 4 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 4 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 4 | return static_cast<T>(negative ? -val : val); | 308 | 4 | } | 309 | | | 310 | 0 | const T max_div_10 = max_val / 10; | 311 | 0 | const T max_mod_10 = max_val % 10; | 312 | |
| 313 | 0 | int first = i; | 314 | 0 | for (; i < len; ++i) { | 315 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 0 | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 0 | val = val * 10 + digit; | 323 | 0 | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 0 | } | 336 | 0 | *result = PARSE_SUCCESS; | 337 | 0 | return static_cast<T>(negative ? -val : val); | 338 | 0 | } |
|
339 | | |
340 | | template <typename T> |
341 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
342 | 2.67k | ParseResult* result) { |
343 | 2.67k | if (UNLIKELY(len <= 0)) { |
344 | 0 | *result = PARSE_FAILURE; |
345 | 0 | return 0; |
346 | 0 | } |
347 | | |
348 | 2.67k | T val = 0; |
349 | 2.67k | T max_val = std::numeric_limits<T>::max(); |
350 | 2.67k | int i = 0; |
351 | | |
352 | 2.67k | typedef typename std::make_signed<T>::type signedT; |
353 | | // This is the fast path where the string cannot overflow. |
354 | 2.67k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
355 | 895 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
356 | 895 | return val; |
357 | 895 | } |
358 | | |
359 | 1.78k | const T max_div_10 = max_val / 10; |
360 | 1.78k | const T max_mod_10 = max_val % 10; |
361 | | |
362 | 1.78k | int first = i; |
363 | 6.54k | for (; i < len; ++i) { |
364 | 6.49k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
365 | 4.99k | T digit = s[i] - '0'; |
366 | | // This is a tricky check to see if adding this digit will cause an overflow. |
367 | 4.99k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
368 | 224 | *result = PARSE_OVERFLOW; |
369 | 224 | return max_val; |
370 | 224 | } |
371 | 4.76k | val = val * 10 + digit; |
372 | 4.76k | } else { |
373 | 1.50k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
374 | | // Reject the string because either the first char was not a digit, |
375 | | // or the remaining chars are not all whitespace |
376 | 1.13k | *result = PARSE_FAILURE; |
377 | 1.13k | return 0; |
378 | 1.13k | } |
379 | | // Returning here is slightly faster than breaking the loop. |
380 | 378 | *result = PARSE_SUCCESS; |
381 | 378 | return val; |
382 | 1.50k | } |
383 | 6.49k | } |
384 | 49 | *result = PARSE_SUCCESS; |
385 | 49 | return val; |
386 | 1.78k | } _ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 681 | ParseResult* result) { | 343 | 681 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 681 | T val = 0; | 349 | 681 | T max_val = std::numeric_limits<T>::max(); | 350 | 681 | int i = 0; | 351 | | | 352 | 681 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 681 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 456 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 456 | return val; | 357 | 456 | } | 358 | | | 359 | 225 | const T max_div_10 = max_val / 10; | 360 | 225 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 225 | int first = i; | 363 | 2.26k | for (; i < len; ++i) { | 364 | 2.26k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 2.10k | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 2.10k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 2.04k | val = val * 10 + digit; | 372 | 2.04k | } else { | 373 | 162 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 120 | *result = PARSE_FAILURE; | 377 | 120 | return 0; | 378 | 120 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 42 | *result = PARSE_SUCCESS; | 381 | 42 | return val; | 382 | 162 | } | 383 | 2.26k | } | 384 | 7 | *result = PARSE_SUCCESS; | 385 | 7 | return val; | 386 | 225 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 16 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 16 | return val; | 357 | 16 | } | 358 | | | 359 | 649 | const T max_div_10 = max_val / 10; | 360 | 649 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 649 | int first = i; | 363 | 1.20k | for (; i < len; ++i) { | 364 | 1.18k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 609 | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 609 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 553 | val = val * 10 + digit; | 372 | 572 | } else { | 373 | 572 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 446 | *result = PARSE_FAILURE; | 377 | 446 | return 0; | 378 | 446 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 126 | *result = PARSE_SUCCESS; | 381 | 126 | return val; | 382 | 572 | } | 383 | 1.18k | } | 384 | 21 | *result = PARSE_SUCCESS; | 385 | 21 | return val; | 386 | 649 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 31 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 31 | return val; | 357 | 31 | } | 358 | | | 359 | 634 | const T max_div_10 = max_val / 10; | 360 | 634 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 634 | int first = i; | 363 | 1.47k | for (; i < len; ++i) { | 364 | 1.46k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 896 | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 896 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 840 | val = val * 10 + digit; | 372 | 840 | } else { | 373 | 564 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 438 | *result = PARSE_FAILURE; | 377 | 438 | return 0; | 378 | 438 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 126 | *result = PARSE_SUCCESS; | 381 | 126 | return val; | 382 | 564 | } | 383 | 1.46k | } | 384 | 14 | *result = PARSE_SUCCESS; | 385 | 14 | return val; | 386 | 634 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 392 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 392 | return val; | 357 | 392 | } | 358 | | | 359 | 273 | const T max_div_10 = max_val / 10; | 360 | 273 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 273 | int first = i; | 363 | 1.60k | for (; i < len; ++i) { | 364 | 1.59k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 1.38k | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 1.38k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 1.33k | val = val * 10 + digit; | 372 | 1.33k | } else { | 373 | 210 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 126 | *result = PARSE_FAILURE; | 377 | 126 | return 0; | 378 | 126 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 84 | *result = PARSE_SUCCESS; | 381 | 84 | return val; | 382 | 210 | } | 383 | 1.59k | } | 384 | 7 | *result = PARSE_SUCCESS; | 385 | 7 | return val; | 386 | 273 | } |
|
387 | | |
388 | | template <typename T> |
389 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
390 | 53.6k | ParseResult* result) { |
391 | 53.6k | typedef typename std::make_unsigned<T>::type UnsignedT; |
392 | 53.6k | UnsignedT val = 0; |
393 | 53.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
394 | 53.6k | bool negative = false; |
395 | 53.6k | if (UNLIKELY(len <= 0)) { |
396 | 0 | *result = PARSE_FAILURE; |
397 | 0 | return 0; |
398 | 0 | } |
399 | 53.6k | int i = 0; |
400 | 53.6k | switch (*s) { |
401 | 14.3k | case '-': |
402 | 14.3k | negative = true; |
403 | 14.3k | max_val = StringParser::numeric_limits<T>(false) + 1; |
404 | 14.3k | [[fallthrough]]; |
405 | 14.6k | case '+': |
406 | 14.6k | i = 1; |
407 | 53.6k | } |
408 | | |
409 | 53.6k | const T max_div_base = max_val / base; |
410 | 53.6k | const T max_mod_base = max_val % base; |
411 | | |
412 | 53.6k | int first = i; |
413 | 120k | for (; i < len; ++i) { |
414 | 118k | T digit; |
415 | 118k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
416 | 81.6k | digit = s[i] - '0'; |
417 | 81.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
418 | 639 | digit = (s[i] - 'a' + 10); |
419 | 36.4k | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
420 | 98 | digit = (s[i] - 'A' + 10); |
421 | 36.3k | } else { |
422 | 36.3k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
423 | | // Reject the string because either the first char was not an alpha/digit, |
424 | | // or the remaining chars are not all whitespace |
425 | 24.0k | *result = PARSE_FAILURE; |
426 | 24.0k | return 0; |
427 | 24.0k | } |
428 | | // skip trailing whitespace. |
429 | 12.2k | break; |
430 | 36.3k | } |
431 | | |
432 | | // Bail, if we encounter a digit that is not available in base. |
433 | 82.4k | if (digit >= base) { |
434 | 392 | break; |
435 | 392 | } |
436 | | |
437 | | // This is a tricky check to see if adding this digit will cause an overflow. |
438 | 82.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
439 | 14.8k | *result = PARSE_OVERFLOW; |
440 | 14.8k | return static_cast<T>(negative ? -max_val : max_val); |
441 | 14.8k | } |
442 | 67.2k | val = val * base + digit; |
443 | 67.2k | } |
444 | 14.7k | *result = PARSE_SUCCESS; |
445 | 14.7k | return static_cast<T>(negative ? -val : val); |
446 | 53.6k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 51.0k | ParseResult* result) { | 391 | 51.0k | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 51.0k | UnsignedT val = 0; | 393 | 51.0k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 51.0k | bool negative = false; | 395 | 51.0k | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 51.0k | int i = 0; | 400 | 51.0k | switch (*s) { | 401 | 13.7k | case '-': | 402 | 13.7k | negative = true; | 403 | 13.7k | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 13.7k | [[fallthrough]]; | 405 | 13.8k | case '+': | 406 | 13.8k | i = 1; | 407 | 51.0k | } | 408 | | | 409 | 51.0k | const T max_div_base = max_val / base; | 410 | 51.0k | const T max_mod_base = max_val % base; | 411 | | | 412 | 51.0k | int first = i; | 413 | 108k | for (; i < len; ++i) { | 414 | 107k | T digit; | 415 | 107k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 72.1k | digit = s[i] - '0'; | 417 | 72.1k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 539 | digit = (s[i] - 'a' + 10); | 419 | 34.3k | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 98 | digit = (s[i] - 'A' + 10); | 421 | 34.2k | } else { | 422 | 34.2k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 22.8k | *result = PARSE_FAILURE; | 426 | 22.8k | return 0; | 427 | 22.8k | } | 428 | | // skip trailing whitespace. | 429 | 11.3k | break; | 430 | 34.2k | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 72.7k | if (digit >= base) { | 434 | 392 | break; | 435 | 392 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 72.4k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 14.5k | *result = PARSE_OVERFLOW; | 440 | 14.5k | return static_cast<T>(negative ? -max_val : max_val); | 441 | 14.5k | } | 442 | 57.8k | val = val * base + digit; | 443 | 57.8k | } | 444 | 13.6k | *result = PARSE_SUCCESS; | 445 | 13.6k | return static_cast<T>(negative ? -val : val); | 446 | 51.0k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 924 | ParseResult* result) { | 391 | 924 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 924 | UnsignedT val = 0; | 393 | 924 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 924 | bool negative = false; | 395 | 924 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 924 | int i = 0; | 400 | 924 | switch (*s) { | 401 | 203 | case '-': | 402 | 203 | negative = true; | 403 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 203 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 924 | } | 408 | | | 409 | 924 | const T max_div_base = max_val / base; | 410 | 924 | const T max_mod_base = max_val % base; | 411 | | | 412 | 924 | int first = i; | 413 | 2.59k | for (; i < len; ++i) { | 414 | 2.54k | T digit; | 415 | 2.54k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 1.68k | digit = s[i] - '0'; | 417 | 1.68k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 98 | digit = (s[i] - 'a' + 10); | 419 | 756 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 756 | } else { | 422 | 756 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 420 | *result = PARSE_FAILURE; | 426 | 420 | return 0; | 427 | 420 | } | 428 | | // skip trailing whitespace. | 429 | 336 | break; | 430 | 756 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 1.78k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 1.78k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 1.67k | val = val * base + digit; | 443 | 1.67k | } | 444 | 392 | *result = PARSE_SUCCESS; | 445 | 392 | return static_cast<T>(negative ? -val : val); | 446 | 924 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 833 | ParseResult* result) { | 391 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 833 | UnsignedT val = 0; | 393 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 833 | bool negative = false; | 395 | 833 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 833 | int i = 0; | 400 | 833 | switch (*s) { | 401 | 154 | case '-': | 402 | 154 | negative = true; | 403 | 154 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 154 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 833 | } | 408 | | | 409 | 833 | const T max_div_base = max_val / base; | 410 | 833 | const T max_mod_base = max_val % base; | 411 | | | 412 | 833 | int first = i; | 413 | 3.55k | for (; i < len; ++i) { | 414 | 3.50k | T digit; | 415 | 3.50k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 2.83k | digit = s[i] - '0'; | 417 | 2.83k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 0 | digit = (s[i] - 'a' + 10); | 419 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 672 | } else { | 422 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 378 | *result = PARSE_FAILURE; | 426 | 378 | return 0; | 427 | 378 | } | 428 | | // skip trailing whitespace. | 429 | 294 | break; | 430 | 672 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 2.83k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 2.83k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 2.72k | val = val * base + digit; | 443 | 2.72k | } | 444 | 343 | *result = PARSE_SUCCESS; | 445 | 343 | return static_cast<T>(negative ? -val : val); | 446 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 833 | ParseResult* result) { | 391 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 833 | UnsignedT val = 0; | 393 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 833 | bool negative = false; | 395 | 833 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 833 | int i = 0; | 400 | 833 | switch (*s) { | 401 | 203 | case '-': | 402 | 203 | negative = true; | 403 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 203 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 833 | } | 408 | | | 409 | 833 | const T max_div_base = max_val / base; | 410 | 833 | const T max_mod_base = max_val % base; | 411 | | | 412 | 833 | int first = i; | 413 | 5.74k | for (; i < len; ++i) { | 414 | 5.69k | T digit; | 415 | 5.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 5.01k | digit = s[i] - '0'; | 417 | 5.01k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 0 | digit = (s[i] - 'a' + 10); | 419 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 672 | } else { | 422 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 378 | *result = PARSE_FAILURE; | 426 | 378 | return 0; | 427 | 378 | } | 428 | | // skip trailing whitespace. | 429 | 294 | break; | 430 | 672 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 5.01k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 5.01k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 4.90k | val = val * base + digit; | 443 | 4.90k | } | 444 | 343 | *result = PARSE_SUCCESS; | 445 | 343 | return static_cast<T>(negative ? -val : val); | 446 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 1 | ParseResult* result) { | 391 | 1 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 1 | UnsignedT val = 0; | 393 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 1 | bool negative = false; | 395 | 1 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 1 | int i = 0; | 400 | 1 | switch (*s) { | 401 | 0 | case '-': | 402 | 0 | negative = true; | 403 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 0 | [[fallthrough]]; | 405 | 0 | case '+': | 406 | 0 | i = 1; | 407 | 1 | } | 408 | | | 409 | 1 | const T max_div_base = max_val / base; | 410 | 1 | const T max_mod_base = max_val % base; | 411 | | | 412 | 1 | int first = i; | 413 | 3 | for (; i < len; ++i) { | 414 | 2 | T digit; | 415 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 0 | digit = s[i] - '0'; | 417 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 2 | digit = (s[i] - 'a' + 10); | 419 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 0 | } else { | 422 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 0 | *result = PARSE_FAILURE; | 426 | 0 | return 0; | 427 | 0 | } | 428 | | // skip trailing whitespace. | 429 | 0 | break; | 430 | 0 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 2 | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 0 | *result = PARSE_OVERFLOW; | 440 | 0 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 0 | } | 442 | 2 | val = val * base + digit; | 443 | 2 | } | 444 | 1 | *result = PARSE_SUCCESS; | 445 | 1 | return static_cast<T>(negative ? -val : val); | 446 | 1 | } |
|
447 | | |
448 | | template <typename T> |
449 | 328k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
450 | 328k | T val = 0; |
451 | 328k | if (UNLIKELY(len == 0)) { |
452 | 0 | *result = PARSE_SUCCESS; |
453 | 0 | return val; |
454 | 0 | } |
455 | | // Factor out the first char for error handling speeds up the loop. |
456 | 328k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
457 | 324k | val = s[0] - '0'; |
458 | 324k | } else { |
459 | 4.26k | *result = PARSE_FAILURE; |
460 | 4.26k | return 0; |
461 | 4.26k | } |
462 | 546k | for (int i = 1; i < len; ++i) { |
463 | 223k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
464 | 221k | T digit = s[i] - '0'; |
465 | 221k | val = val * 10 + digit; |
466 | 221k | } else { |
467 | 1.59k | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && |
468 | 1.59k | !is_float_suffix(s + i, len - i)))) { |
469 | 194 | *result = PARSE_FAILURE; |
470 | 194 | return 0; |
471 | 194 | } |
472 | 1.40k | *result = PARSE_SUCCESS; |
473 | 1.40k | return val; |
474 | 1.59k | } |
475 | 223k | } |
476 | 322k | *result = PARSE_SUCCESS; |
477 | 322k | return val; |
478 | 324k | } _ZN5doris12StringParser25string_to_int_no_overflowIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 144k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 144k | T val = 0; | 451 | 144k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 144k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 143k | val = s[0] - '0'; | 458 | 143k | } else { | 459 | 492 | *result = PARSE_FAILURE; | 460 | 492 | return 0; | 461 | 492 | } | 462 | 248k | for (int i = 1; i < len; ++i) { | 463 | 104k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 104k | T digit = s[i] - '0'; | 465 | 104k | val = val * 10 + digit; | 466 | 104k | } else { | 467 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 0 | !is_float_suffix(s + i, len - i)))) { | 469 | 0 | *result = PARSE_FAILURE; | 470 | 0 | return 0; | 471 | 0 | } | 472 | 0 | *result = PARSE_SUCCESS; | 473 | 0 | return val; | 474 | 0 | } | 475 | 104k | } | 476 | 143k | *result = PARSE_SUCCESS; | 477 | 143k | return val; | 478 | 143k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 34.2k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 34.2k | T val = 0; | 451 | 34.2k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 34.2k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 33.1k | val = s[0] - '0'; | 458 | 33.1k | } else { | 459 | 1.06k | *result = PARSE_FAILURE; | 460 | 1.06k | return 0; | 461 | 1.06k | } | 462 | 73.6k | for (int i = 1; i < len; ++i) { | 463 | 40.8k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 40.4k | T digit = s[i] - '0'; | 465 | 40.4k | val = val * 10 + digit; | 466 | 40.4k | } else { | 467 | 359 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 359 | !is_float_suffix(s + i, len - i)))) { | 469 | 64 | *result = PARSE_FAILURE; | 470 | 64 | return 0; | 471 | 64 | } | 472 | 295 | *result = PARSE_SUCCESS; | 473 | 295 | return val; | 474 | 359 | } | 475 | 40.8k | } | 476 | 32.7k | *result = PARSE_SUCCESS; | 477 | 32.7k | return val; | 478 | 33.1k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 53.4k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 53.4k | T val = 0; | 451 | 53.4k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 53.4k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 52.7k | val = s[0] - '0'; | 458 | 52.7k | } else { | 459 | 761 | *result = PARSE_FAILURE; | 460 | 761 | return 0; | 461 | 761 | } | 462 | 76.7k | for (int i = 1; i < len; ++i) { | 463 | 24.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 24.0k | T digit = s[i] - '0'; | 465 | 24.0k | val = val * 10 + digit; | 466 | 24.0k | } else { | 467 | 950 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 950 | !is_float_suffix(s + i, len - i)))) { | 469 | 52 | *result = PARSE_FAILURE; | 470 | 52 | return 0; | 471 | 52 | } | 472 | 898 | *result = PARSE_SUCCESS; | 473 | 898 | return val; | 474 | 950 | } | 475 | 24.9k | } | 476 | 51.7k | *result = PARSE_SUCCESS; | 477 | 51.7k | return val; | 478 | 52.7k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 53.7k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 53.7k | T val = 0; | 451 | 53.7k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 53.7k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 52.5k | val = s[0] - '0'; | 458 | 52.5k | } else { | 459 | 1.26k | *result = PARSE_FAILURE; | 460 | 1.26k | return 0; | 461 | 1.26k | } | 462 | 88.7k | for (int i = 1; i < len; ++i) { | 463 | 36.5k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 36.2k | T digit = s[i] - '0'; | 465 | 36.2k | val = val * 10 + digit; | 466 | 36.2k | } else { | 467 | 254 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 254 | !is_float_suffix(s + i, len - i)))) { | 469 | 44 | *result = PARSE_FAILURE; | 470 | 44 | return 0; | 471 | 44 | } | 472 | 210 | *result = PARSE_SUCCESS; | 473 | 210 | return val; | 474 | 254 | } | 475 | 36.5k | } | 476 | 52.2k | *result = PARSE_SUCCESS; | 477 | 52.2k | return val; | 478 | 52.5k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 43.0k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 43.0k | T val = 0; | 451 | 43.0k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 43.0k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 42.3k | val = s[0] - '0'; | 458 | 42.3k | } else { | 459 | 674 | *result = PARSE_FAILURE; | 460 | 674 | return 0; | 461 | 674 | } | 462 | 58.7k | for (int i = 1; i < len; ++i) { | 463 | 16.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 16.4k | T digit = s[i] - '0'; | 465 | 16.4k | val = val * 10 + digit; | 466 | 16.4k | } else { | 467 | 34 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 34 | !is_float_suffix(s + i, len - i)))) { | 469 | 34 | *result = PARSE_FAILURE; | 470 | 34 | return 0; | 471 | 34 | } | 472 | 0 | *result = PARSE_SUCCESS; | 473 | 0 | return val; | 474 | 34 | } | 475 | 16.4k | } | 476 | 42.3k | *result = PARSE_SUCCESS; | 477 | 42.3k | return val; | 478 | 42.3k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEEEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 4 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 4 | T val = 0; | 451 | 4 | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 4 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 4 | val = s[0] - '0'; | 458 | 4 | } else { | 459 | 0 | *result = PARSE_FAILURE; | 460 | 0 | return 0; | 461 | 0 | } | 462 | 4 | for (int i = 1; i < len; ++i) { | 463 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 0 | T digit = s[i] - '0'; | 465 | 0 | val = val * 10 + digit; | 466 | 0 | } else { | 467 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 0 | !is_float_suffix(s + i, len - i)))) { | 469 | 0 | *result = PARSE_FAILURE; | 470 | 0 | return 0; | 471 | 0 | } | 472 | 0 | *result = PARSE_SUCCESS; | 473 | 0 | return val; | 474 | 0 | } | 475 | 0 | } | 476 | 4 | *result = PARSE_SUCCESS; | 477 | 4 | return val; | 478 | 4 | } |
|
479 | | |
480 | | template <typename T> |
481 | 137k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
482 | 137k | int i = 0; |
483 | | // skip leading spaces |
484 | 180k | for (; i < len; ++i) { |
485 | 180k | if (!is_whitespace(s[i])) { |
486 | 137k | break; |
487 | 137k | } |
488 | 180k | } |
489 | | |
490 | | // skip back spaces |
491 | 137k | int j = len - 1; |
492 | 178k | for (; j >= i; j--) { |
493 | 178k | if (!is_whitespace(s[j])) { |
494 | 137k | break; |
495 | 137k | } |
496 | 178k | } |
497 | | |
498 | | // skip leading '+', from_chars can handle '-' |
499 | 137k | if (i < len && s[i] == '+') { |
500 | 5.29k | i++; |
501 | 5.29k | } |
502 | 137k | if (UNLIKELY(i > j)) { |
503 | 3 | *result = PARSE_FAILURE; |
504 | 3 | return 0; |
505 | 3 | } |
506 | | |
507 | | // Use double here to not lose precision while accumulating the result |
508 | 137k | double val = 0; |
509 | 137k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
510 | | |
511 | 137k | if (res.ec == std::errc() && res.ptr == s + j + 1) { |
512 | 132k | if (abs(val) == std::numeric_limits<T>::infinity()) { |
513 | 898 | auto contain_inf = false; |
514 | 1.29k | for (int k = i; k < j + 1; k++) { |
515 | 1.29k | if (s[k] == 'i' || s[k] == 'I') { |
516 | 894 | contain_inf = true; |
517 | 894 | break; |
518 | 894 | } |
519 | 1.29k | } |
520 | | |
521 | 898 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; |
522 | 132k | } else { |
523 | 132k | *result = PARSE_SUCCESS; |
524 | 132k | } |
525 | 132k | return val; |
526 | 132k | } else { |
527 | 4.74k | *result = PARSE_FAILURE; |
528 | 4.74k | } |
529 | 4.74k | return 0; |
530 | 137k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 481 | 73.8k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 482 | 73.8k | int i = 0; | 483 | | // skip leading spaces | 484 | 94.9k | for (; i < len; ++i) { | 485 | 94.9k | if (!is_whitespace(s[i])) { | 486 | 73.8k | break; | 487 | 73.8k | } | 488 | 94.9k | } | 489 | | | 490 | | // skip back spaces | 491 | 73.8k | int j = len - 1; | 492 | 94.3k | for (; j >= i; j--) { | 493 | 94.3k | if (!is_whitespace(s[j])) { | 494 | 73.8k | break; | 495 | 73.8k | } | 496 | 94.3k | } | 497 | | | 498 | | // skip leading '+', from_chars can handle '-' | 499 | 73.8k | if (i < len && s[i] == '+') { | 500 | 2.64k | i++; | 501 | 2.64k | } | 502 | 73.8k | if (UNLIKELY(i > j)) { | 503 | 3 | *result = PARSE_FAILURE; | 504 | 3 | return 0; | 505 | 3 | } | 506 | | | 507 | | // Use double here to not lose precision while accumulating the result | 508 | 73.8k | double val = 0; | 509 | 73.8k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 510 | | | 511 | 73.8k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 512 | 71.2k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 513 | 449 | auto contain_inf = false; | 514 | 656 | for (int k = i; k < j + 1; k++) { | 515 | 654 | if (s[k] == 'i' || s[k] == 'I') { | 516 | 447 | contain_inf = true; | 517 | 447 | break; | 518 | 447 | } | 519 | 654 | } | 520 | | | 521 | 449 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 522 | 70.7k | } else { | 523 | 70.7k | *result = PARSE_SUCCESS; | 524 | 70.7k | } | 525 | 71.2k | return val; | 526 | 71.2k | } else { | 527 | 2.57k | *result = PARSE_FAILURE; | 528 | 2.57k | } | 529 | 2.57k | return 0; | 530 | 73.8k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 481 | 63.8k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 482 | 63.8k | int i = 0; | 483 | | // skip leading spaces | 484 | 85.0k | for (; i < len; ++i) { | 485 | 85.0k | if (!is_whitespace(s[i])) { | 486 | 63.8k | break; | 487 | 63.8k | } | 488 | 85.0k | } | 489 | | | 490 | | // skip back spaces | 491 | 63.8k | int j = len - 1; | 492 | 84.4k | for (; j >= i; j--) { | 493 | 84.4k | if (!is_whitespace(s[j])) { | 494 | 63.8k | break; | 495 | 63.8k | } | 496 | 84.4k | } | 497 | | | 498 | | // skip leading '+', from_chars can handle '-' | 499 | 63.8k | if (i < len && s[i] == '+') { | 500 | 2.64k | i++; | 501 | 2.64k | } | 502 | 63.8k | if (UNLIKELY(i > j)) { | 503 | 0 | *result = PARSE_FAILURE; | 504 | 0 | return 0; | 505 | 0 | } | 506 | | | 507 | | // Use double here to not lose precision while accumulating the result | 508 | 63.8k | double val = 0; | 509 | 63.8k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 510 | | | 511 | 63.8k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 512 | 61.7k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 513 | 449 | auto contain_inf = false; | 514 | 638 | for (int k = i; k < j + 1; k++) { | 515 | 636 | if (s[k] == 'i' || s[k] == 'I') { | 516 | 447 | contain_inf = true; | 517 | 447 | break; | 518 | 447 | } | 519 | 636 | } | 520 | | | 521 | 449 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 522 | 61.2k | } else { | 523 | 61.2k | *result = PARSE_SUCCESS; | 524 | 61.2k | } | 525 | 61.7k | return val; | 526 | 61.7k | } else { | 527 | 2.16k | *result = PARSE_FAILURE; | 528 | 2.16k | } | 529 | 2.16k | return 0; | 530 | 63.8k | } |
|
531 | | |
532 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
533 | 9.34k | ParseResult* result) { |
534 | 9.34k | *result = PARSE_SUCCESS; |
535 | | |
536 | 9.34k | if (len >= 4 && (s[0] == 't' || s[0] == 'T')) { |
537 | 3.76k | bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') && |
538 | 3.76k | (s[3] == 'e' || s[3] == 'E'); |
539 | 3.76k | if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) { |
540 | 3.65k | return true; |
541 | 3.65k | } |
542 | 5.57k | } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) { |
543 | 3.83k | bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') && |
544 | 3.83k | (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E'); |
545 | 3.83k | if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) { |
546 | 3.71k | return false; |
547 | 3.71k | } |
548 | 3.83k | } |
549 | | |
550 | 1.97k | *result = PARSE_FAILURE; |
551 | 1.97k | return false; |
552 | 9.34k | } |
553 | | |
554 | | template <PrimitiveType P, typename T, typename DecimalType> |
555 | | T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision, |
556 | 249k | int type_scale, ParseResult* result) { |
557 | 249k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
558 | 249k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
559 | 249k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
560 | 249k | "wide::Int256."); |
561 | | // Special cases: |
562 | | // 1) '' == Fail, an empty string fails to parse. |
563 | | // 2) ' # ' == #, leading and trailing white space is ignored. |
564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). |
565 | | // 4) '#.' == '#', a trailing dot is ignored. |
566 | | |
567 | | // Ignore leading and trailing spaces. |
568 | 251k | while (len > 0 && is_whitespace(*s)) { |
569 | 2.44k | ++s; |
570 | 2.44k | --len; |
571 | 2.44k | } |
572 | 249k | while (len > 0 && is_whitespace(s[len - 1])) { |
573 | 0 | --len; |
574 | 0 | } |
575 | | |
576 | 249k | bool is_negative = false; |
577 | 249k | if (len > 0) { |
578 | 249k | switch (*s) { |
579 | 37.5k | case '-': |
580 | 37.5k | is_negative = true; |
581 | 37.5k | [[fallthrough]]; |
582 | 37.5k | case '+': |
583 | 37.5k | ++s; |
584 | 37.5k | --len; |
585 | 249k | } |
586 | 249k | } |
587 | | |
588 | | // Ignore leading zeros. |
589 | 249k | bool found_value = false; |
590 | 396k | while (len > 0 && UNLIKELY(*s == '0')) { |
591 | 147k | found_value = true; |
592 | 147k | ++s; |
593 | 147k | --len; |
594 | 147k | } |
595 | | |
596 | | // Ignore leading zeros even after a dot. This allows for differentiating between |
597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would |
598 | | // overflow. |
599 | 249k | int scale = 0; |
600 | 249k | int found_dot = 0; |
601 | 249k | if (len > 0 && *s == '.') { |
602 | 29.6k | found_dot = 1; |
603 | 29.6k | ++s; |
604 | 29.6k | --len; |
605 | 125k | while (len > 0 && UNLIKELY(*s == '0')) { |
606 | 95.6k | found_value = true; |
607 | 95.6k | ++scale; |
608 | 95.6k | ++s; |
609 | 95.6k | --len; |
610 | 95.6k | } |
611 | 29.6k | } |
612 | | |
613 | 249k | int precision = 0; |
614 | 249k | int max_digit = type_precision - type_scale; |
615 | 249k | int cur_digit = 0; |
616 | 249k | bool found_exponent = false; |
617 | 249k | int8_t exponent = 0; |
618 | 249k | T value = 0; |
619 | 249k | bool has_round = false; |
620 | 4.38M | for (int i = 0; i < len; ++i) { |
621 | 4.21M | const char& c = s[i]; |
622 | 4.21M | if (LIKELY('0' <= c && c <= '9')) { |
623 | 3.94M | found_value = true; |
624 | | // Ignore digits once the type's precision limit is reached. This avoids |
625 | | // overflowing the underlying storage while handling a string like |
626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and |
627 | | // an exponent will be made later. |
628 | 3.94M | if (LIKELY(type_precision > precision) && !has_round) { |
629 | 3.94M | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... |
630 | 3.94M | ++precision; |
631 | 3.94M | scale += found_dot; |
632 | 3.94M | cur_digit = precision - scale; |
633 | 3.94M | } else if (!found_dot && max_digit < (precision - scale)) { |
634 | 438 | *result = StringParser::PARSE_OVERFLOW; |
635 | 438 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
636 | 438 | : vectorized::max_decimal_value<DecimalType>(type_precision); |
637 | 438 | return value; |
638 | 438 | } else if (found_dot && scale >= type_scale && !has_round) { |
639 | | // make rounding cases |
640 | 24 | if (c > '4') { |
641 | 8 | value += 1; |
642 | 8 | } |
643 | 24 | has_round = true; |
644 | 24 | continue; |
645 | 24 | } else if (!found_dot) { |
646 | 0 | ++cur_digit; |
647 | 0 | } |
648 | 3.94M | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. |
649 | 3.94M | } else if (c == '.' && LIKELY(!found_dot)) { |
650 | 189k | found_dot = 1; |
651 | 189k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { |
652 | 77.1k | found_exponent = true; |
653 | 77.1k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); |
654 | 77.1k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { |
655 | 10 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { |
656 | 0 | *result = StringParser::PARSE_UNDERFLOW; |
657 | 0 | } |
658 | 10 | return 0; |
659 | 10 | } |
660 | 77.1k | break; |
661 | 77.1k | } else { |
662 | 344 | if (value == 0) { |
663 | 271 | *result = StringParser::PARSE_FAILURE; |
664 | 271 | return 0; |
665 | 271 | } |
666 | | // here to handle |
667 | 73 | *result = StringParser::PARSE_SUCCESS; |
668 | 73 | if (type_scale >= scale) { |
669 | 71 | value *= get_scale_multiplier<T>(type_scale - scale); |
670 | | // here meet non-valid character, should return the value, keep going to meet |
671 | | // the E/e character because we make right user-given type_precision |
672 | | // not max number type_precision |
673 | 71 | if (!is_numeric_ascii(c)) { |
674 | 71 | if (cur_digit > type_precision) { |
675 | 0 | *result = StringParser::PARSE_OVERFLOW; |
676 | 0 | value = is_negative |
677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) |
678 | 0 | : vectorized::max_decimal_value<DecimalType>( |
679 | 0 | type_precision); |
680 | 0 | return value; |
681 | 0 | } |
682 | 71 | return is_negative ? T(-value) : T(value); |
683 | 71 | } |
684 | 71 | } |
685 | | |
686 | 2 | return is_negative ? T(-value) : T(value); |
687 | 73 | } |
688 | 4.21M | } |
689 | | |
690 | | // Find the number of truncated digits before adjusting the precision for an exponent. |
691 | 248k | if (exponent > scale) { |
692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the |
693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. |
694 | 69.1k | precision += exponent - scale; |
695 | | |
696 | 69.1k | value *= get_scale_multiplier<T>(exponent - scale); |
697 | 69.1k | scale = 0; |
698 | 179k | } else { |
699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, |
700 | | // the precision must also be set to 4 but that will be done below for the |
701 | | // non-exponent case anyways. |
702 | 179k | scale -= exponent; |
703 | 179k | } |
704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros |
705 | | // were ignored during previous parsing. |
706 | 248k | if (scale > precision) { |
707 | 14.2k | precision = scale; |
708 | 14.2k | } |
709 | | |
710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower |
711 | | // than just letting the function run out. |
712 | 13.5k | *result = StringParser::PARSE_SUCCESS; |
713 | 234k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { |
714 | 11.4k | *result = StringParser::PARSE_OVERFLOW; |
715 | 11.4k | if constexpr (TYPE_DECIMALV2 != P) { |
716 | | // decimalv3 overflow will return max min value for type precision |
717 | 11.4k | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
718 | 11.4k | : vectorized::max_decimal_value<DecimalType>(type_precision); |
719 | 11.4k | return value; |
720 | 11.4k | } |
721 | 236k | } else if (UNLIKELY(scale > type_scale)) { |
722 | 4.15k | *result = StringParser::PARSE_UNDERFLOW; |
723 | 4.15k | int shift = scale - type_scale; |
724 | 4.15k | T divisor = get_scale_multiplier<T>(shift); |
725 | 4.15k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { |
726 | 0 | value = 0; |
727 | 4.15k | } else { |
728 | 4.15k | T remainder = value % divisor; |
729 | 4.15k | value /= divisor; |
730 | 4.15k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { |
731 | 100 | value += 1; |
732 | 100 | } |
733 | 4.15k | } |
734 | 4.15k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. |
735 | 232k | } else if (UNLIKELY(!found_value && !found_dot)) { |
736 | 1 | *result = StringParser::PARSE_FAILURE; |
737 | 1 | } |
738 | | |
739 | 237k | if (type_scale > scale) { |
740 | 173k | value *= get_scale_multiplier<T>(type_scale - scale); |
741 | 173k | } |
742 | | |
743 | 237k | return is_negative ? T(-value) : T(value); |
744 | 234k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EiNS_10vectorized7DecimalIiEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 25.4k | int type_scale, ParseResult* result) { | 557 | 25.4k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 25.4k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 25.4k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 25.4k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 25.6k | while (len > 0 && is_whitespace(*s)) { | 569 | 220 | ++s; | 570 | 220 | --len; | 571 | 220 | } | 572 | 25.4k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 25.4k | bool is_negative = false; | 577 | 25.4k | if (len > 0) { | 578 | 25.4k | switch (*s) { | 579 | 10.5k | case '-': | 580 | 10.5k | is_negative = true; | 581 | 10.5k | [[fallthrough]]; | 582 | 10.5k | case '+': | 583 | 10.5k | ++s; | 584 | 10.5k | --len; | 585 | 25.4k | } | 586 | 25.4k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 25.4k | bool found_value = false; | 590 | 29.1k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 3.73k | found_value = true; | 592 | 3.73k | ++s; | 593 | 3.73k | --len; | 594 | 3.73k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. | 599 | 25.4k | int scale = 0; | 600 | 25.4k | int found_dot = 0; | 601 | 25.4k | if (len > 0 && *s == '.') { | 602 | 2.80k | found_dot = 1; | 603 | 2.80k | ++s; | 604 | 2.80k | --len; | 605 | 3.27k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 470 | found_value = true; | 607 | 470 | ++scale; | 608 | 470 | ++s; | 609 | 470 | --len; | 610 | 470 | } | 611 | 2.80k | } | 612 | | | 613 | 25.4k | int precision = 0; | 614 | 25.4k | int max_digit = type_precision - type_scale; | 615 | 25.4k | int cur_digit = 0; | 616 | 25.4k | bool found_exponent = false; | 617 | 25.4k | int8_t exponent = 0; | 618 | 25.4k | T value = 0; | 619 | 25.4k | bool has_round = false; | 620 | 76.5k | for (int i = 0; i < len; ++i) { | 621 | 51.6k | const char& c = s[i]; | 622 | 51.6k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 47.8k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 47.8k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 47.3k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 47.3k | ++precision; | 631 | 47.3k | scale += found_dot; | 632 | 47.3k | cur_digit = precision - scale; | 633 | 47.3k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 438 | *result = StringParser::PARSE_OVERFLOW; | 635 | 438 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 438 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 438 | return value; | 638 | 438 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 4 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 4 | has_round = true; | 644 | 4 | continue; | 645 | 10 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 47.3k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 47.3k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 3.76k | found_dot = 1; | 651 | 3.76k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 0 | found_exponent = true; | 653 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 0 | break; | 661 | 92 | } else { | 662 | 92 | if (value == 0) { | 663 | 66 | *result = StringParser::PARSE_FAILURE; | 664 | 66 | return 0; | 665 | 66 | } | 666 | | // here to handle | 667 | 26 | *result = StringParser::PARSE_SUCCESS; | 668 | 26 | if (type_scale >= scale) { | 669 | 26 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 26 | if (!is_numeric_ascii(c)) { | 674 | 26 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 26 | return is_negative ? T(-value) : T(value); | 683 | 26 | } | 684 | 26 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 26 | } | 688 | 51.6k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 24.8k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 24.8k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 24.8k | scale -= exponent; | 703 | 24.8k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 24.8k | if (scale > precision) { | 707 | 354 | precision = scale; | 708 | 354 | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 24.8k | *result = StringParser::PARSE_SUCCESS; | 713 | 24.8k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 736 | *result = StringParser::PARSE_OVERFLOW; | 715 | 736 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 736 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 736 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 736 | return value; | 720 | 736 | } | 721 | 24.1k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 4 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 4 | int shift = scale - type_scale; | 724 | 4 | T divisor = get_scale_multiplier<T>(shift); | 725 | 4 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 4 | } else { | 728 | 4 | T remainder = value % divisor; | 729 | 4 | value /= divisor; | 730 | 4 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 4 | } | 734 | 4 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 24.1k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 24.1k | if (type_scale > scale) { | 740 | 21.6k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 21.6k | } | 742 | | | 743 | 24.1k | return is_negative ? T(-value) : T(value); | 744 | 24.8k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29ElNS_10vectorized7DecimalIlEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 60.8k | int type_scale, ParseResult* result) { | 557 | 60.8k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 60.8k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 60.8k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 60.8k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 61.4k | while (len > 0 && is_whitespace(*s)) { | 569 | 593 | ++s; | 570 | 593 | --len; | 571 | 593 | } | 572 | 60.8k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 60.8k | bool is_negative = false; | 577 | 60.8k | if (len > 0) { | 578 | 60.8k | switch (*s) { | 579 | 8.26k | case '-': | 580 | 8.26k | is_negative = true; | 581 | 8.26k | [[fallthrough]]; | 582 | 8.26k | case '+': | 583 | 8.26k | ++s; | 584 | 8.26k | --len; | 585 | 60.8k | } | 586 | 60.8k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 60.8k | bool found_value = false; | 590 | 90.4k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 29.5k | found_value = true; | 592 | 29.5k | ++s; | 593 | 29.5k | --len; | 594 | 29.5k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. | 599 | 60.8k | int scale = 0; | 600 | 60.8k | int found_dot = 0; | 601 | 60.8k | if (len > 0 && *s == '.') { | 602 | 10.6k | found_dot = 1; | 603 | 10.6k | ++s; | 604 | 10.6k | --len; | 605 | 23.4k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 12.7k | found_value = true; | 607 | 12.7k | ++scale; | 608 | 12.7k | ++s; | 609 | 12.7k | --len; | 610 | 12.7k | } | 611 | 10.6k | } | 612 | | | 613 | 60.8k | int precision = 0; | 614 | 60.8k | int max_digit = type_precision - type_scale; | 615 | 60.8k | int cur_digit = 0; | 616 | 60.8k | bool found_exponent = false; | 617 | 60.8k | int8_t exponent = 0; | 618 | 60.8k | T value = 0; | 619 | 60.8k | bool has_round = false; | 620 | 779k | for (int i = 0; i < len; ++i) { | 621 | 721k | const char& c = s[i]; | 622 | 721k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 673k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 673k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 673k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 673k | ++precision; | 631 | 673k | scale += found_dot; | 632 | 673k | cur_digit = precision - scale; | 633 | 673k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 10 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 4 | if (c > '4') { | 641 | 4 | value += 1; | 642 | 4 | } | 643 | 4 | has_round = true; | 644 | 4 | continue; | 645 | 6 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 673k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 673k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 45.4k | found_dot = 1; | 651 | 45.4k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 2.47k | found_exponent = true; | 653 | 2.47k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 2.47k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 2.47k | break; | 661 | 2.47k | } else { | 662 | 91 | if (value == 0) { | 663 | 68 | *result = StringParser::PARSE_FAILURE; | 664 | 68 | return 0; | 665 | 68 | } | 666 | | // here to handle | 667 | 23 | *result = StringParser::PARSE_SUCCESS; | 668 | 23 | if (type_scale >= scale) { | 669 | 22 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 22 | if (!is_numeric_ascii(c)) { | 674 | 22 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 22 | return is_negative ? T(-value) : T(value); | 683 | 22 | } | 684 | 22 | } | 685 | | | 686 | 1 | return is_negative ? T(-value) : T(value); | 687 | 23 | } | 688 | 721k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 60.7k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 1 | precision += exponent - scale; | 695 | | | 696 | 1 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 1 | scale = 0; | 698 | 60.7k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 60.7k | scale -= exponent; | 703 | 60.7k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 60.7k | if (scale > precision) { | 707 | 5.31k | precision = scale; | 708 | 5.31k | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 60.7k | *result = StringParser::PARSE_SUCCESS; | 713 | 60.7k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 10.7k | *result = StringParser::PARSE_OVERFLOW; | 715 | 10.7k | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 10.7k | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 10.7k | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 10.7k | return value; | 720 | 10.7k | } | 721 | 50.0k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 1.15k | *result = StringParser::PARSE_UNDERFLOW; | 723 | 1.15k | int shift = scale - type_scale; | 724 | 1.15k | T divisor = get_scale_multiplier<T>(shift); | 725 | 1.15k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 1.15k | } else { | 728 | 1.15k | T remainder = value % divisor; | 729 | 1.15k | value /= divisor; | 730 | 1.15k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 1.15k | } | 734 | 1.15k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 48.9k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 1 | *result = StringParser::PARSE_FAILURE; | 737 | 1 | } | 738 | | | 739 | 50.0k | if (type_scale > scale) { | 740 | 25.5k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 25.5k | } | 742 | | | 743 | 50.0k | return is_negative ? T(-value) : T(value); | 744 | 60.7k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EnNS_10vectorized12Decimal128V3EEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 57.1k | int type_scale, ParseResult* result) { | 557 | 57.1k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 57.1k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 57.1k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 57.1k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 57.6k | while (len > 0 && is_whitespace(*s)) { | 569 | 503 | ++s; | 570 | 503 | --len; | 571 | 503 | } | 572 | 57.1k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 57.1k | bool is_negative = false; | 577 | 57.1k | if (len > 0) { | 578 | 57.1k | switch (*s) { | 579 | 8.26k | case '-': | 580 | 8.26k | is_negative = true; | 581 | 8.26k | [[fallthrough]]; | 582 | 8.26k | case '+': | 583 | 8.26k | ++s; | 584 | 8.26k | --len; | 585 | 57.1k | } | 586 | 57.1k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 57.1k | bool found_value = false; | 590 | 85.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 28.2k | found_value = true; | 592 | 28.2k | ++s; | 593 | 28.2k | --len; | 594 | 28.2k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. | 599 | 57.1k | int scale = 0; | 600 | 57.1k | int found_dot = 0; | 601 | 57.1k | if (len > 0 && *s == '.') { | 602 | 11.7k | found_dot = 1; | 603 | 11.7k | ++s; | 604 | 11.7k | --len; | 605 | 45.0k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 33.2k | found_value = true; | 607 | 33.2k | ++scale; | 608 | 33.2k | ++s; | 609 | 33.2k | --len; | 610 | 33.2k | } | 611 | 11.7k | } | 612 | | | 613 | 57.1k | int precision = 0; | 614 | 57.1k | int max_digit = type_precision - type_scale; | 615 | 57.1k | int cur_digit = 0; | 616 | 57.1k | bool found_exponent = false; | 617 | 57.1k | int8_t exponent = 0; | 618 | 57.1k | T value = 0; | 619 | 57.1k | bool has_round = false; | 620 | 1.04M | for (int i = 0; i < len; ++i) { | 621 | 995k | const char& c = s[i]; | 622 | 995k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 950k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 950k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 950k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 950k | ++precision; | 631 | 950k | scale += found_dot; | 632 | 950k | cur_digit = precision - scale; | 633 | 950k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 16 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 8 | if (c > '4') { | 641 | 4 | value += 1; | 642 | 4 | } | 643 | 8 | has_round = true; | 644 | 8 | continue; | 645 | 8 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 950k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 950k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 41.4k | found_dot = 1; | 651 | 41.4k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 3.30k | found_exponent = true; | 653 | 3.30k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 3.30k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 3.30k | break; | 661 | 3.30k | } else { | 662 | 65 | if (value == 0) { | 663 | 53 | *result = StringParser::PARSE_FAILURE; | 664 | 53 | return 0; | 665 | 53 | } | 666 | | // here to handle | 667 | 12 | *result = StringParser::PARSE_SUCCESS; | 668 | 12 | if (type_scale >= scale) { | 669 | 11 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 11 | if (!is_numeric_ascii(c)) { | 674 | 11 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 11 | return is_negative ? T(-value) : T(value); | 683 | 11 | } | 684 | 11 | } | 685 | | | 686 | 1 | return is_negative ? T(-value) : T(value); | 687 | 12 | } | 688 | 995k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 57.0k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 57.0k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 57.0k | scale -= exponent; | 703 | 57.0k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 57.0k | if (scale > precision) { | 707 | 6.66k | precision = scale; | 708 | 6.66k | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 57.0k | *result = StringParser::PARSE_SUCCESS; | 713 | 57.0k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 2 | *result = StringParser::PARSE_OVERFLOW; | 715 | 2 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 2 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 2 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 2 | return value; | 720 | 2 | } | 721 | 57.0k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 2.88k | *result = StringParser::PARSE_UNDERFLOW; | 723 | 2.88k | int shift = scale - type_scale; | 724 | 2.88k | T divisor = get_scale_multiplier<T>(shift); | 725 | 2.88k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 2.88k | } else { | 728 | 2.88k | T remainder = value % divisor; | 729 | 2.88k | value /= divisor; | 730 | 2.88k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 83 | value += 1; | 732 | 83 | } | 733 | 2.88k | } | 734 | 2.88k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 54.1k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 57.0k | if (type_scale > scale) { | 740 | 39.4k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 39.4k | } | 742 | | | 743 | 57.0k | return is_negative ? T(-value) : T(value); | 744 | 57.0k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EN4wide7integerILm256EiEENS_10vectorized7DecimalIS5_EEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 92.2k | int type_scale, ParseResult* result) { | 557 | 92.2k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 92.2k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 92.2k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 92.2k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 93.4k | while (len > 0 && is_whitespace(*s)) { | 569 | 1.12k | ++s; | 570 | 1.12k | --len; | 571 | 1.12k | } | 572 | 92.2k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 92.2k | bool is_negative = false; | 577 | 92.2k | if (len > 0) { | 578 | 92.2k | switch (*s) { | 579 | 3.87k | case '-': | 580 | 3.87k | is_negative = true; | 581 | 3.87k | [[fallthrough]]; | 582 | 3.87k | case '+': | 583 | 3.87k | ++s; | 584 | 3.87k | --len; | 585 | 92.2k | } | 586 | 92.2k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 92.2k | bool found_value = false; | 590 | 139k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 47.3k | found_value = true; | 592 | 47.3k | ++s; | 593 | 47.3k | --len; | 594 | 47.3k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. | 599 | 92.2k | int scale = 0; | 600 | 92.2k | int found_dot = 0; | 601 | 92.2k | if (len > 0 && *s == '.') { | 602 | 2.43k | found_dot = 1; | 603 | 2.43k | ++s; | 604 | 2.43k | --len; | 605 | 47.0k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 44.6k | found_value = true; | 607 | 44.6k | ++scale; | 608 | 44.6k | ++s; | 609 | 44.6k | --len; | 610 | 44.6k | } | 611 | 2.43k | } | 612 | | | 613 | 92.2k | int precision = 0; | 614 | 92.2k | int max_digit = type_precision - type_scale; | 615 | 92.2k | int cur_digit = 0; | 616 | 92.2k | bool found_exponent = false; | 617 | 92.2k | int8_t exponent = 0; | 618 | 92.2k | T value = 0; | 619 | 92.2k | bool has_round = false; | 620 | 2.20M | for (int i = 0; i < len; ++i) { | 621 | 2.18M | const char& c = s[i]; | 622 | 2.18M | if (LIKELY('0' <= c && c <= '9')) { | 623 | 2.02M | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 2.02M | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 2.02M | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 2.02M | ++precision; | 631 | 2.02M | scale += found_dot; | 632 | 2.02M | cur_digit = precision - scale; | 633 | 2.02M | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 0 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 0 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 0 | has_round = true; | 644 | 0 | continue; | 645 | 0 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 2.02M | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 2.02M | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 87.5k | found_dot = 1; | 651 | 87.5k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 71.4k | found_exponent = true; | 653 | 71.4k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 71.4k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 10 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 10 | return 0; | 659 | 10 | } | 660 | 71.3k | break; | 661 | 71.4k | } else { | 662 | 80 | if (value == 0) { | 663 | 74 | *result = StringParser::PARSE_FAILURE; | 664 | 74 | return 0; | 665 | 74 | } | 666 | | // here to handle | 667 | 6 | *result = StringParser::PARSE_SUCCESS; | 668 | 6 | if (type_scale >= scale) { | 669 | 6 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 6 | if (!is_numeric_ascii(c)) { | 674 | 6 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 6 | return is_negative ? T(-value) : T(value); | 683 | 6 | } | 684 | 6 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 6 | } | 688 | 2.18M | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 92.2k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 69.1k | precision += exponent - scale; | 695 | | | 696 | 69.1k | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 69.1k | scale = 0; | 698 | 69.1k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 23.0k | scale -= exponent; | 703 | 23.0k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 92.2k | if (scale > precision) { | 707 | 1.26k | precision = scale; | 708 | 1.26k | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 92.2k | *result = StringParser::PARSE_SUCCESS; | 713 | 92.2k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 0 | *result = StringParser::PARSE_OVERFLOW; | 715 | 0 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 0 | return value; | 720 | 0 | } | 721 | 92.2k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 96 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 96 | int shift = scale - type_scale; | 724 | 96 | T divisor = get_scale_multiplier<T>(shift); | 725 | 96 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 96 | } else { | 728 | 96 | T remainder = value % divisor; | 729 | 96 | value /= divisor; | 730 | 96 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 96 | } | 734 | 96 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 92.1k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 92.2k | if (type_scale > scale) { | 740 | 85.1k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 85.1k | } | 742 | | | 743 | 92.2k | return is_negative ? T(-value) : T(value); | 744 | 92.2k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EnNS_10vectorized7DecimalInEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 13.5k | int type_scale, ParseResult* result) { | 557 | 13.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 13.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 13.5k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 13.5k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 13.5k | while (len > 0 && is_whitespace(*s)) { | 569 | 0 | ++s; | 570 | 0 | --len; | 571 | 0 | } | 572 | 13.5k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 13.5k | bool is_negative = false; | 577 | 13.5k | if (len > 0) { | 578 | 13.5k | switch (*s) { | 579 | 6.68k | case '-': | 580 | 6.68k | is_negative = true; | 581 | 6.68k | [[fallthrough]]; | 582 | 6.68k | case '+': | 583 | 6.68k | ++s; | 584 | 6.68k | --len; | 585 | 13.5k | } | 586 | 13.5k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 13.5k | bool found_value = false; | 590 | 52.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 38.8k | found_value = true; | 592 | 38.8k | ++s; | 593 | 38.8k | --len; | 594 | 38.8k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. | 599 | 13.5k | int scale = 0; | 600 | 13.5k | int found_dot = 0; | 601 | 13.5k | if (len > 0 && *s == '.') { | 602 | 2.00k | found_dot = 1; | 603 | 2.00k | ++s; | 604 | 2.00k | --len; | 605 | 6.43k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 4.42k | found_value = true; | 607 | 4.42k | ++scale; | 608 | 4.42k | ++s; | 609 | 4.42k | --len; | 610 | 4.42k | } | 611 | 2.00k | } | 612 | | | 613 | 13.5k | int precision = 0; | 614 | 13.5k | int max_digit = type_precision - type_scale; | 615 | 13.5k | int cur_digit = 0; | 616 | 13.5k | bool found_exponent = false; | 617 | 13.5k | int8_t exponent = 0; | 618 | 13.5k | T value = 0; | 619 | 13.5k | bool has_round = false; | 620 | 275k | for (int i = 0; i < len; ++i) { | 621 | 261k | const char& c = s[i]; | 622 | 261k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 250k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 250k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 250k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 250k | ++precision; | 631 | 250k | scale += found_dot; | 632 | 250k | cur_digit = precision - scale; | 633 | 250k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 8 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 8 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 8 | has_round = true; | 644 | 8 | continue; | 645 | 8 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 250k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 250k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 11.4k | found_dot = 1; | 651 | 11.4k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 0 | found_exponent = true; | 653 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 0 | break; | 661 | 16 | } else { | 662 | 16 | if (value == 0) { | 663 | 10 | *result = StringParser::PARSE_FAILURE; | 664 | 10 | return 0; | 665 | 10 | } | 666 | | // here to handle | 667 | 6 | *result = StringParser::PARSE_SUCCESS; | 668 | 6 | if (type_scale >= scale) { | 669 | 6 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 6 | if (!is_numeric_ascii(c)) { | 674 | 6 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 6 | return is_negative ? T(-value) : T(value); | 683 | 6 | } | 684 | 6 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 6 | } | 688 | 261k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 13.5k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 13.5k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 13.5k | scale -= exponent; | 703 | 13.5k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 13.5k | if (scale > precision) { | 707 | 675 | precision = scale; | 708 | 675 | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 13.5k | *result = StringParser::PARSE_SUCCESS; | 713 | 13.5k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 9 | *result = StringParser::PARSE_OVERFLOW; | 715 | 9 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 9 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 9 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 9 | return value; | 720 | 9 | } | 721 | 13.5k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 17 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 17 | int shift = scale - type_scale; | 724 | 17 | T divisor = get_scale_multiplier<T>(shift); | 725 | 17 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 17 | } else { | 728 | 17 | T remainder = value % divisor; | 729 | 17 | value /= divisor; | 730 | 17 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 17 | value += 1; | 732 | 17 | } | 733 | 17 | } | 734 | 17 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 13.5k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 13.5k | if (type_scale > scale) { | 740 | 1.95k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 1.95k | } | 742 | | | 743 | 13.5k | return is_negative ? T(-value) : T(value); | 744 | 13.5k | } |
|
745 | | |
746 | | } // end namespace doris |