/root/doris/be/src/util/string_parser.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | |
27 | | #include <cstdlib> |
28 | | // IWYU pragma: no_include <bits/std_abs.h> |
29 | | #include <cmath> // IWYU pragma: keep |
30 | | #include <cstdint> |
31 | | #include <limits> |
32 | | #include <map> |
33 | | #include <string> |
34 | | #include <system_error> |
35 | | #include <type_traits> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/compiler_util.h" // IWYU pragma: keep |
39 | | #include "common/status.h" |
40 | | #include "runtime/large_int_value.h" |
41 | | #include "runtime/primitive_type.h" |
42 | | #include "vec/common/int_exp.h" |
43 | | #include "vec/common/string_utils/string_utils.h" |
44 | | #include "vec/core/extended_types.h" |
45 | | #include "vec/core/wide_integer.h" |
46 | | #include "vec/data_types/data_type_decimal.h" |
47 | | #include "vec/data_types/number_traits.h" |
48 | | |
49 | | namespace doris { |
50 | | namespace vectorized { |
51 | | template <DecimalNativeTypeConcept T> |
52 | | struct Decimal; |
53 | | } // namespace vectorized |
54 | | |
55 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
56 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
57 | | // |
58 | | // Strings with leading and trailing whitespace are accepted. |
59 | | // Branching is heavily optimized for the non-whitespace successful case. |
60 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
61 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
62 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
63 | | // |
64 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
65 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
66 | | // and inf/-inf for float types. |
67 | | // |
68 | | // Things we tried that did not work: |
69 | | // - lookup table for converting character to digit |
70 | | // Improvements (TODO): |
71 | | // - Validate input using _sidd_compare_ranges |
72 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
73 | | class StringParser { |
74 | | public: |
75 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
76 | | |
77 | | template <typename T> |
78 | 551k | static T numeric_limits(bool negative) { |
79 | 551k | if constexpr (std::is_same_v<T, __int128>) { |
80 | 505k | return negative ? MIN_INT128 : MAX_INT128; |
81 | 505k | } else { |
82 | 505k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
83 | 505k | } |
84 | 551k | } _ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 78 | 263k | static T numeric_limits(bool negative) { | 79 | 263k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 263k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 263k | } else { | 82 | 263k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 263k | } | 84 | 263k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 78 | 74.5k | static T numeric_limits(bool negative) { | 79 | 74.5k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 74.5k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 74.5k | } else { | 82 | 74.5k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 74.5k | } | 84 | 74.5k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 78 | 63.4k | static T numeric_limits(bool negative) { | 79 | 63.4k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 63.4k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 63.4k | } else { | 82 | 63.4k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 63.4k | } | 84 | 63.4k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 78 | 85.5k | static T numeric_limits(bool negative) { | 79 | 85.5k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 85.5k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 85.5k | } else { | 82 | 85.5k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 85.5k | } | 84 | 85.5k | } |
_ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 78 | 45.9k | static T numeric_limits(bool negative) { | 79 | 45.9k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 45.9k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 45.9k | } else { | 82 | 45.9k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 45.9k | } | 84 | 45.9k | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIoEET_b _ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 78 | 21 | static T numeric_limits(bool negative) { | 79 | 21 | if constexpr (std::is_same_v<T, __int128>) { | 80 | 21 | return negative ? MIN_INT128 : MAX_INT128; | 81 | 21 | } else { | 82 | 21 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 21 | } | 84 | 21 | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIjEET_b _ZN5doris12StringParser14numeric_limitsIhEET_b Line | Count | Source | 78 | 19.0k | static T numeric_limits(bool negative) { | 79 | 19.0k | if constexpr (std::is_same_v<T, __int128>) { | 80 | 19.0k | return negative ? MIN_INT128 : MAX_INT128; | 81 | 19.0k | } else { | 82 | 19.0k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 83 | 19.0k | } | 84 | 19.0k | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsItEET_b |
85 | | |
86 | | template <typename T> |
87 | 232k | static T get_scale_multiplier(int scale) { |
88 | 232k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
89 | 232k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
90 | 232k | "You can only instantiate as int32_t, int64_t, __int128."); |
91 | 232k | if constexpr (std::is_same_v<T, int32_t>) { |
92 | 211k | return common::exp10_i32(scale); |
93 | 211k | } else if constexpr (std::is_same_v<T, int64_t>) { |
94 | 188k | return common::exp10_i64(scale); |
95 | 188k | } else if constexpr (std::is_same_v<T, __int128>) { |
96 | 149k | return common::exp10_i128(scale); |
97 | 149k | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
98 | 149k | return common::exp10_i256(scale); |
99 | 149k | } |
100 | 232k | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 87 | 21.0k | static T get_scale_multiplier(int scale) { | 88 | 21.0k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 21.0k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 21.0k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 21.0k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 21.0k | return common::exp10_i32(scale); | 93 | 21.0k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 21.0k | return common::exp10_i64(scale); | 95 | 21.0k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 21.0k | return common::exp10_i128(scale); | 97 | 21.0k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 21.0k | return common::exp10_i256(scale); | 99 | 21.0k | } | 100 | 21.0k | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 87 | 23.5k | static T get_scale_multiplier(int scale) { | 88 | 23.5k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 23.5k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 23.5k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 23.5k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 23.5k | return common::exp10_i32(scale); | 93 | 23.5k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 23.5k | return common::exp10_i64(scale); | 95 | 23.5k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 23.5k | return common::exp10_i128(scale); | 97 | 23.5k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 23.5k | return common::exp10_i256(scale); | 99 | 23.5k | } | 100 | 23.5k | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 87 | 38.7k | static T get_scale_multiplier(int scale) { | 88 | 38.7k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 38.7k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 38.7k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 38.7k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 38.7k | return common::exp10_i32(scale); | 93 | 38.7k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 38.7k | return common::exp10_i64(scale); | 95 | 38.7k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 38.7k | return common::exp10_i128(scale); | 97 | 38.7k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 38.7k | return common::exp10_i256(scale); | 99 | 38.7k | } | 100 | 38.7k | } |
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i Line | Count | Source | 87 | 149k | static T get_scale_multiplier(int scale) { | 88 | 149k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 89 | 149k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 90 | 149k | "You can only instantiate as int32_t, int64_t, __int128."); | 91 | 149k | if constexpr (std::is_same_v<T, int32_t>) { | 92 | 149k | return common::exp10_i32(scale); | 93 | 149k | } else if constexpr (std::is_same_v<T, int64_t>) { | 94 | 149k | return common::exp10_i64(scale); | 95 | 149k | } else if constexpr (std::is_same_v<T, __int128>) { | 96 | 149k | return common::exp10_i128(scale); | 97 | 149k | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 98 | 149k | return common::exp10_i256(scale); | 99 | 149k | } | 100 | 149k | } |
|
101 | | |
102 | | // This is considerably faster than glibc's implementation (25x). |
103 | | // In the case of overflow, the max/min value for the data type will be returned. |
104 | | // Assumes s represents a decimal number. |
105 | | template <typename T> |
106 | 356k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
107 | 356k | T ans = string_to_int_internal<T>(s, len, result); |
108 | 356k | if (LIKELY(*result == PARSE_SUCCESS)) { |
109 | 303k | return ans; |
110 | 303k | } |
111 | | |
112 | 52.7k | int i = skip_leading_whitespace(s, len); |
113 | 52.7k | return string_to_int_internal<T>(s + i, len - i, result); |
114 | 356k | } _ZN5doris12StringParser13string_to_intIaEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 94.8k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 94.8k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 94.8k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 66.2k | return ans; | 110 | 66.2k | } | 111 | | | 112 | 28.6k | int i = skip_leading_whitespace(s, len); | 113 | 28.6k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 94.8k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 65.7k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 65.7k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 65.7k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 58.1k | return ans; | 110 | 58.1k | } | 111 | | | 112 | 7.66k | int i = skip_leading_whitespace(s, len); | 113 | 7.66k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 65.7k | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 58.5k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 58.5k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 58.5k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 54.7k | return ans; | 110 | 54.7k | } | 111 | | | 112 | 3.83k | int i = skip_leading_whitespace(s, len); | 113 | 3.83k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 58.5k | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 80.6k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 80.6k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 80.6k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 76.7k | return ans; | 110 | 76.7k | } | 111 | | | 112 | 3.90k | int i = skip_leading_whitespace(s, len); | 113 | 3.90k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 80.6k | } |
_ZN5doris12StringParser13string_to_intInEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 45.0k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 45.0k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 45.0k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 44.1k | return ans; | 110 | 44.1k | } | 111 | | | 112 | 941 | int i = skip_leading_whitespace(s, len); | 113 | 941 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 45.0k | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEEEET_PKcmPNS0_11ParseResultE Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIoEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intImEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 20 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 20 | T ans = string_to_int_internal<T>(s, len, result); | 108 | 20 | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 20 | return ans; | 110 | 20 | } | 111 | | | 112 | 0 | int i = skip_leading_whitespace(s, len); | 113 | 0 | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 20 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intIhEET_PKcmPNS0_11ParseResultE Line | Count | Source | 106 | 11.2k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 107 | 11.2k | T ans = string_to_int_internal<T>(s, len, result); | 108 | 11.2k | if (LIKELY(*result == PARSE_SUCCESS)) { | 109 | 3.51k | return ans; | 110 | 3.51k | } | 111 | | | 112 | 7.75k | int i = skip_leading_whitespace(s, len); | 113 | 7.75k | return string_to_int_internal<T>(s + i, len - i, result); | 114 | 11.2k | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intItEET_PKcmPNS0_11ParseResultE |
115 | | |
116 | | // This is considerably faster than glibc's implementation. |
117 | | // In the case of overflow, the max/min value for the data type will be returned. |
118 | | // Assumes s represents a decimal number. |
119 | | template <typename T> |
120 | 1.37k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
121 | 1.37k | T ans = string_to_unsigned_int_internal<T>(s, len, result); |
122 | 1.37k | if (LIKELY(*result == PARSE_SUCCESS)) { |
123 | 84 | return ans; |
124 | 84 | } |
125 | | |
126 | 1.28k | int i = skip_leading_whitespace(s, len); |
127 | 1.28k | return string_to_unsigned_int_internal<T>(s + i, len - i, result); |
128 | 1.37k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 120 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 121 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 122 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 123 | 21 | return ans; | 124 | 21 | } | 125 | | | 126 | 322 | int i = skip_leading_whitespace(s, len); | 127 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 128 | 343 | } |
|
129 | | |
130 | | // Convert a string s representing a number in given base into a decimal number. |
131 | | template <typename T> |
132 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
133 | 27.8k | ParseResult* result) { |
134 | 27.8k | T ans = string_to_int_internal<T>(s, len, base, result); |
135 | 27.8k | if (LIKELY(*result == PARSE_SUCCESS)) { |
136 | 2.06k | return ans; |
137 | 2.06k | } |
138 | | |
139 | 25.7k | int i = skip_leading_whitespace(s, len); |
140 | 25.7k | return string_to_int_internal<T>(s + i, len - i, base, result); |
141 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 26.4k | ParseResult* result) { | 134 | 26.4k | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 26.4k | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 1.91k | return ans; | 137 | 1.91k | } | 138 | | | 139 | 24.5k | int i = skip_leading_whitespace(s, len); | 140 | 24.5k | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 490 | ParseResult* result) { | 134 | 490 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 490 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 56 | return ans; | 137 | 56 | } | 138 | | | 139 | 434 | int i = skip_leading_whitespace(s, len); | 140 | 434 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 441 | ParseResult* result) { | 134 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 49 | return ans; | 137 | 49 | } | 138 | | | 139 | 392 | int i = skip_leading_whitespace(s, len); | 140 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 441 | ParseResult* result) { | 134 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 49 | return ans; | 137 | 49 | } | 138 | | | 139 | 392 | int i = skip_leading_whitespace(s, len); | 140 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 133 | 1 | ParseResult* result) { | 134 | 1 | T ans = string_to_int_internal<T>(s, len, base, result); | 135 | 1 | if (LIKELY(*result == PARSE_SUCCESS)) { | 136 | 1 | return ans; | 137 | 1 | } | 138 | | | 139 | 0 | int i = skip_leading_whitespace(s, len); | 140 | 0 | return string_to_int_internal<T>(s + i, len - i, base, result); | 141 | 1 | } |
|
142 | | |
143 | | template <typename T> |
144 | 134k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
145 | 134k | return string_to_float_internal<T>(s, len, result); |
146 | 134k | } _ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 144 | 71.8k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 145 | 71.8k | return string_to_float_internal<T>(s, len, result); | 146 | 71.8k | } |
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 144 | 62.3k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 145 | 62.3k | return string_to_float_internal<T>(s, len, result); | 146 | 62.3k | } |
|
147 | | |
148 | | // Parses a string for 'true' or 'false', case insensitive. |
149 | 8.05k | static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) { |
150 | 8.05k | bool ans = string_to_bool_internal(s, len, result); |
151 | 8.05k | if (LIKELY(*result == PARSE_SUCCESS)) { |
152 | 7.04k | return ans; |
153 | 7.04k | } |
154 | | |
155 | 1.00k | int i = skip_leading_whitespace(s, len); |
156 | 1.00k | return string_to_bool_internal(s + i, len - i, result); |
157 | 8.05k | } |
158 | | |
159 | | template <PrimitiveType P, typename T = PrimitiveTypeTraits<P>::CppType::NativeType, |
160 | | typename DecimalType = PrimitiveTypeTraits<P>::ColumnType::value_type> |
161 | | static inline T string_to_decimal(const char* __restrict s, int len, int type_precision, |
162 | | int type_scale, ParseResult* result); |
163 | | |
164 | | template <typename T> |
165 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
166 | | const T key_value_separator, |
167 | | std::map<std::string, std::string>* result) { |
168 | | int key_pos = 0; |
169 | | int key_end; |
170 | | int val_pos; |
171 | | int val_end; |
172 | | |
173 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
174 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
175 | | std::string::npos) { |
176 | | break; |
177 | | } |
178 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
179 | | val_end = base.size(); |
180 | | } |
181 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
182 | | base.substr(val_pos, val_end - val_pos))); |
183 | | key_pos = val_end; |
184 | | if (key_pos != std::string::npos) { |
185 | | ++key_pos; |
186 | | } |
187 | | } |
188 | | |
189 | | return Status::OK(); |
190 | | } |
191 | | |
192 | | private: |
193 | | // This is considerably faster than glibc's implementation. |
194 | | // In the case of overflow, the max/min value for the data type will be returned. |
195 | | // Assumes s represents a decimal number. |
196 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
197 | | template <typename T> |
198 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
199 | | |
200 | | // This is considerably faster than glibc's implementation. |
201 | | // In the case of overflow, the max/min value for the data type will be returned. |
202 | | // Assumes s represents a decimal number. |
203 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
204 | | template <typename T> |
205 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
206 | | ParseResult* result); |
207 | | |
208 | | // Convert a string s representing a number in given base into a decimal number. |
209 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
210 | | template <typename T> |
211 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
212 | | ParseResult* result); |
213 | | |
214 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
215 | | // and the number is positive. |
216 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
217 | | template <typename T> |
218 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
219 | | ParseResult* result); |
220 | | |
221 | | // This is considerably faster than glibc's implementation (>100x why???) |
222 | | // No special case handling needs to be done for overflows, the floating point spec |
223 | | // already does it and will cap the values to -inf/inf |
224 | | // To avoid inaccurate conversions this function falls back to strtod for |
225 | | // scientific notation. |
226 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
227 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
228 | | template <typename T> |
229 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
230 | | ParseResult* result); |
231 | | |
232 | | // parses a string for 'true' or 'false', case insensitive |
233 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
234 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
235 | | ParseResult* result); |
236 | | |
237 | | // Returns true if s only contains whitespace. |
238 | 33.5k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
239 | 108k | for (int i = 0; i < len; ++i) { |
240 | 76.6k | if (!LIKELY(is_whitespace(s[i]))) { |
241 | 1.75k | return false; |
242 | 1.75k | } |
243 | 76.6k | } |
244 | 31.8k | return true; |
245 | 33.5k | } |
246 | | |
247 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
248 | 1.47k | static inline bool is_float_suffix(const char* __restrict s, int len) { |
249 | 1.47k | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
250 | 1.47k | } |
251 | | |
252 | 804 | static inline bool is_all_digit(const char* __restrict s, int len) { |
253 | 1.60k | for (int i = 0; i < len; ++i) { |
254 | 815 | if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { |
255 | 11 | return false; |
256 | 11 | } |
257 | 815 | } |
258 | 793 | return true; |
259 | 804 | } |
260 | | |
261 | | // Returns the position of the first non-whitespace character in s. |
262 | 80.7k | static inline int skip_leading_whitespace(const char* __restrict s, int len) { |
263 | 80.7k | int i = 0; |
264 | 234k | while (i < len && is_whitespace(s[i])) { |
265 | 153k | ++i; |
266 | 153k | } |
267 | 80.7k | return i; |
268 | 80.7k | } |
269 | | |
270 | | // Our own definition of "isspace" that optimize on the ' ' branch. |
271 | 1.03M | static inline bool is_whitespace(const char& c) { |
272 | 1.03M | return LIKELY(c == ' ') || |
273 | 1.03M | UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); |
274 | 1.03M | } |
275 | | |
276 | | }; // end of class StringParser |
277 | | |
278 | | template <typename T> |
279 | 484k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
280 | 484k | if (UNLIKELY(len <= 0)) { |
281 | 242 | *result = PARSE_FAILURE; |
282 | 242 | return 0; |
283 | 242 | } |
284 | | |
285 | 483k | typedef typename std::make_unsigned<T>::type UnsignedT; |
286 | 483k | UnsignedT val = 0; |
287 | 483k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
288 | 483k | bool negative = false; |
289 | 483k | int i = 0; |
290 | 483k | switch (*s) { |
291 | 102k | case '-': |
292 | 102k | negative = true; |
293 | 102k | max_val += 1; |
294 | 102k | [[fallthrough]]; |
295 | 172k | case '+': |
296 | 172k | ++i; |
297 | | // only one '+'/'-' char, so could return failure directly |
298 | 172k | if (UNLIKELY(len == 1)) { |
299 | 0 | *result = PARSE_FAILURE; |
300 | 0 | return 0; |
301 | 0 | } |
302 | 483k | } |
303 | | |
304 | | // This is the fast path where the string cannot overflow. |
305 | 483k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
306 | 314k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); |
307 | 314k | return static_cast<T>(negative ? -val : val); |
308 | 314k | } |
309 | | |
310 | 169k | const T max_div_10 = max_val / 10; |
311 | 169k | const T max_mod_10 = max_val % 10; |
312 | | |
313 | 169k | int first = i; |
314 | 1.57M | for (; i < len; ++i) { |
315 | 1.50M | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
316 | 1.45M | T digit = s[i] - '0'; |
317 | | // This is a tricky check to see if adding this digit will cause an overflow. |
318 | 1.45M | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
319 | 45.5k | *result = PARSE_OVERFLOW; |
320 | 45.5k | return negative ? -max_val : max_val; |
321 | 45.5k | } |
322 | 1.40M | val = val * 10 + digit; |
323 | 1.40M | } else { |
324 | 51.9k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && |
325 | 51.9k | !is_float_suffix(s + i, len - i))))) { |
326 | | // Reject the string because either the first char was not a digit, |
327 | | // or the remaining chars are not all whitespace |
328 | 40.4k | *result = PARSE_FAILURE; |
329 | 40.4k | return 0; |
330 | 40.4k | } |
331 | | // Returning here is slightly faster than breaking the loop. |
332 | 11.4k | *result = PARSE_SUCCESS; |
333 | 11.4k | return static_cast<T>(negative ? -val : val); |
334 | 51.9k | } |
335 | 1.50M | } |
336 | 71.5k | *result = PARSE_SUCCESS; |
337 | 71.5k | return static_cast<T>(negative ? -val : val); |
338 | 169k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 198k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 198k | if (UNLIKELY(len <= 0)) { | 281 | 206 | *result = PARSE_FAILURE; | 282 | 206 | return 0; | 283 | 206 | } | 284 | | | 285 | 198k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 198k | UnsignedT val = 0; | 287 | 198k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 198k | bool negative = false; | 289 | 198k | int i = 0; | 290 | 198k | switch (*s) { | 291 | 27.3k | case '-': | 292 | 27.3k | negative = true; | 293 | 27.3k | max_val += 1; | 294 | 27.3k | [[fallthrough]]; | 295 | 96.9k | case '+': | 296 | 96.9k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 96.9k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 198k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 198k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 135k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 135k | return static_cast<T>(negative ? -val : val); | 308 | 135k | } | 309 | | | 310 | 62.6k | const T max_div_10 = max_val / 10; | 311 | 62.6k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 62.6k | int first = i; | 314 | 149k | for (; i < len; ++i) { | 315 | 142k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 107k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 107k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 20.8k | *result = PARSE_OVERFLOW; | 320 | 20.8k | return negative ? 
-max_val : max_val; | 321 | 20.8k | } | 322 | 86.9k | val = val * 10 + digit; | 323 | 86.9k | } else { | 324 | 34.9k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 34.9k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 23.9k | *result = PARSE_FAILURE; | 329 | 23.9k | return 0; | 330 | 23.9k | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 11.0k | *result = PARSE_SUCCESS; | 333 | 11.0k | return static_cast<T>(negative ? -val : val); | 334 | 34.9k | } | 335 | 142k | } | 336 | 6.81k | *result = PARSE_SUCCESS; | 337 | 6.81k | return static_cast<T>(negative ? -val : val); | 338 | 62.6k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 73.4k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 73.4k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 73.4k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 73.4k | UnsignedT val = 0; | 287 | 73.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 73.4k | bool negative = false; | 289 | 73.4k | int i = 0; | 290 | 73.4k | switch (*s) { | 291 | 12.1k | case '-': | 292 | 12.1k | negative = true; | 293 | 12.1k | max_val += 1; | 294 | 12.1k | [[fallthrough]]; | 295 | 12.2k | case '+': | 296 | 12.2k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 12.2k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 73.4k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 73.4k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 50.7k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 50.7k | return static_cast<T>(negative ? -val : val); | 308 | 50.7k | } | 309 | | | 310 | 22.6k | const T max_div_10 = max_val / 10; | 311 | 22.6k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 22.6k | int first = i; | 314 | 119k | for (; i < len; ++i) { | 315 | 110k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 109k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 109k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 12.8k | *result = PARSE_OVERFLOW; | 320 | 12.8k | return negative ? 
-max_val : max_val; | 321 | 12.8k | } | 322 | 96.5k | val = val * 10 + digit; | 323 | 96.5k | } else { | 324 | 971 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 971 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 761 | *result = PARSE_FAILURE; | 329 | 761 | return 0; | 330 | 761 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 210 | *result = PARSE_SUCCESS; | 333 | 210 | return static_cast<T>(negative ? -val : val); | 334 | 971 | } | 335 | 110k | } | 336 | 8.82k | *result = PARSE_SUCCESS; | 337 | 8.82k | return static_cast<T>(negative ? -val : val); | 338 | 22.6k | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 62.4k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 62.4k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 62.4k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 62.4k | UnsignedT val = 0; | 287 | 62.4k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 62.4k | bool negative = false; | 289 | 62.4k | int i = 0; | 290 | 62.4k | switch (*s) { | 291 | 9.58k | case '-': | 292 | 9.58k | negative = true; | 293 | 9.58k | max_val += 1; | 294 | 9.58k | [[fallthrough]]; | 295 | 9.67k | case '+': | 296 | 9.67k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 9.67k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 62.4k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 62.4k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 50.7k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 50.7k | return static_cast<T>(negative ? -val : val); | 308 | 50.7k | } | 309 | | | 310 | 11.7k | const T max_div_10 = max_val / 10; | 311 | 11.7k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 11.7k | int first = i; | 314 | 119k | for (; i < len; ++i) { | 315 | 114k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 113k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 113k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 5.46k | *result = PARSE_OVERFLOW; | 320 | 5.46k | return negative ? 
-max_val : max_val; | 321 | 5.46k | } | 322 | 108k | val = val * 10 + digit; | 323 | 108k | } else { | 324 | 501 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 501 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 375 | *result = PARSE_FAILURE; | 329 | 375 | return 0; | 330 | 375 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 126 | *result = PARSE_SUCCESS; | 333 | 126 | return static_cast<T>(negative ? -val : val); | 334 | 501 | } | 335 | 114k | } | 336 | 5.75k | *result = PARSE_SUCCESS; | 337 | 5.75k | return static_cast<T>(negative ? -val : val); | 338 | 11.7k | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 84.5k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 84.5k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 84.5k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 84.5k | UnsignedT val = 0; | 287 | 84.5k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 84.5k | bool negative = false; | 289 | 84.5k | int i = 0; | 290 | 84.5k | switch (*s) { | 291 | 50.1k | case '-': | 292 | 50.1k | negative = true; | 293 | 50.1k | max_val += 1; | 294 | 50.1k | [[fallthrough]]; | 295 | 50.1k | case '+': | 296 | 50.1k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 50.1k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 84.5k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 84.5k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 31.7k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 31.7k | return static_cast<T>(negative ? -val : val); | 308 | 31.7k | } | 309 | | | 310 | 52.7k | const T max_div_10 = max_val / 10; | 311 | 52.7k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 52.7k | int first = i; | 314 | 1.03M | for (; i < len; ++i) { | 315 | 985k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 984k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 984k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 4.89k | *result = PARSE_OVERFLOW; | 320 | 4.89k | return negative ? 
-max_val : max_val; | 321 | 4.89k | } | 322 | 979k | val = val * 10 + digit; | 323 | 979k | } else { | 324 | 1.02k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 1.02k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 945 | *result = PARSE_FAILURE; | 329 | 945 | return 0; | 330 | 945 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 84 | *result = PARSE_SUCCESS; | 333 | 84 | return static_cast<T>(negative ? -val : val); | 334 | 1.02k | } | 335 | 985k | } | 336 | 46.8k | *result = PARSE_SUCCESS; | 337 | 46.8k | return static_cast<T>(negative ? -val : val); | 338 | 52.7k | } |
_ZN5doris12StringParser22string_to_int_internalInEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 45.9k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 45.9k | if (UNLIKELY(len <= 0)) { | 281 | 36 | *result = PARSE_FAILURE; | 282 | 36 | return 0; | 283 | 36 | } | 284 | | | 285 | 45.9k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 45.9k | UnsignedT val = 0; | 287 | 45.9k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 45.9k | bool negative = false; | 289 | 45.9k | int i = 0; | 290 | 45.9k | switch (*s) { | 291 | 2.56k | case '-': | 292 | 2.56k | negative = true; | 293 | 2.56k | max_val += 1; | 294 | 2.56k | [[fallthrough]]; | 295 | 2.56k | case '+': | 296 | 2.56k | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 2.56k | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 45.9k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 45.9k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 42.1k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 42.1k | return static_cast<T>(negative ? -val : val); | 308 | 42.1k | } | 309 | | | 310 | 3.75k | const T max_div_10 = max_val / 10; | 311 | 3.75k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 3.75k | int first = i; | 314 | 136k | for (; i < len; ++i) { | 315 | 132k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 132k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 132k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 6 | *result = PARSE_OVERFLOW; | 320 | 6 | return negative ? 
-max_val : max_val; | 321 | 6 | } | 322 | 132k | val = val * 10 + digit; | 323 | 132k | } else { | 324 | 382 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 382 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 382 | *result = PARSE_FAILURE; | 329 | 382 | return 0; | 330 | 382 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 382 | } | 335 | 132k | } | 336 | 3.37k | *result = PARSE_SUCCESS; | 337 | 3.37k | return static_cast<T>(negative ? -val : val); | 338 | 3.75k | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIoEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 20 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 20 | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 20 | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 20 | UnsignedT val = 0; | 287 | 20 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 20 | bool negative = false; | 289 | 20 | int i = 0; | 290 | 20 | switch (*s) { | 291 | 0 | case '-': | 292 | 0 | negative = true; | 293 | 0 | max_val += 1; | 294 | 0 | [[fallthrough]]; | 295 | 0 | case '+': | 296 | 0 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 0 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 20 | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 20 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 20 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 20 | return static_cast<T>(negative ? -val : val); | 308 | 20 | } | 309 | | | 310 | 0 | const T max_div_10 = max_val / 10; | 311 | 0 | const T max_mod_10 = max_val % 10; | 312 | |
| 313 | 0 | int first = i; | 314 | 0 | for (; i < len; ++i) { | 315 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 0 | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 0 | *result = PARSE_OVERFLOW; | 320 | 0 | return negative ? -max_val : max_val; | 321 | 0 | } | 322 | 0 | val = val * 10 + digit; | 323 | 0 | } else { | 324 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 0 | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 0 | *result = PARSE_FAILURE; | 329 | 0 | return 0; | 330 | 0 | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 0 | } | 335 | 0 | } | 336 | 0 | *result = PARSE_SUCCESS; | 337 | 0 | return static_cast<T>(negative ? -val : val); | 338 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 279 | 19.0k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 280 | 19.0k | if (UNLIKELY(len <= 0)) { | 281 | 0 | *result = PARSE_FAILURE; | 282 | 0 | return 0; | 283 | 0 | } | 284 | | | 285 | 19.0k | typedef typename std::make_unsigned<T>::type UnsignedT; | 286 | 19.0k | UnsignedT val = 0; | 287 | 19.0k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 288 | 19.0k | bool negative = false; | 289 | 19.0k | int i = 0; | 290 | 19.0k | switch (*s) { | 291 | 834 | case '-': | 292 | 834 | negative = true; | 293 | 834 | max_val += 1; | 294 | 834 | [[fallthrough]]; | 295 | 834 | case '+': | 296 | 834 | ++i; | 297 | | // only one '+'/'-' char, so could return failure directly | 298 | 834 | if (UNLIKELY(len == 1)) { | 299 | 0 | *result = PARSE_FAILURE; | 300 | 0 | return 0; | 301 | 0 | } | 302 | 19.0k | } | 303 | | | 304 | | // This is the fast path where the string cannot overflow. | 305 | 19.0k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 306 | 3.53k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 307 | 3.53k | return static_cast<T>(negative ? -val : val); | 308 | 3.53k | } | 309 | | | 310 | 15.4k | const T max_div_10 = max_val / 10; | 311 | 15.4k | const T max_mod_10 = max_val % 10; | 312 | | | 313 | 15.4k | int first = i; | 314 | 17.6k | for (; i < len; ++i) { | 315 | 17.6k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 316 | 3.54k | T digit = s[i] - '0'; | 317 | | // This is a tricky check to see if adding this digit will cause an overflow. | 318 | 3.54k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 319 | 1.41k | *result = PARSE_OVERFLOW; | 320 | 1.41k | return negative ? 
-max_val : max_val; | 321 | 1.41k | } | 322 | 2.12k | val = val * 10 + digit; | 323 | 14.0k | } else { | 324 | 14.0k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 325 | 14.0k | !is_float_suffix(s + i, len - i))))) { | 326 | | // Reject the string because either the first char was not a digit, | 327 | | // or the remaining chars are not all whitespace | 328 | 14.0k | *result = PARSE_FAILURE; | 329 | 14.0k | return 0; | 330 | 14.0k | } | 331 | | // Returning here is slightly faster than breaking the loop. | 332 | 0 | *result = PARSE_SUCCESS; | 333 | 0 | return static_cast<T>(negative ? -val : val); | 334 | 14.0k | } | 335 | 17.6k | } | 336 | 0 | *result = PARSE_SUCCESS; | 337 | 0 | return static_cast<T>(negative ? -val : val); | 338 | 15.4k | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalItEET_PKciPNS0_11ParseResultE |
339 | | |
340 | | template <typename T> |
341 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
342 | 2.66k | ParseResult* result) { |
343 | 2.66k | if (UNLIKELY(len <= 0)) { |
344 | 0 | *result = PARSE_FAILURE; |
345 | 0 | return 0; |
346 | 0 | } |
347 | | |
348 | 2.66k | T val = 0; |
349 | 2.66k | T max_val = std::numeric_limits<T>::max(); |
350 | 2.66k | int i = 0; |
351 | | |
352 | 2.66k | typedef typename std::make_signed<T>::type signedT; |
353 | | // This is the fast path where the string cannot overflow. |
354 | 2.66k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
355 | 879 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
356 | 879 | return val; |
357 | 879 | } |
358 | | |
359 | 1.78k | const T max_div_10 = max_val / 10; |
360 | 1.78k | const T max_mod_10 = max_val % 10; |
361 | | |
362 | 1.78k | int first = i; |
363 | 6.54k | for (; i < len; ++i) { |
364 | 6.49k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
365 | 4.99k | T digit = s[i] - '0'; |
366 | | // This is a tricky check to see if adding this digit will cause an overflow. |
367 | 4.99k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
368 | 224 | *result = PARSE_OVERFLOW; |
369 | 224 | return max_val; |
370 | 224 | } |
371 | 4.76k | val = val * 10 + digit; |
372 | 4.76k | } else { |
373 | 1.50k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
374 | | // Reject the string because either the first char was not a digit, |
375 | | // or the remaining chars are not all whitespace |
376 | 1.13k | *result = PARSE_FAILURE; |
377 | 1.13k | return 0; |
378 | 1.13k | } |
379 | | // Returning here is slightly faster than breaking the loop. |
380 | 378 | *result = PARSE_SUCCESS; |
381 | 378 | return val; |
382 | 1.50k | } |
383 | 6.49k | } |
384 | 49 | *result = PARSE_SUCCESS; |
385 | 49 | return val; |
386 | 1.78k | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 16 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 16 | return val; | 357 | 16 | } | 358 | | | 359 | 649 | const T max_div_10 = max_val / 10; | 360 | 649 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 649 | int first = i; | 363 | 1.20k | for (; i < len; ++i) { | 364 | 1.18k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 609 | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 609 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 553 | val = val * 10 + digit; | 372 | 572 | } else { | 373 | 572 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 446 | *result = PARSE_FAILURE; | 377 | 446 | return 0; | 378 | 446 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 126 | *result = PARSE_SUCCESS; | 381 | 126 | return val; | 382 | 572 | } | 383 | 1.18k | } | 384 | 21 | *result = PARSE_SUCCESS; | 385 | 21 | return val; | 386 | 649 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 31 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 31 | return val; | 357 | 31 | } | 358 | | | 359 | 634 | const T max_div_10 = max_val / 10; | 360 | 634 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 634 | int first = i; | 363 | 1.47k | for (; i < len; ++i) { | 364 | 1.46k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 896 | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 896 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 840 | val = val * 10 + digit; | 372 | 840 | } else { | 373 | 564 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 438 | *result = PARSE_FAILURE; | 377 | 438 | return 0; | 378 | 438 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 126 | *result = PARSE_SUCCESS; | 381 | 126 | return val; | 382 | 564 | } | 383 | 1.46k | } | 384 | 14 | *result = PARSE_SUCCESS; | 385 | 14 | return val; | 386 | 634 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 392 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 392 | return val; | 357 | 392 | } | 358 | | | 359 | 273 | const T max_div_10 = max_val / 10; | 360 | 273 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 273 | int first = i; | 363 | 1.60k | for (; i < len; ++i) { | 364 | 1.59k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 1.38k | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 1.38k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 1.33k | val = val * 10 + digit; | 372 | 1.33k | } else { | 373 | 210 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 126 | *result = PARSE_FAILURE; | 377 | 126 | return 0; | 378 | 126 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 84 | *result = PARSE_SUCCESS; | 381 | 84 | return val; | 382 | 210 | } | 383 | 1.59k | } | 384 | 7 | *result = PARSE_SUCCESS; | 385 | 7 | return val; | 386 | 273 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 342 | 665 | ParseResult* result) { | 343 | 665 | if (UNLIKELY(len <= 0)) { | 344 | 0 | *result = PARSE_FAILURE; | 345 | 0 | return 0; | 346 | 0 | } | 347 | | | 348 | 665 | T val = 0; | 349 | 665 | T max_val = std::numeric_limits<T>::max(); | 350 | 665 | int i = 0; | 351 | | | 352 | 665 | typedef typename std::make_signed<T>::type signedT; | 353 | | // This is the fast path where the string cannot overflow. | 354 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 355 | 440 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 356 | 440 | return val; | 357 | 440 | } | 358 | | | 359 | 225 | const T max_div_10 = max_val / 10; | 360 | 225 | const T max_mod_10 = max_val % 10; | 361 | | | 362 | 225 | int first = i; | 363 | 2.26k | for (; i < len; ++i) { | 364 | 2.26k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 365 | 2.10k | T digit = s[i] - '0'; | 366 | | // This is a tricky check to see if adding this digit will cause an overflow. | 367 | 2.10k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 368 | 56 | *result = PARSE_OVERFLOW; | 369 | 56 | return max_val; | 370 | 56 | } | 371 | 2.04k | val = val * 10 + digit; | 372 | 2.04k | } else { | 373 | 162 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 374 | | // Reject the string because either the first char was not a digit, | 375 | | // or the remaining chars are not all whitespace | 376 | 120 | *result = PARSE_FAILURE; | 377 | 120 | return 0; | 378 | 120 | } | 379 | | // Returning here is slightly faster than breaking the loop. | 380 | 42 | *result = PARSE_SUCCESS; | 381 | 42 | return val; | 382 | 162 | } | 383 | 2.26k | } | 384 | 7 | *result = PARSE_SUCCESS; | 385 | 7 | return val; | 386 | 225 | } |
|
387 | | |
388 | | template <typename T> |
389 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
390 | 53.6k | ParseResult* result) { |
391 | 53.6k | typedef typename std::make_unsigned<T>::type UnsignedT; |
392 | 53.6k | UnsignedT val = 0; |
393 | 53.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
394 | 53.6k | bool negative = false; |
395 | 53.6k | if (UNLIKELY(len <= 0)) { |
396 | 0 | *result = PARSE_FAILURE; |
397 | 0 | return 0; |
398 | 0 | } |
399 | 53.6k | int i = 0; |
400 | 53.6k | switch (*s) { |
401 | 14.3k | case '-': |
402 | 14.3k | negative = true; |
403 | 14.3k | max_val = StringParser::numeric_limits<T>(false) + 1; |
404 | 14.3k | [[fallthrough]]; |
405 | 14.6k | case '+': |
406 | 14.6k | i = 1; |
407 | 53.6k | } |
408 | | |
409 | 53.6k | const T max_div_base = max_val / base; |
410 | 53.6k | const T max_mod_base = max_val % base; |
411 | | |
412 | 53.6k | int first = i; |
413 | 120k | for (; i < len; ++i) { |
414 | 118k | T digit; |
415 | 118k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
416 | 81.6k | digit = s[i] - '0'; |
417 | 81.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
418 | 639 | digit = (s[i] - 'a' + 10); |
419 | 36.4k | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
420 | 98 | digit = (s[i] - 'A' + 10); |
421 | 36.3k | } else { |
422 | 36.3k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
423 | | // Reject the string because either the first char was not an alpha/digit, |
424 | | // or the remaining chars are not all whitespace |
425 | 24.0k | *result = PARSE_FAILURE; |
426 | 24.0k | return 0; |
427 | 24.0k | } |
428 | | // skip trailing whitespace. |
429 | 12.2k | break; |
430 | 36.3k | } |
431 | | |
432 | | // Bail, if we encounter a digit that is not available in base. |
433 | 82.4k | if (digit >= base) { |
434 | 392 | break; |
435 | 392 | } |
436 | | |
437 | | // This is a tricky check to see if adding this digit will cause an overflow. |
438 | 82.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
439 | 14.8k | *result = PARSE_OVERFLOW; |
440 | 14.8k | return static_cast<T>(negative ? -max_val : max_val); |
441 | 14.8k | } |
442 | 67.2k | val = val * base + digit; |
443 | 67.2k | } |
444 | 14.7k | *result = PARSE_SUCCESS; |
445 | 14.7k | return static_cast<T>(negative ? -val : val); |
446 | 53.6k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 51.0k | ParseResult* result) { | 391 | 51.0k | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 51.0k | UnsignedT val = 0; | 393 | 51.0k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 51.0k | bool negative = false; | 395 | 51.0k | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 51.0k | int i = 0; | 400 | 51.0k | switch (*s) { | 401 | 13.7k | case '-': | 402 | 13.7k | negative = true; | 403 | 13.7k | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 13.7k | [[fallthrough]]; | 405 | 13.8k | case '+': | 406 | 13.8k | i = 1; | 407 | 51.0k | } | 408 | | | 409 | 51.0k | const T max_div_base = max_val / base; | 410 | 51.0k | const T max_mod_base = max_val % base; | 411 | | | 412 | 51.0k | int first = i; | 413 | 108k | for (; i < len; ++i) { | 414 | 107k | T digit; | 415 | 107k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 72.1k | digit = s[i] - '0'; | 417 | 72.1k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 539 | digit = (s[i] - 'a' + 10); | 419 | 34.3k | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 98 | digit = (s[i] - 'A' + 10); | 421 | 34.2k | } else { | 422 | 34.2k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 22.8k | *result = PARSE_FAILURE; | 426 | 22.8k | return 0; | 427 | 22.8k | } | 428 | | // skip trailing whitespace. | 429 | 11.3k | break; | 430 | 34.2k | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 72.7k | if (digit >= base) { | 434 | 392 | break; | 435 | 392 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 438 | 72.4k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 14.5k | *result = PARSE_OVERFLOW; | 440 | 14.5k | return static_cast<T>(negative ? -max_val : max_val); | 441 | 14.5k | } | 442 | 57.8k | val = val * base + digit; | 443 | 57.8k | } | 444 | 13.6k | *result = PARSE_SUCCESS; | 445 | 13.6k | return static_cast<T>(negative ? -val : val); | 446 | 51.0k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 924 | ParseResult* result) { | 391 | 924 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 924 | UnsignedT val = 0; | 393 | 924 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 924 | bool negative = false; | 395 | 924 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 924 | int i = 0; | 400 | 924 | switch (*s) { | 401 | 203 | case '-': | 402 | 203 | negative = true; | 403 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 203 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 924 | } | 408 | | | 409 | 924 | const T max_div_base = max_val / base; | 410 | 924 | const T max_mod_base = max_val % base; | 411 | | | 412 | 924 | int first = i; | 413 | 2.59k | for (; i < len; ++i) { | 414 | 2.54k | T digit; | 415 | 2.54k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 1.68k | digit = s[i] - '0'; | 417 | 1.68k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 98 | digit = (s[i] - 'a' + 10); | 419 | 756 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 756 | } else { | 422 | 756 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 420 | *result = PARSE_FAILURE; | 426 | 420 | return 0; | 427 | 420 | } | 428 | | // skip trailing whitespace. | 429 | 336 | break; | 430 | 756 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 1.78k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 438 | 1.78k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 1.67k | val = val * base + digit; | 443 | 1.67k | } | 444 | 392 | *result = PARSE_SUCCESS; | 445 | 392 | return static_cast<T>(negative ? -val : val); | 446 | 924 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 833 | ParseResult* result) { | 391 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 833 | UnsignedT val = 0; | 393 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 833 | bool negative = false; | 395 | 833 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 833 | int i = 0; | 400 | 833 | switch (*s) { | 401 | 154 | case '-': | 402 | 154 | negative = true; | 403 | 154 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 154 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 833 | } | 408 | | | 409 | 833 | const T max_div_base = max_val / base; | 410 | 833 | const T max_mod_base = max_val % base; | 411 | | | 412 | 833 | int first = i; | 413 | 3.55k | for (; i < len; ++i) { | 414 | 3.50k | T digit; | 415 | 3.50k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 2.83k | digit = s[i] - '0'; | 417 | 2.83k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 0 | digit = (s[i] - 'a' + 10); | 419 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 672 | } else { | 422 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 378 | *result = PARSE_FAILURE; | 426 | 378 | return 0; | 427 | 378 | } | 428 | | // skip trailing whitespace. | 429 | 294 | break; | 430 | 672 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 2.83k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 438 | 2.83k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 2.72k | val = val * base + digit; | 443 | 2.72k | } | 444 | 343 | *result = PARSE_SUCCESS; | 445 | 343 | return static_cast<T>(negative ? -val : val); | 446 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 833 | ParseResult* result) { | 391 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 833 | UnsignedT val = 0; | 393 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 833 | bool negative = false; | 395 | 833 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 833 | int i = 0; | 400 | 833 | switch (*s) { | 401 | 203 | case '-': | 402 | 203 | negative = true; | 403 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 203 | [[fallthrough]]; | 405 | 252 | case '+': | 406 | 252 | i = 1; | 407 | 833 | } | 408 | | | 409 | 833 | const T max_div_base = max_val / base; | 410 | 833 | const T max_mod_base = max_val % base; | 411 | | | 412 | 833 | int first = i; | 413 | 5.74k | for (; i < len; ++i) { | 414 | 5.69k | T digit; | 415 | 5.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 5.01k | digit = s[i] - '0'; | 417 | 5.01k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 0 | digit = (s[i] - 'a' + 10); | 419 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 672 | } else { | 422 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 378 | *result = PARSE_FAILURE; | 426 | 378 | return 0; | 427 | 378 | } | 428 | | // skip trailing whitespace. | 429 | 294 | break; | 430 | 672 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 5.01k | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 438 | 5.01k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 112 | *result = PARSE_OVERFLOW; | 440 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 441 | 112 | } | 442 | 4.90k | val = val * base + digit; | 443 | 4.90k | } | 444 | 343 | *result = PARSE_SUCCESS; | 445 | 343 | return static_cast<T>(negative ? -val : val); | 446 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 390 | 1 | ParseResult* result) { | 391 | 1 | typedef typename std::make_unsigned<T>::type UnsignedT; | 392 | 1 | UnsignedT val = 0; | 393 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 394 | 1 | bool negative = false; | 395 | 1 | if (UNLIKELY(len <= 0)) { | 396 | 0 | *result = PARSE_FAILURE; | 397 | 0 | return 0; | 398 | 0 | } | 399 | 1 | int i = 0; | 400 | 1 | switch (*s) { | 401 | 0 | case '-': | 402 | 0 | negative = true; | 403 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 404 | 0 | [[fallthrough]]; | 405 | 0 | case '+': | 406 | 0 | i = 1; | 407 | 1 | } | 408 | | | 409 | 1 | const T max_div_base = max_val / base; | 410 | 1 | const T max_mod_base = max_val % base; | 411 | | | 412 | 1 | int first = i; | 413 | 3 | for (; i < len; ++i) { | 414 | 2 | T digit; | 415 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 416 | 0 | digit = s[i] - '0'; | 417 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 418 | 2 | digit = (s[i] - 'a' + 10); | 419 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 420 | 0 | digit = (s[i] - 'A' + 10); | 421 | 0 | } else { | 422 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 423 | | // Reject the string because either the first char was not an alpha/digit, | 424 | | // or the remaining chars are not all whitespace | 425 | 0 | *result = PARSE_FAILURE; | 426 | 0 | return 0; | 427 | 0 | } | 428 | | // skip trailing whitespace. | 429 | 0 | break; | 430 | 0 | } | 431 | | | 432 | | // Bail, if we encounter a digit that is not available in base. | 433 | 2 | if (digit >= base) { | 434 | 0 | break; | 435 | 0 | } | 436 | | | 437 | | // This is a tricky check to see if adding this digit will cause an overflow. | 438 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 439 | 0 | *result = PARSE_OVERFLOW; | 440 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 441 | 0 | } | 442 | 2 | val = val * base + digit; | 443 | 2 | } | 444 | 1 | *result = PARSE_SUCCESS; | 445 | 1 | return static_cast<T>(negative ? -val : val); | 446 | 1 | } |
|
447 | | |
448 | | template <typename T> |
449 | 315k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
450 | 315k | T val = 0; |
451 | 315k | if (UNLIKELY(len == 0)) { |
452 | 0 | *result = PARSE_SUCCESS; |
453 | 0 | return val; |
454 | 0 | } |
455 | | // Factor out the first char for error handling speeds up the loop. |
456 | 315k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
457 | 311k | val = s[0] - '0'; |
458 | 311k | } else { |
459 | 4.49k | *result = PARSE_FAILURE; |
460 | 4.49k | return 0; |
461 | 4.49k | } |
462 | 520k | for (int i = 1; i < len; ++i) { |
463 | 210k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
464 | 209k | T digit = s[i] - '0'; |
465 | 209k | val = val * 10 + digit; |
466 | 209k | } else { |
467 | 1.56k | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && |
468 | 1.56k | !is_float_suffix(s + i, len - i)))) { |
469 | 188 | *result = PARSE_FAILURE; |
470 | 188 | return 0; |
471 | 188 | } |
472 | 1.38k | *result = PARSE_SUCCESS; |
473 | 1.38k | return val; |
474 | 1.56k | } |
475 | 210k | } |
476 | 309k | *result = PARSE_SUCCESS; |
477 | 309k | return val; |
478 | 311k | } _ZN5doris12StringParser25string_to_int_no_overflowIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 139k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 139k | T val = 0; | 451 | 139k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 139k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 138k | val = s[0] - '0'; | 458 | 138k | } else { | 459 | 492 | *result = PARSE_FAILURE; | 460 | 492 | return 0; | 461 | 492 | } | 462 | 239k | for (int i = 1; i < len; ++i) { | 463 | 101k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 101k | T digit = s[i] - '0'; | 465 | 101k | val = val * 10 + digit; | 466 | 101k | } else { | 467 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 0 | !is_float_suffix(s + i, len - i)))) { | 469 | 0 | *result = PARSE_FAILURE; | 470 | 0 | return 0; | 471 | 0 | } | 472 | 0 | *result = PARSE_SUCCESS; | 473 | 0 | return val; | 474 | 0 | } | 475 | 101k | } | 476 | 138k | *result = PARSE_SUCCESS; | 477 | 138k | return val; | 478 | 138k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 50.8k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 50.8k | T val = 0; | 451 | 50.8k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 50.8k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 50.0k | val = s[0] - '0'; | 458 | 50.0k | } else { | 459 | 749 | *result = PARSE_FAILURE; | 460 | 749 | return 0; | 461 | 749 | } | 462 | 72.6k | for (int i = 1; i < len; ++i) { | 463 | 23.4k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 22.5k | T digit = s[i] - '0'; | 465 | 22.5k | val = val * 10 + digit; | 466 | 22.5k | } else { | 467 | 928 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 928 | !is_float_suffix(s + i, len - i)))) { | 469 | 52 | *result = PARSE_FAILURE; | 470 | 52 | return 0; | 471 | 52 | } | 472 | 876 | *result = PARSE_SUCCESS; | 473 | 876 | return val; | 474 | 928 | } | 475 | 23.4k | } | 476 | 49.1k | *result = PARSE_SUCCESS; | 477 | 49.1k | return val; | 478 | 50.0k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 51.0k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 51.0k | T val = 0; | 451 | 51.0k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 51.0k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 49.8k | val = s[0] - '0'; | 458 | 49.8k | } else { | 459 | 1.25k | *result = PARSE_FAILURE; | 460 | 1.25k | return 0; | 461 | 1.25k | } | 462 | 83.5k | for (int i = 1; i < len; ++i) { | 463 | 33.9k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 33.6k | T digit = s[i] - '0'; | 465 | 33.6k | val = val * 10 + digit; | 466 | 33.6k | } else { | 467 | 254 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 254 | !is_float_suffix(s + i, len - i)))) { | 469 | 44 | *result = PARSE_FAILURE; | 470 | 44 | return 0; | 471 | 44 | } | 472 | 210 | *result = PARSE_SUCCESS; | 473 | 210 | return val; | 474 | 254 | } | 475 | 33.9k | } | 476 | 49.5k | *result = PARSE_SUCCESS; | 477 | 49.5k | return val; | 478 | 49.8k | } |
_ZN5doris12StringParser25string_to_int_no_overflowImEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 32.2k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 32.2k | T val = 0; | 451 | 32.2k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 32.2k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 31.2k | val = s[0] - '0'; | 458 | 31.2k | } else { | 459 | 1.04k | *result = PARSE_FAILURE; | 460 | 1.04k | return 0; | 461 | 1.04k | } | 462 | 64.9k | for (int i = 1; i < len; ++i) { | 463 | 34.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 33.7k | T digit = s[i] - '0'; | 465 | 33.7k | val = val * 10 + digit; | 466 | 33.7k | } else { | 467 | 355 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 355 | !is_float_suffix(s + i, len - i)))) { | 469 | 60 | *result = PARSE_FAILURE; | 470 | 60 | return 0; | 471 | 60 | } | 472 | 295 | *result = PARSE_SUCCESS; | 473 | 295 | return val; | 474 | 355 | } | 475 | 34.0k | } | 476 | 30.8k | *result = PARSE_SUCCESS; | 477 | 30.8k | return val; | 478 | 31.2k | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoEET_PKciPNS0_11ParseResultE Line | Count | Source | 449 | 42.1k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 450 | 42.1k | T val = 0; | 451 | 42.1k | if (UNLIKELY(len == 0)) { | 452 | 0 | *result = PARSE_SUCCESS; | 453 | 0 | return val; | 454 | 0 | } | 455 | | // Factor out the first char for error handling speeds up the loop. | 456 | 42.1k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 457 | 41.2k | val = s[0] - '0'; | 458 | 41.2k | } else { | 459 | 956 | *result = PARSE_FAILURE; | 460 | 956 | return 0; | 461 | 956 | } | 462 | 59.2k | for (int i = 1; i < len; ++i) { | 463 | 18.0k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 464 | 18.0k | T digit = s[i] - '0'; | 465 | 18.0k | val = val * 10 + digit; | 466 | 18.0k | } else { | 467 | 32 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 468 | 32 | !is_float_suffix(s + i, len - i)))) { | 469 | 32 | *result = PARSE_FAILURE; | 470 | 32 | return 0; | 471 | 32 | } | 472 | 0 | *result = PARSE_SUCCESS; | 473 | 0 | return val; | 474 | 32 | } | 475 | 18.0k | } | 476 | 41.2k | *result = PARSE_SUCCESS; | 477 | 41.2k | return val; | 478 | 41.2k | } |
Unexecuted instantiation: _ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEEEET_PKciPNS0_11ParseResultE |
479 | | |
480 | | template <typename T> |
481 | 134k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
482 | 134k | int i = 0; |
483 | | // skip leading spaces |
484 | 176k | for (; i < len; ++i) { |
485 | 176k | if (!is_whitespace(s[i])) { |
486 | 134k | break; |
487 | 134k | } |
488 | 176k | } |
489 | | |
490 | | // skip back spaces |
491 | 134k | int j = len - 1; |
492 | 175k | for (; j >= i; j--) { |
493 | 175k | if (!is_whitespace(s[j])) { |
494 | 134k | break; |
495 | 134k | } |
496 | 175k | } |
497 | | |
498 | | // skip leading '+', from_chars can handle '-' |
499 | 134k | if (i < len && s[i] == '+') { |
500 | 5.29k | i++; |
501 | 5.29k | } |
502 | 134k | if (UNLIKELY(i > j)) { |
503 | 3 | *result = PARSE_FAILURE; |
504 | 3 | return 0; |
505 | 3 | } |
506 | | |
507 | | // Use double here to not lose precision while accumulating the result |
508 | 134k | double val = 0; |
509 | 134k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
510 | | |
511 | 134k | if (res.ec == std::errc() && res.ptr == s + j + 1) { |
512 | 129k | if (abs(val) == std::numeric_limits<T>::infinity()) { |
513 | 886 | auto contain_inf = false; |
514 | 1.27k | for (int k = i; k < j + 1; k++) { |
515 | 1.27k | if (s[k] == 'i' || s[k] == 'I') { |
516 | 882 | contain_inf = true; |
517 | 882 | break; |
518 | 882 | } |
519 | 1.27k | } |
520 | | |
521 | 886 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; |
522 | 128k | } else { |
523 | 128k | *result = PARSE_SUCCESS; |
524 | 128k | } |
525 | 129k | return val; |
526 | 129k | } else { |
527 | 4.71k | *result = PARSE_FAILURE; |
528 | 4.71k | } |
529 | 4.71k | return 0; |
530 | 134k | } _ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 481 | 71.8k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 482 | 71.8k | int i = 0; | 483 | | // skip leading spaces | 484 | 93.0k | for (; i < len; ++i) { | 485 | 93.0k | if (!is_whitespace(s[i])) { | 486 | 71.8k | break; | 487 | 71.8k | } | 488 | 93.0k | } | 489 | | | 490 | | // skip back spaces | 491 | 71.8k | int j = len - 1; | 492 | 92.4k | for (; j >= i; j--) { | 493 | 92.4k | if (!is_whitespace(s[j])) { | 494 | 71.8k | break; | 495 | 71.8k | } | 496 | 92.4k | } | 497 | | | 498 | | // skip leading '+', from_chars can handle '-' | 499 | 71.8k | if (i < len && s[i] == '+') { | 500 | 2.64k | i++; | 501 | 2.64k | } | 502 | 71.8k | if (UNLIKELY(i > j)) { | 503 | 3 | *result = PARSE_FAILURE; | 504 | 3 | return 0; | 505 | 3 | } | 506 | | | 507 | | // Use double here to not lose precision while accumulating the result | 508 | 71.8k | double val = 0; | 509 | 71.8k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 510 | | | 511 | 71.8k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 512 | 69.3k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 513 | 443 | auto contain_inf = false; | 514 | 647 | for (int k = i; k < j + 1; k++) { | 515 | 645 | if (s[k] == 'i' || s[k] == 'I') { | 516 | 441 | contain_inf = true; | 517 | 441 | break; | 518 | 441 | } | 519 | 645 | } | 520 | | | 521 | 443 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 522 | 68.8k | } else { | 523 | 68.8k | *result = PARSE_SUCCESS; | 524 | 68.8k | } | 525 | 69.3k | return val; | 526 | 69.3k | } else { | 527 | 2.55k | *result = PARSE_FAILURE; | 528 | 2.55k | } | 529 | 2.55k | return 0; | 530 | 71.8k | } |
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 481 | 62.3k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 482 | 62.3k | int i = 0; | 483 | | // skip leading spaces | 484 | 83.4k | for (; i < len; ++i) { | 485 | 83.4k | if (!is_whitespace(s[i])) { | 486 | 62.3k | break; | 487 | 62.3k | } | 488 | 83.4k | } | 489 | | | 490 | | // skip back spaces | 491 | 62.3k | int j = len - 1; | 492 | 82.8k | for (; j >= i; j--) { | 493 | 82.8k | if (!is_whitespace(s[j])) { | 494 | 62.3k | break; | 495 | 62.3k | } | 496 | 82.8k | } | 497 | | | 498 | | // skip leading '+', from_chars can handle '-' | 499 | 62.3k | if (i < len && s[i] == '+') { | 500 | 2.64k | i++; | 501 | 2.64k | } | 502 | 62.3k | if (UNLIKELY(i > j)) { | 503 | 0 | *result = PARSE_FAILURE; | 504 | 0 | return 0; | 505 | 0 | } | 506 | | | 507 | | // Use double here to not lose precision while accumulating the result | 508 | 62.3k | double val = 0; | 509 | 62.3k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 510 | | | 511 | 62.3k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 512 | 60.1k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 513 | 443 | auto contain_inf = false; | 514 | 629 | for (int k = i; k < j + 1; k++) { | 515 | 627 | if (s[k] == 'i' || s[k] == 'I') { | 516 | 441 | contain_inf = true; | 517 | 441 | break; | 518 | 441 | } | 519 | 627 | } | 520 | | | 521 | 443 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 522 | 59.7k | } else { | 523 | 59.7k | *result = PARSE_SUCCESS; | 524 | 59.7k | } | 525 | 60.1k | return val; | 526 | 60.1k | } else { | 527 | 2.16k | *result = PARSE_FAILURE; | 528 | 2.16k | } | 529 | 2.16k | return 0; | 530 | 62.3k | } |
|
531 | | |
532 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
533 | 9.05k | ParseResult* result) { |
534 | 9.05k | *result = PARSE_SUCCESS; |
535 | | |
536 | 9.05k | if (len >= 4 && (s[0] == 't' || s[0] == 'T')) { |
537 | 3.65k | bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') && |
538 | 3.65k | (s[3] == 'e' || s[3] == 'E'); |
539 | 3.65k | if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) { |
540 | 3.53k | return true; |
541 | 3.53k | } |
542 | 5.40k | } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) { |
543 | 3.71k | bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') && |
544 | 3.71k | (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E'); |
545 | 3.71k | if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) { |
546 | 3.59k | return false; |
547 | 3.59k | } |
548 | 3.71k | } |
549 | | |
550 | 1.92k | *result = PARSE_FAILURE; |
551 | 1.92k | return false; |
552 | 9.05k | } |
553 | | |
554 | | template <PrimitiveType P, typename T, typename DecimalType> |
555 | | T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision, |
556 | 186k | int type_scale, ParseResult* result) { |
557 | 186k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
558 | 186k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
559 | 186k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
560 | 186k | "wide::Int256."); |
561 | | // Special cases: |
562 | | // 1) '' == Fail, an empty string fails to parse. |
563 | | // 2) ' # ' == #, leading and trailing white space is ignored. |
564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). |
565 | | // 4) '#.' == '#', a trailing dot is ignored. |
566 | | |
567 | | // Ignore leading and trailing spaces. |
568 | 188k | while (len > 0 && is_whitespace(*s)) { |
569 | 2.44k | ++s; |
570 | 2.44k | --len; |
571 | 2.44k | } |
572 | 186k | while (len > 0 && is_whitespace(s[len - 1])) { |
573 | 0 | --len; |
574 | 0 | } |
575 | | |
576 | 186k | bool is_negative = false; |
577 | 186k | if (len > 0) { |
578 | 186k | switch (*s) { |
579 | 9.10k | case '-': |
580 | 9.10k | is_negative = true; |
581 | 9.10k | [[fallthrough]]; |
582 | 9.10k | case '+': |
583 | 9.10k | ++s; |
584 | 9.10k | --len; |
585 | 186k | } |
586 | 186k | } |
587 | | |
588 | | // Ignore leading zeros. |
589 | 186k | bool found_value = false; |
590 | 205k | while (len > 0 && UNLIKELY(*s == '0')) { |
591 | 19.3k | found_value = true; |
592 | 19.3k | ++s; |
593 | 19.3k | --len; |
594 | 19.3k | } |
595 | | |
596 | | // Ignore leading zeros even after a dot. This allows for differentiating between |
597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would |
598 | | // overflow. |
599 | 186k | int scale = 0; |
600 | 186k | int found_dot = 0; |
601 | 186k | if (len > 0 && *s == '.') { |
602 | 15.7k | found_dot = 1; |
603 | 15.7k | ++s; |
604 | 15.7k | --len; |
605 | 30.1k | while (len > 0 && UNLIKELY(*s == '0')) { |
606 | 14.3k | found_value = true; |
607 | 14.3k | ++scale; |
608 | 14.3k | ++s; |
609 | 14.3k | --len; |
610 | 14.3k | } |
611 | 15.7k | } |
612 | | |
613 | 186k | int precision = 0; |
614 | 186k | int max_digit = type_precision - type_scale; |
615 | 186k | int cur_digit = 0; |
616 | 186k | bool found_exponent = false; |
617 | 186k | int8_t exponent = 0; |
618 | 186k | T value = 0; |
619 | 186k | bool has_round = false; |
620 | 2.82M | for (int i = 0; i < len; ++i) { |
621 | 2.71M | const char& c = s[i]; |
622 | 2.71M | if (LIKELY('0' <= c && c <= '9')) { |
623 | 2.49M | found_value = true; |
624 | | // Ignore digits once the type's precision limit is reached. This avoids |
625 | | // overflowing the underlying storage while handling a string like |
626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and |
627 | | // an exponent will be made later. |
628 | 2.49M | if (LIKELY(type_precision > precision) && !has_round) { |
629 | 2.48M | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... |
630 | 2.48M | ++precision; |
631 | 2.48M | scale += found_dot; |
632 | 2.48M | cur_digit = precision - scale; |
633 | 2.48M | } else if (!found_dot && max_digit < (precision - scale)) { |
634 | 426 | *result = StringParser::PARSE_OVERFLOW; |
635 | 426 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
636 | 426 | : vectorized::max_decimal_value<DecimalType>(type_precision); |
637 | 426 | return value; |
638 | 426 | } else if (found_dot && scale >= type_scale && !has_round) { |
639 | | // make rounding cases |
640 | 20 | if (c > '4') { |
641 | 8 | value += 1; |
642 | 8 | } |
643 | 20 | has_round = true; |
644 | 20 | continue; |
645 | 20 | } else if (!found_dot) { |
646 | 0 | ++cur_digit; |
647 | 0 | } |
648 | 2.48M | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. |
649 | 2.48M | } else if (c == '.' && LIKELY(!found_dot)) { |
650 | 146k | found_dot = 1; |
651 | 146k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { |
652 | 75.1k | found_exponent = true; |
653 | 75.1k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); |
654 | 75.1k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { |
655 | 10 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { |
656 | 0 | *result = StringParser::PARSE_UNDERFLOW; |
657 | 0 | } |
658 | 10 | return 0; |
659 | 10 | } |
660 | 75.1k | break; |
661 | 75.1k | } else { |
662 | 278 | if (value == 0) { |
663 | 210 | *result = StringParser::PARSE_FAILURE; |
664 | 210 | return 0; |
665 | 210 | } |
666 | | // here to handle |
667 | 68 | *result = StringParser::PARSE_SUCCESS; |
668 | 68 | if (type_scale >= scale) { |
669 | 66 | value *= get_scale_multiplier<T>(type_scale - scale); |
670 | | // here meet non-valid character, should return the value, keep going to meet |
671 | | // the E/e character because we make right user-given type_precision |
672 | | // not max number type_precision |
673 | 66 | if (!is_numeric_ascii(c)) { |
674 | 66 | if (cur_digit > type_precision) { |
675 | 0 | *result = StringParser::PARSE_OVERFLOW; |
676 | 0 | value = is_negative |
677 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) |
678 | 0 | : vectorized::max_decimal_value<DecimalType>( |
679 | 0 | type_precision); |
680 | 0 | return value; |
681 | 0 | } |
682 | 66 | return is_negative ? T(-value) : T(value); |
683 | 66 | } |
684 | 66 | } |
685 | | |
686 | 2 | return is_negative ? T(-value) : T(value); |
687 | 68 | } |
688 | 2.71M | } |
689 | | |
690 | | // Find the number of truncated digits before adjusting the precision for an exponent. |
691 | 185k | if (exponent > scale) { |
692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the |
693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. |
694 | 67.2k | precision += exponent - scale; |
695 | | |
696 | 67.2k | value *= get_scale_multiplier<T>(exponent - scale); |
697 | 67.2k | scale = 0; |
698 | 118k | } else { |
699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, |
700 | | // the precision must also be set to 4 but that will be done below for the |
701 | | // non-exponent case anyways. |
702 | 118k | scale -= exponent; |
703 | 118k | } |
704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros |
705 | | // were ignored during previous parsing. |
706 | 185k | if (scale > precision) { |
707 | 9.81k | precision = scale; |
708 | 9.81k | } |
709 | | |
710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower |
711 | | // than just letting the function run out. |
712 | 127 | *result = StringParser::PARSE_SUCCESS; |
713 | 185k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { |
714 | 11.1k | *result = StringParser::PARSE_OVERFLOW; |
715 | 11.1k | if constexpr (TYPE_DECIMALV2 != P) { |
716 | | // decimalv3 overflow will return max min value for type precision |
717 | 11.1k | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
718 | 11.1k | : vectorized::max_decimal_value<DecimalType>(type_precision); |
719 | 11.1k | return value; |
720 | 11.1k | } |
721 | 174k | } else if (UNLIKELY(scale > type_scale)) { |
722 | 3.74k | *result = StringParser::PARSE_UNDERFLOW; |
723 | 3.74k | int shift = scale - type_scale; |
724 | 3.74k | T divisor = get_scale_multiplier<T>(shift); |
725 | 3.74k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { |
726 | 0 | value = 0; |
727 | 3.74k | } else { |
728 | 3.74k | T remainder = value % divisor; |
729 | 3.74k | value /= divisor; |
730 | 3.74k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { |
731 | 98 | value += 1; |
732 | 98 | } |
733 | 3.74k | } |
734 | 3.74k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. |
735 | 170k | } else if (UNLIKELY(!found_value && !found_dot)) { |
736 | 1 | *result = StringParser::PARSE_FAILURE; |
737 | 1 | } |
738 | | |
739 | 174k | if (type_scale > scale) { |
740 | 161k | value *= get_scale_multiplier<T>(type_scale - scale); |
741 | 161k | } |
742 | | |
743 | 174k | return is_negative ? T(-value) : T(value); |
744 | 185k | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EiNS_10vectorized7DecimalIiEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 22.3k | int type_scale, ParseResult* result) { | 557 | 22.3k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 22.3k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 22.3k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 22.3k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 22.5k | while (len > 0 && is_whitespace(*s)) { | 569 | 220 | ++s; | 570 | 220 | --len; | 571 | 220 | } | 572 | 22.3k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 22.3k | bool is_negative = false; | 577 | 22.3k | if (len > 0) { | 578 | 22.3k | switch (*s) { | 579 | 9.07k | case '-': | 580 | 9.07k | is_negative = true; | 581 | 9.07k | [[fallthrough]]; | 582 | 9.07k | case '+': | 583 | 9.07k | ++s; | 584 | 9.07k | --len; | 585 | 22.3k | } | 586 | 22.3k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 22.3k | bool found_value = false; | 590 | 24.8k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 2.52k | found_value = true; | 592 | 2.52k | ++s; | 593 | 2.52k | --len; | 594 | 2.52k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. 
| 599 | 22.3k | int scale = 0; | 600 | 22.3k | int found_dot = 0; | 601 | 22.3k | if (len > 0 && *s == '.') { | 602 | 1.65k | found_dot = 1; | 603 | 1.65k | ++s; | 604 | 1.65k | --len; | 605 | 2.06k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 404 | found_value = true; | 607 | 404 | ++scale; | 608 | 404 | ++s; | 609 | 404 | --len; | 610 | 404 | } | 611 | 1.65k | } | 612 | | | 613 | 22.3k | int precision = 0; | 614 | 22.3k | int max_digit = type_precision - type_scale; | 615 | 22.3k | int cur_digit = 0; | 616 | 22.3k | bool found_exponent = false; | 617 | 22.3k | int8_t exponent = 0; | 618 | 22.3k | T value = 0; | 619 | 22.3k | bool has_round = false; | 620 | 60.1k | for (int i = 0; i < len; ++i) { | 621 | 38.2k | const char& c = s[i]; | 622 | 38.2k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 34.5k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 34.5k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 34.1k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 34.1k | ++precision; | 631 | 34.1k | scale += found_dot; | 632 | 34.1k | cur_digit = precision - scale; | 633 | 34.1k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 426 | *result = StringParser::PARSE_OVERFLOW; | 635 | 426 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 426 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 426 | return value; | 638 | 426 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 4 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 4 | has_round = true; | 644 | 4 | continue; | 645 | 10 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 34.1k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 34.1k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 3.61k | found_dot = 1; | 651 | 3.61k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 0 | found_exponent = true; | 653 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 0 | break; | 661 | 64 | } else { | 662 | 64 | if (value == 0) { | 663 | 44 | *result = StringParser::PARSE_FAILURE; | 664 | 44 | return 0; | 665 | 44 | } | 666 | | // here to handle | 667 | 20 | *result = StringParser::PARSE_SUCCESS; | 668 | 20 | if (type_scale >= scale) { | 669 | 20 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 20 | if (!is_numeric_ascii(c)) { | 674 | 20 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 20 | return is_negative ? T(-value) : T(value); | 683 | 20 | } | 684 | 20 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 20 | } | 688 | 38.2k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 21.8k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 21.8k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 21.8k | scale -= exponent; | 703 | 21.8k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 21.8k | if (scale > precision) { | 707 | 308 | precision = scale; | 708 | 308 | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 21.8k | *result = StringParser::PARSE_SUCCESS; | 713 | 21.8k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 714 | *result = StringParser::PARSE_OVERFLOW; | 715 | 714 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 714 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 714 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 714 | return value; | 720 | 714 | } | 721 | 21.1k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 4 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 4 | int shift = scale - type_scale; | 724 | 4 | T divisor = get_scale_multiplier<T>(shift); | 725 | 4 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 4 | } else { | 728 | 4 | T remainder = value % divisor; | 729 | 4 | value /= divisor; | 730 | 4 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 4 | } | 734 | 4 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 735 | 21.1k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 21.1k | if (type_scale > scale) { | 740 | 21.0k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 21.0k | } | 742 | | | 743 | 21.1k | return is_negative ? T(-value) : T(value); | 744 | 21.8k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29ElNS_10vectorized7DecimalIlEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 42.9k | int type_scale, ParseResult* result) { | 557 | 42.9k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 42.9k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 42.9k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 42.9k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 43.5k | while (len > 0 && is_whitespace(*s)) { | 569 | 593 | ++s; | 570 | 593 | --len; | 571 | 593 | } | 572 | 42.9k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 42.9k | bool is_negative = false; | 577 | 42.9k | if (len > 0) { | 578 | 42.9k | switch (*s) { | 579 | 4 | case '-': | 580 | 4 | is_negative = true; | 581 | 4 | [[fallthrough]]; | 582 | 4 | case '+': | 583 | 4 | ++s; | 584 | 4 | --len; | 585 | 42.9k | } | 586 | 42.9k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 42.9k | bool found_value = false; | 590 | 50.2k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 7.31k | found_value = true; | 592 | 7.31k | ++s; | 593 | 7.31k | --len; | 594 | 7.31k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. 
| 599 | 42.9k | int scale = 0; | 600 | 42.9k | int found_dot = 0; | 601 | 42.9k | if (len > 0 && *s == '.') { | 602 | 6.44k | found_dot = 1; | 603 | 6.44k | ++s; | 604 | 6.44k | --len; | 605 | 9.87k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 3.42k | found_value = true; | 607 | 3.42k | ++scale; | 608 | 3.42k | ++s; | 609 | 3.42k | --len; | 610 | 3.42k | } | 611 | 6.44k | } | 612 | | | 613 | 42.9k | int precision = 0; | 614 | 42.9k | int max_digit = type_precision - type_scale; | 615 | 42.9k | int cur_digit = 0; | 616 | 42.9k | bool found_exponent = false; | 617 | 42.9k | int8_t exponent = 0; | 618 | 42.9k | T value = 0; | 619 | 42.9k | bool has_round = false; | 620 | 513k | for (int i = 0; i < len; ++i) { | 621 | 472k | const char& c = s[i]; | 622 | 472k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 437k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 437k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 437k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 437k | ++precision; | 631 | 437k | scale += found_dot; | 632 | 437k | cur_digit = precision - scale; | 633 | 437k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 10 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 4 | if (c > '4') { | 641 | 4 | value += 1; | 642 | 4 | } | 643 | 4 | has_round = true; | 644 | 4 | continue; | 645 | 6 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 437k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 437k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 33.1k | found_dot = 1; | 651 | 33.1k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 2.41k | found_exponent = true; | 653 | 2.41k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 2.41k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 2.41k | break; | 661 | 2.41k | } else { | 662 | 77 | if (value == 0) { | 663 | 54 | *result = StringParser::PARSE_FAILURE; | 664 | 54 | return 0; | 665 | 54 | } | 666 | | // here to handle | 667 | 23 | *result = StringParser::PARSE_SUCCESS; | 668 | 23 | if (type_scale >= scale) { | 669 | 22 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 22 | if (!is_numeric_ascii(c)) { | 674 | 22 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 22 | return is_negative ? T(-value) : T(value); | 683 | 22 | } | 684 | 22 | } | 685 | | | 686 | 1 | return is_negative ? T(-value) : T(value); | 687 | 23 | } | 688 | 472k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 42.8k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 1 | precision += exponent - scale; | 695 | | | 696 | 1 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 1 | scale = 0; | 698 | 42.8k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 42.8k | scale -= exponent; | 703 | 42.8k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 42.8k | if (scale > precision) { | 707 | 4.04k | precision = scale; | 708 | 4.04k | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 42.8k | *result = StringParser::PARSE_SUCCESS; | 713 | 42.8k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 10.4k | *result = StringParser::PARSE_OVERFLOW; | 715 | 10.4k | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 10.4k | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 10.4k | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 10.4k | return value; | 720 | 10.4k | } | 721 | 32.4k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 1.02k | *result = StringParser::PARSE_UNDERFLOW; | 723 | 1.02k | int shift = scale - type_scale; | 724 | 1.02k | T divisor = get_scale_multiplier<T>(shift); | 725 | 1.02k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 1.02k | } else { | 728 | 1.02k | T remainder = value % divisor; | 729 | 1.02k | value /= divisor; | 730 | 1.02k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 1.02k | } | 734 | 1.02k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 31.3k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 1 | *result = StringParser::PARSE_FAILURE; | 737 | 1 | } | 738 | | | 739 | 32.4k | if (type_scale > scale) { | 740 | 22.5k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 22.5k | } | 742 | | | 743 | 32.4k | return is_negative ? T(-value) : T(value); | 744 | 42.8k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EnNS_10vectorized12Decimal128V3EEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 38.8k | int type_scale, ParseResult* result) { | 557 | 38.8k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 38.8k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 38.8k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 38.8k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 39.3k | while (len > 0 && is_whitespace(*s)) { | 569 | 503 | ++s; | 570 | 503 | --len; | 571 | 503 | } | 572 | 38.8k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 38.8k | bool is_negative = false; | 577 | 38.8k | if (len > 0) { | 578 | 38.8k | switch (*s) { | 579 | 1 | case '-': | 580 | 1 | is_negative = true; | 581 | 1 | [[fallthrough]]; | 582 | 1 | case '+': | 583 | 1 | ++s; | 584 | 1 | --len; | 585 | 38.8k | } | 586 | 38.8k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 38.8k | bool found_value = false; | 590 | 47.2k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 8.43k | found_value = true; | 592 | 8.43k | ++s; | 593 | 8.43k | --len; | 594 | 8.43k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. 
| 599 | 38.8k | int scale = 0; | 600 | 38.8k | int found_dot = 0; | 601 | 38.8k | if (len > 0 && *s == '.') { | 602 | 7.42k | found_dot = 1; | 603 | 7.42k | ++s; | 604 | 7.42k | --len; | 605 | 16.7k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 9.28k | found_value = true; | 607 | 9.28k | ++scale; | 608 | 9.28k | ++s; | 609 | 9.28k | --len; | 610 | 9.28k | } | 611 | 7.42k | } | 612 | | | 613 | 38.8k | int precision = 0; | 614 | 38.8k | int max_digit = type_precision - type_scale; | 615 | 38.8k | int cur_digit = 0; | 616 | 38.8k | bool found_exponent = false; | 617 | 38.8k | int8_t exponent = 0; | 618 | 38.8k | T value = 0; | 619 | 38.8k | bool has_round = false; | 620 | 471k | for (int i = 0; i < len; ++i) { | 621 | 435k | const char& c = s[i]; | 622 | 435k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 403k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 403k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 403k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 403k | ++precision; | 631 | 403k | scale += found_dot; | 632 | 403k | cur_digit = precision - scale; | 633 | 403k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 4 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 4 | if (c > '4') { | 641 | 4 | value += 1; | 642 | 4 | } | 643 | 4 | has_round = true; | 644 | 4 | continue; | 645 | 4 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 403k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 403k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 28.9k | found_dot = 1; | 651 | 28.9k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 3.21k | found_exponent = true; | 653 | 3.21k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 3.21k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 3.21k | break; | 661 | 3.21k | } else { | 662 | 54 | if (value == 0) { | 663 | 42 | *result = StringParser::PARSE_FAILURE; | 664 | 42 | return 0; | 665 | 42 | } | 666 | | // here to handle | 667 | 12 | *result = StringParser::PARSE_SUCCESS; | 668 | 12 | if (type_scale >= scale) { | 669 | 11 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 11 | if (!is_numeric_ascii(c)) { | 674 | 11 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 11 | return is_negative ? T(-value) : T(value); | 683 | 11 | } | 684 | 11 | } | 685 | | | 686 | 1 | return is_negative ? T(-value) : T(value); | 687 | 12 | } | 688 | 435k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 38.7k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 38.7k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 38.7k | scale -= exponent; | 703 | 38.7k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 38.7k | if (scale > precision) { | 707 | 5.29k | precision = scale; | 708 | 5.29k | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 38.7k | *result = StringParser::PARSE_SUCCESS; | 713 | 38.7k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 2 | *result = StringParser::PARSE_OVERFLOW; | 715 | 2 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 2 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 2 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 2 | return value; | 720 | 2 | } | 721 | 38.7k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 2.70k | *result = StringParser::PARSE_UNDERFLOW; | 723 | 2.70k | int shift = scale - type_scale; | 724 | 2.70k | T divisor = get_scale_multiplier<T>(shift); | 725 | 2.70k | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 2.70k | } else { | 728 | 2.70k | T remainder = value % divisor; | 729 | 2.70k | value /= divisor; | 730 | 2.70k | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 81 | value += 1; | 732 | 81 | } | 733 | 2.70k | } | 734 | 2.70k | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 735 | 36.0k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 38.7k | if (type_scale > scale) { | 740 | 35.9k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 35.9k | } | 742 | | | 743 | 38.7k | return is_negative ? T(-value) : T(value); | 744 | 38.7k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EN4wide7integerILm256EiEENS_10vectorized7DecimalIS5_EEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 82.2k | int type_scale, ParseResult* result) { | 557 | 82.2k | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 82.2k | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 82.2k | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 82.2k | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 83.3k | while (len > 0 && is_whitespace(*s)) { | 569 | 1.12k | ++s; | 570 | 1.12k | --len; | 571 | 1.12k | } | 572 | 82.2k | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 82.2k | bool is_negative = false; | 577 | 82.2k | if (len > 0) { | 578 | 82.2k | switch (*s) { | 579 | 0 | case '-': | 580 | 0 | is_negative = true; | 581 | 0 | [[fallthrough]]; | 582 | 0 | case '+': | 583 | 0 | ++s; | 584 | 0 | --len; | 585 | 82.2k | } | 586 | 82.2k | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 82.2k | bool found_value = false; | 590 | 83.3k | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 1.04k | found_value = true; | 592 | 1.04k | ++s; | 593 | 1.04k | --len; | 594 | 1.04k | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. 
| 599 | 82.2k | int scale = 0; | 600 | 82.2k | int found_dot = 0; | 601 | 82.2k | if (len > 0 && *s == '.') { | 602 | 174 | found_dot = 1; | 603 | 174 | ++s; | 604 | 174 | --len; | 605 | 1.44k | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 1.26k | found_value = true; | 607 | 1.26k | ++scale; | 608 | 1.26k | ++s; | 609 | 1.26k | --len; | 610 | 1.26k | } | 611 | 174 | } | 612 | | | 613 | 82.2k | int precision = 0; | 614 | 82.2k | int max_digit = type_precision - type_scale; | 615 | 82.2k | int cur_digit = 0; | 616 | 82.2k | bool found_exponent = false; | 617 | 82.2k | int8_t exponent = 0; | 618 | 82.2k | T value = 0; | 619 | 82.2k | bool has_round = false; | 620 | 1.77M | for (int i = 0; i < len; ++i) { | 621 | 1.76M | const char& c = s[i]; | 622 | 1.76M | if (LIKELY('0' <= c && c <= '9')) { | 623 | 1.61M | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 1.61M | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 1.61M | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 1.61M | ++precision; | 631 | 1.61M | scale += found_dot; | 632 | 1.61M | cur_digit = precision - scale; | 633 | 1.61M | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 0 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 0 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 0 | has_round = true; | 644 | 0 | continue; | 645 | 0 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 1.61M | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 1.61M | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 81.1k | found_dot = 1; | 651 | 81.1k | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 69.4k | found_exponent = true; | 653 | 69.4k | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 69.4k | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 10 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 10 | return 0; | 659 | 10 | } | 660 | 69.4k | break; | 661 | 69.4k | } else { | 662 | 68 | if (value == 0) { | 663 | 61 | *result = StringParser::PARSE_FAILURE; | 664 | 61 | return 0; | 665 | 61 | } | 666 | | // here to handle | 667 | 7 | *result = StringParser::PARSE_SUCCESS; | 668 | 7 | if (type_scale >= scale) { | 669 | 7 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 7 | if (!is_numeric_ascii(c)) { | 674 | 7 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 7 | return is_negative ? T(-value) : T(value); | 683 | 7 | } | 684 | 7 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 7 | } | 688 | 1.76M | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 82.1k | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 67.2k | precision += exponent - scale; | 695 | | | 696 | 67.2k | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 67.2k | scale = 0; | 698 | 67.2k | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 14.8k | scale -= exponent; | 703 | 14.8k | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 82.1k | if (scale > precision) { | 707 | 172 | precision = scale; | 708 | 172 | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 82.1k | *result = StringParser::PARSE_SUCCESS; | 713 | 82.1k | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 0 | *result = StringParser::PARSE_OVERFLOW; | 715 | 0 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 0 | return value; | 720 | 0 | } | 721 | 82.1k | } else if (UNLIKELY(scale > type_scale)) { | 722 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 0 | int shift = scale - type_scale; | 724 | 0 | T divisor = get_scale_multiplier<T>(shift); | 725 | 0 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 0 | } else { | 728 | 0 | T remainder = value % divisor; | 729 | 0 | value /= divisor; | 730 | 0 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 0 | value += 1; | 732 | 0 | } | 733 | 0 | } | 734 | 0 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. | 735 | 82.1k | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 82.1k | if (type_scale > scale) { | 740 | 81.9k | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 81.9k | } | 742 | | | 743 | 82.1k | return is_negative ? T(-value) : T(value); | 744 | 82.1k | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EnNS_10vectorized7DecimalInEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 556 | 142 | int type_scale, ParseResult* result) { | 557 | 142 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 558 | 142 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 559 | 142 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 560 | 142 | "wide::Int256."); | 561 | | // Special cases: | 562 | | // 1) '' == Fail, an empty string fails to parse. | 563 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 564 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 565 | | // 4) '#.' == '#', a trailing dot is ignored. | 566 | | | 567 | | // Ignore leading and trailing spaces. | 568 | 142 | while (len > 0 && is_whitespace(*s)) { | 569 | 0 | ++s; | 570 | 0 | --len; | 571 | 0 | } | 572 | 142 | while (len > 0 && is_whitespace(s[len - 1])) { | 573 | 0 | --len; | 574 | 0 | } | 575 | | | 576 | 142 | bool is_negative = false; | 577 | 142 | if (len > 0) { | 578 | 142 | switch (*s) { | 579 | 25 | case '-': | 580 | 25 | is_negative = true; | 581 | 25 | [[fallthrough]]; | 582 | 25 | case '+': | 583 | 25 | ++s; | 584 | 25 | --len; | 585 | 142 | } | 586 | 142 | } | 587 | | | 588 | | // Ignore leading zeros. | 589 | 142 | bool found_value = false; | 590 | 164 | while (len > 0 && UNLIKELY(*s == '0')) { | 591 | 22 | found_value = true; | 592 | 22 | ++s; | 593 | 22 | --len; | 594 | 22 | } | 595 | | | 596 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 597 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 598 | | // overflow. 
| 599 | 142 | int scale = 0; | 600 | 142 | int found_dot = 0; | 601 | 142 | if (len > 0 && *s == '.') { | 602 | 9 | found_dot = 1; | 603 | 9 | ++s; | 604 | 9 | --len; | 605 | 14 | while (len > 0 && UNLIKELY(*s == '0')) { | 606 | 5 | found_value = true; | 607 | 5 | ++scale; | 608 | 5 | ++s; | 609 | 5 | --len; | 610 | 5 | } | 611 | 9 | } | 612 | | | 613 | 142 | int precision = 0; | 614 | 142 | int max_digit = type_precision - type_scale; | 615 | 142 | int cur_digit = 0; | 616 | 142 | bool found_exponent = false; | 617 | 142 | int8_t exponent = 0; | 618 | 142 | T value = 0; | 619 | 142 | bool has_round = false; | 620 | 2.24k | for (int i = 0; i < len; ++i) { | 621 | 2.11k | const char& c = s[i]; | 622 | 2.11k | if (LIKELY('0' <= c && c <= '9')) { | 623 | 1.99k | found_value = true; | 624 | | // Ignore digits once the type's precision limit is reached. This avoids | 625 | | // overflowing the underlying storage while handling a string like | 626 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 627 | | // an exponent will be made later. | 628 | 1.99k | if (LIKELY(type_precision > precision) && !has_round) { | 629 | 1.98k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 630 | 1.98k | ++precision; | 631 | 1.98k | scale += found_dot; | 632 | 1.98k | cur_digit = precision - scale; | 633 | 1.98k | } else if (!found_dot && max_digit < (precision - scale)) { | 634 | 0 | *result = StringParser::PARSE_OVERFLOW; | 635 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 636 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 637 | 0 | return value; | 638 | 8 | } else if (found_dot && scale >= type_scale && !has_round) { | 639 | | // make rounding cases | 640 | 8 | if (c > '4') { | 641 | 0 | value += 1; | 642 | 0 | } | 643 | 8 | has_round = true; | 644 | 8 | continue; | 645 | 8 | } else if (!found_dot) { | 646 | 0 | ++cur_digit; | 647 | 0 | } | 648 | 1.98k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 649 | 1.98k | } else if (c == '.' && LIKELY(!found_dot)) { | 650 | 105 | found_dot = 1; | 651 | 105 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 652 | 0 | found_exponent = true; | 653 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 654 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 655 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 656 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 657 | 0 | } | 658 | 0 | return 0; | 659 | 0 | } | 660 | 0 | break; | 661 | 15 | } else { | 662 | 15 | if (value == 0) { | 663 | 9 | *result = StringParser::PARSE_FAILURE; | 664 | 9 | return 0; | 665 | 9 | } | 666 | | // here to handle | 667 | 6 | *result = StringParser::PARSE_SUCCESS; | 668 | 6 | if (type_scale >= scale) { | 669 | 6 | value *= get_scale_multiplier<T>(type_scale - scale); | 670 | | // here meet non-valid character, should return the value, keep going to meet | 671 | | // the E/e character because we make right user-given type_precision | 672 | | // not max number type_precision | 673 | 6 | if (!is_numeric_ascii(c)) { | 674 | 6 | if (cur_digit > type_precision) { | 675 | 0 | *result = StringParser::PARSE_OVERFLOW; | 676 | 0 | value = is_negative | 677 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 678 | 0 | : vectorized::max_decimal_value<DecimalType>( | 679 | 0 | type_precision); | 680 | 0 | return value; | 681 | 0 | } | 682 | 6 | return is_negative ? T(-value) : T(value); | 683 | 6 | } | 684 | 6 | } | 685 | | | 686 | 0 | return is_negative ? T(-value) : T(value); | 687 | 6 | } | 688 | 2.11k | } | 689 | | | 690 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 691 | 127 | if (exponent > scale) { | 692 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 693 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 694 | 0 | precision += exponent - scale; | 695 | |
| 696 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 697 | 0 | scale = 0; | 698 | 127 | } else { | 699 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 700 | | // the precision must also be set to 4 but that will be done below for the | 701 | | // non-exponent case anyways. | 702 | 127 | scale -= exponent; | 703 | 127 | } | 704 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 705 | | // were ignored during previous parsing. | 706 | 127 | if (scale > precision) { | 707 | 3 | precision = scale; | 708 | 3 | } | 709 | | | 710 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 711 | | // than just letting the function run out. | 712 | 127 | *result = StringParser::PARSE_SUCCESS; | 713 | 127 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 714 | 9 | *result = StringParser::PARSE_OVERFLOW; | 715 | 9 | if constexpr (TYPE_DECIMALV2 != P) { | 716 | | // decimalv3 overflow will return max min value for type precision | 717 | 9 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 718 | 9 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 719 | 9 | return value; | 720 | 9 | } | 721 | 118 | } else if (UNLIKELY(scale > type_scale)) { | 722 | 17 | *result = StringParser::PARSE_UNDERFLOW; | 723 | 17 | int shift = scale - type_scale; | 724 | 17 | T divisor = get_scale_multiplier<T>(shift); | 725 | 17 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 726 | 0 | value = 0; | 727 | 17 | } else { | 728 | 17 | T remainder = value % divisor; | 729 | 17 | value /= divisor; | 730 | 17 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 731 | 17 | value += 1; | 732 | 17 | } | 733 | 17 | } | 734 | 17 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 735 | 101 | } else if (UNLIKELY(!found_value && !found_dot)) { | 736 | 0 | *result = StringParser::PARSE_FAILURE; | 737 | 0 | } | 738 | | | 739 | 127 | if (type_scale > scale) { | 740 | 78 | value *= get_scale_multiplier<T>(type_scale - scale); | 741 | 78 | } | 742 | | | 743 | 127 | return is_negative ? T(-value) : T(value); | 744 | 142 | } |
|
745 | | |
746 | | } // end namespace doris |