/root/doris/be/src/util/string_parser.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <fast_float/fast_float.h> |
24 | | #include <fast_float/parse_number.h> |
25 | | #include <glog/logging.h> |
26 | | #include <stdlib.h> |
27 | | |
28 | | // IWYU pragma: no_include <bits/std_abs.h> |
29 | | #include <cmath> // IWYU pragma: keep |
30 | | #include <cstdint> |
31 | | #include <limits> |
32 | | #include <map> |
33 | | #include <string> |
34 | | #include <system_error> |
35 | | #include <type_traits> |
36 | | #include <utility> |
37 | | |
38 | | #include "common/compiler_util.h" // IWYU pragma: keep |
39 | | #include "common/status.h" |
40 | | #include "runtime/large_int_value.h" |
41 | | #include "runtime/primitive_type.h" |
42 | | #include "vec/common/int_exp.h" |
43 | | #include "vec/core/extended_types.h" |
44 | | #include "vec/core/wide_integer.h" |
45 | | #include "vec/data_types/data_type_decimal.h" |
46 | | #include "vec/data_types/number_traits.h" |
47 | | |
48 | | namespace doris { |
49 | | namespace vectorized { |
50 | | template <DecimalNativeTypeConcept T> |
51 | | struct Decimal; |
52 | | } // namespace vectorized |
53 | | |
54 | | // Utility functions for doing atoi/atof on non-null terminated strings. On micro benchmarks, |
55 | | // this is significantly faster than libc (atoi/strtol and atof/strtod). |
56 | | // |
57 | | // Strings with leading and trailing whitespaces are accepted. |
58 | | // Branching is heavily optimized for the non-whitespace successful case. |
59 | | // All the StringTo* functions first parse the input string assuming it has no leading whitespace. |
60 | | // If that first attempt was unsuccessful, these functions retry the parsing after removing |
61 | | // whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction. |
62 | | // |
63 | | // For overflows, we are following the mysql behavior, to cap values at the max/min value for that |
64 | | // data type. This is different from hive, which returns NULL for overflow slots for int types |
65 | | // and inf/-inf for float types. |
66 | | // |
67 | | // Things we tried that did not work: |
68 | | // - lookup table for converting character to digit |
69 | | // Improvements (TODO): |
70 | | // - Validate input using _sidd_compare_ranges |
71 | | // - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2] |
72 | | class StringParser { |
73 | | public: |
// Outcome codes written through the `ParseResult*` out-parameter of the parsing helpers.
// In the integer parser visible in this file, PARSE_FAILURE is reported for empty or malformed
// input and PARSE_OVERFLOW when the digits do not fit the target type.
// NOTE(review): PARSE_UNDERFLOW is not produced by the code visible in this part of the file;
// presumably it is used by the float/decimal parsers - confirm.
74 | | enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW }; |
75 | | |
76 | | template <typename T> |
77 | 122k | static T numeric_limits(bool negative) { |
78 | 122k | if constexpr (std::is_same_v<T, __int128>) { |
79 | 121k | return negative ? MIN_INT128 : MAX_INT128; |
80 | 121k | } else { |
81 | 121k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); |
82 | 121k | } |
83 | 122k | } _ZN5doris12StringParser14numeric_limitsIaEET_b Line | Count | Source | 77 | 115k | static T numeric_limits(bool negative) { | 78 | 115k | if constexpr (std::is_same_v<T, __int128>) { | 79 | 115k | return negative ? MIN_INT128 : MAX_INT128; | 80 | 115k | } else { | 81 | 115k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 115k | } | 83 | 115k | } |
_ZN5doris12StringParser14numeric_limitsIsEET_b Line | Count | Source | 77 | 1.97k | static T numeric_limits(bool negative) { | 78 | 1.97k | if constexpr (std::is_same_v<T, __int128>) { | 79 | 1.97k | return negative ? MIN_INT128 : MAX_INT128; | 80 | 1.97k | } else { | 81 | 1.97k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 1.97k | } | 83 | 1.97k | } |
_ZN5doris12StringParser14numeric_limitsIiEET_b Line | Count | Source | 77 | 1.85k | static T numeric_limits(bool negative) { | 78 | 1.85k | if constexpr (std::is_same_v<T, __int128>) { | 79 | 1.85k | return negative ? MIN_INT128 : MAX_INT128; | 80 | 1.85k | } else { | 81 | 1.85k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 1.85k | } | 83 | 1.85k | } |
_ZN5doris12StringParser14numeric_limitsIlEET_b Line | Count | Source | 77 | 2.10k | static T numeric_limits(bool negative) { | 78 | 2.10k | if constexpr (std::is_same_v<T, __int128>) { | 79 | 2.10k | return negative ? MIN_INT128 : MAX_INT128; | 80 | 2.10k | } else { | 81 | 2.10k | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 2.10k | } | 83 | 2.10k | } |
_ZN5doris12StringParser14numeric_limitsInEET_b Line | Count | Source | 77 | 793 | static T numeric_limits(bool negative) { | 78 | 793 | if constexpr (std::is_same_v<T, __int128>) { | 79 | 793 | return negative ? MIN_INT128 : MAX_INT128; | 80 | 793 | } else { | 81 | 793 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 793 | } | 83 | 793 | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIoEET_b _ZN5doris12StringParser14numeric_limitsImEET_b Line | Count | Source | 77 | 21 | static T numeric_limits(bool negative) { | 78 | 21 | if constexpr (std::is_same_v<T, __int128>) { | 79 | 21 | return negative ? MIN_INT128 : MAX_INT128; | 80 | 21 | } else { | 81 | 21 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 21 | } | 83 | 21 | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIjEET_b _ZN5doris12StringParser14numeric_limitsIhEET_b Line | Count | Source | 77 | 91 | static T numeric_limits(bool negative) { | 78 | 91 | if constexpr (std::is_same_v<T, __int128>) { | 79 | 91 | return negative ? MIN_INT128 : MAX_INT128; | 80 | 91 | } else { | 81 | 91 | return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max(); | 82 | 91 | } | 83 | 91 | } |
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsItEET_b |
84 | | |
85 | | template <typename T> |
86 | 121 | static T get_scale_multiplier(int scale) { |
87 | 121 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
88 | 121 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
89 | 121 | "You can only instantiate as int32_t, int64_t, __int128."); |
90 | 121 | if constexpr (std::is_same_v<T, int32_t>) { |
91 | 117 | return common::exp10_i32(scale); |
92 | 117 | } else if constexpr (std::is_same_v<T, int64_t>) { |
93 | 113 | return common::exp10_i64(scale); |
94 | 113 | } else if constexpr (std::is_same_v<T, __int128>) { |
95 | 0 | return common::exp10_i128(scale); |
96 | 0 | } else if constexpr (std::is_same_v<T, wide::Int256>) { |
97 | 0 | return common::exp10_i256(scale); |
98 | 0 | } |
99 | 121 | } _ZN5doris12StringParser20get_scale_multiplierIiEET_i Line | Count | Source | 86 | 4 | static T get_scale_multiplier(int scale) { | 87 | 4 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 88 | 4 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 89 | 4 | "You can only instantiate as int32_t, int64_t, __int128."); | 90 | 4 | if constexpr (std::is_same_v<T, int32_t>) { | 91 | 4 | return common::exp10_i32(scale); | 92 | 4 | } else if constexpr (std::is_same_v<T, int64_t>) { | 93 | 4 | return common::exp10_i64(scale); | 94 | 4 | } else if constexpr (std::is_same_v<T, __int128>) { | 95 | 4 | return common::exp10_i128(scale); | 96 | 4 | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 97 | 4 | return common::exp10_i256(scale); | 98 | 4 | } | 99 | 4 | } |
_ZN5doris12StringParser20get_scale_multiplierIlEET_i Line | Count | Source | 86 | 4 | static T get_scale_multiplier(int scale) { | 87 | 4 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 88 | 4 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 89 | 4 | "You can only instantiate as int32_t, int64_t, __int128."); | 90 | 4 | if constexpr (std::is_same_v<T, int32_t>) { | 91 | 4 | return common::exp10_i32(scale); | 92 | 4 | } else if constexpr (std::is_same_v<T, int64_t>) { | 93 | 4 | return common::exp10_i64(scale); | 94 | 4 | } else if constexpr (std::is_same_v<T, __int128>) { | 95 | 4 | return common::exp10_i128(scale); | 96 | 4 | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 97 | 4 | return common::exp10_i256(scale); | 98 | 4 | } | 99 | 4 | } |
_ZN5doris12StringParser20get_scale_multiplierInEET_i Line | Count | Source | 86 | 113 | static T get_scale_multiplier(int scale) { | 87 | 113 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 88 | 113 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 89 | 113 | "You can only instantiate as int32_t, int64_t, __int128."); | 90 | 113 | if constexpr (std::is_same_v<T, int32_t>) { | 91 | 113 | return common::exp10_i32(scale); | 92 | 113 | } else if constexpr (std::is_same_v<T, int64_t>) { | 93 | 113 | return common::exp10_i64(scale); | 94 | 113 | } else if constexpr (std::is_same_v<T, __int128>) { | 95 | 113 | return common::exp10_i128(scale); | 96 | 113 | } else if constexpr (std::is_same_v<T, wide::Int256>) { | 97 | 113 | return common::exp10_i256(scale); | 98 | 113 | } | 99 | 113 | } |
Unexecuted instantiation: _ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i |
100 | | |
101 | | // This is considerably faster than glibc's implementation (25x). |
102 | | // In the case of overflow, the max/min value for the data type will be returned. |
103 | | // Assumes s represents a decimal number. |
104 | | template <typename T> |
105 | 28.8k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { |
106 | 28.8k | T ans = string_to_int_internal<T>(s, len, result); |
107 | 28.8k | if (LIKELY(*result == PARSE_SUCCESS)) { |
108 | 3.07k | return ans; |
109 | 3.07k | } |
110 | | |
111 | 25.7k | int i = skip_leading_whitespace(s, len); |
112 | 25.7k | return string_to_int_internal<T>(s + i, len - i, result); |
113 | 28.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 26.3k | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 26.3k | T ans = string_to_int_internal<T>(s, len, result); | 107 | 26.3k | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 1.85k | return ans; | 109 | 1.85k | } | 110 | | | 111 | 24.4k | int i = skip_leading_whitespace(s, len); | 112 | 24.4k | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 26.3k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 450 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 450 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 450 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 52 | return ans; | 109 | 52 | } | 110 | | | 111 | 398 | int i = skip_leading_whitespace(s, len); | 112 | 398 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 450 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 471 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 471 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 471 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 73 | return ans; | 109 | 73 | } | 110 | | | 111 | 398 | int i = skip_leading_whitespace(s, len); | 112 | 398 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 471 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 662 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 662 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 662 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 258 | return ans; | 109 | 258 | } | 110 | | | 111 | 404 | int i = skip_leading_whitespace(s, len); | 112 | 404 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 662 | } |
_ZN5doris12StringParser13string_to_intInEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 787 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 787 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 787 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 781 | return ans; | 109 | 781 | } | 110 | | | 111 | 6 | int i = skip_leading_whitespace(s, len); | 112 | 6 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 787 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEEEET_PKcmPNS0_11ParseResultE Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIoEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intImEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 20 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 20 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 20 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 20 | return ans; | 109 | 20 | } | 110 | | | 111 | 0 | int i = skip_leading_whitespace(s, len); | 112 | 0 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 20 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjEET_PKcmPNS0_11ParseResultE _ZN5doris12StringParser13string_to_intIhEET_PKcmPNS0_11ParseResultE Line | Count | Source | 105 | 63 | static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) { | 106 | 63 | T ans = string_to_int_internal<T>(s, len, result); | 107 | 63 | if (LIKELY(*result == PARSE_SUCCESS)) { | 108 | 35 | return ans; | 109 | 35 | } | 110 | | | 111 | 28 | int i = skip_leading_whitespace(s, len); | 112 | 28 | return string_to_int_internal<T>(s + i, len - i, result); | 113 | 63 | } |
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intItEET_PKcmPNS0_11ParseResultE |
114 | | |
115 | | // This is considerably faster than glibc's implementation. |
116 | | // In the case of overflow, the max/min value for the data type will be returned. |
117 | | // Assumes s represents a decimal number. |
118 | | template <typename T> |
119 | 1.37k | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { |
120 | 1.37k | T ans = string_to_unsigned_int_internal<T>(s, len, result); |
121 | 1.37k | if (LIKELY(*result == PARSE_SUCCESS)) { |
122 | 84 | return ans; |
123 | 84 | } |
124 | | |
125 | 1.28k | int i = skip_leading_whitespace(s, len); |
126 | 1.28k | return string_to_unsigned_int_internal<T>(s + i, len - i, result); |
127 | 1.37k | } _ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 119 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 120 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 121 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 122 | 21 | return ans; | 123 | 21 | } | 124 | | | 125 | 322 | int i = skip_leading_whitespace(s, len); | 126 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 127 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE Line | Count | Source | 119 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 120 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 121 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 122 | 21 | return ans; | 123 | 21 | } | 124 | | | 125 | 322 | int i = skip_leading_whitespace(s, len); | 126 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 127 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 119 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 120 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 121 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 122 | 21 | return ans; | 123 | 21 | } | 124 | | | 125 | 322 | int i = skip_leading_whitespace(s, len); | 126 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 127 | 343 | } |
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE Line | Count | Source | 119 | 343 | static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) { | 120 | 343 | T ans = string_to_unsigned_int_internal<T>(s, len, result); | 121 | 343 | if (LIKELY(*result == PARSE_SUCCESS)) { | 122 | 21 | return ans; | 123 | 21 | } | 124 | | | 125 | 322 | int i = skip_leading_whitespace(s, len); | 126 | 322 | return string_to_unsigned_int_internal<T>(s + i, len - i, result); | 127 | 343 | } |
|
128 | | |
129 | | // Convert a string s representing a number in given base into a decimal number. |
130 | | template <typename T> |
131 | | static inline T string_to_int(const char* __restrict s, int64_t len, int base, |
132 | 27.8k | ParseResult* result) { |
133 | 27.8k | T ans = string_to_int_internal<T>(s, len, base, result); |
134 | 27.8k | if (LIKELY(*result == PARSE_SUCCESS)) { |
135 | 2.06k | return ans; |
136 | 2.06k | } |
137 | | |
138 | 25.7k | int i = skip_leading_whitespace(s, len); |
139 | 25.7k | return string_to_int_internal<T>(s + i, len - i, base, result); |
140 | 27.8k | } _ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 132 | 26.4k | ParseResult* result) { | 133 | 26.4k | T ans = string_to_int_internal<T>(s, len, base, result); | 134 | 26.4k | if (LIKELY(*result == PARSE_SUCCESS)) { | 135 | 1.91k | return ans; | 136 | 1.91k | } | 137 | | | 138 | 24.5k | int i = skip_leading_whitespace(s, len); | 139 | 24.5k | return string_to_int_internal<T>(s + i, len - i, base, result); | 140 | 26.4k | } |
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 132 | 490 | ParseResult* result) { | 133 | 490 | T ans = string_to_int_internal<T>(s, len, base, result); | 134 | 490 | if (LIKELY(*result == PARSE_SUCCESS)) { | 135 | 56 | return ans; | 136 | 56 | } | 137 | | | 138 | 434 | int i = skip_leading_whitespace(s, len); | 139 | 434 | return string_to_int_internal<T>(s + i, len - i, base, result); | 140 | 490 | } |
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 132 | 441 | ParseResult* result) { | 133 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 134 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 135 | 49 | return ans; | 136 | 49 | } | 137 | | | 138 | 392 | int i = skip_leading_whitespace(s, len); | 139 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 140 | 441 | } |
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 132 | 441 | ParseResult* result) { | 133 | 441 | T ans = string_to_int_internal<T>(s, len, base, result); | 134 | 441 | if (LIKELY(*result == PARSE_SUCCESS)) { | 135 | 49 | return ans; | 136 | 49 | } | 137 | | | 138 | 392 | int i = skip_leading_whitespace(s, len); | 139 | 392 | return string_to_int_internal<T>(s + i, len - i, base, result); | 140 | 441 | } |
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 132 | 1 | ParseResult* result) { | 133 | 1 | T ans = string_to_int_internal<T>(s, len, base, result); | 134 | 1 | if (LIKELY(*result == PARSE_SUCCESS)) { | 135 | 1 | return ans; | 136 | 1 | } | 137 | | | 138 | 0 | int i = skip_leading_whitespace(s, len); | 139 | 0 | return string_to_int_internal<T>(s + i, len - i, base, result); | 140 | 1 | } |
|
141 | | |
142 | | template <typename T> |
143 | 19.2k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { |
144 | 19.2k | return string_to_float_internal<T>(s, len, result); |
145 | 19.2k | } _ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE Line | Count | Source | 143 | 8.25k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 144 | 8.25k | return string_to_float_internal<T>(s, len, result); | 145 | 8.25k | } |
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE Line | Count | Source | 143 | 11.0k | static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) { | 144 | 11.0k | return string_to_float_internal<T>(s, len, result); | 145 | 11.0k | } |
|
146 | | |
147 | | // Parses a string for 'true' or 'false', case insensitive. |
148 | 322 | static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) { |
149 | 322 | bool ans = string_to_bool_internal(s, len, result); |
150 | 322 | if (LIKELY(*result == PARSE_SUCCESS)) { |
151 | 26 | return ans; |
152 | 26 | } |
153 | | |
154 | 296 | int i = skip_leading_whitespace(s, len); |
155 | 296 | return string_to_bool_internal(s + i, len - i, result); |
156 | 322 | } |
157 | | |
// Declaration only; the definition is not visible in this part of the file.
// Parses the `len` bytes at `s` into T, which defaults to the native type behind the decimal
// primitive type P; the outcome is reported through `*result`.
// NOTE(review): `type_precision` / `type_scale` presumably describe the target DECIMAL(p, s)
// column type - confirm against the definition.
158 | | template <PrimitiveType P, typename T = PrimitiveTypeTraits<P>::CppType::NativeType, |
159 | | typename DecimalType = PrimitiveTypeTraits<P>::ColumnType::value_type> |
160 | | static inline T string_to_decimal(const char* __restrict s, int len, int type_precision, |
161 | | int type_scale, ParseResult* result); |
162 | | |
163 | | template <typename T> |
164 | | static Status split_string_to_map(const std::string& base, const T element_separator, |
165 | | const T key_value_separator, |
166 | | std::map<std::string, std::string>* result) { |
167 | | int key_pos = 0; |
168 | | int key_end; |
169 | | int val_pos; |
170 | | int val_end; |
171 | | |
172 | | while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) { |
173 | | if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) == |
174 | | std::string::npos) { |
175 | | break; |
176 | | } |
177 | | if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) { |
178 | | val_end = base.size(); |
179 | | } |
180 | | result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos), |
181 | | base.substr(val_pos, val_end - val_pos))); |
182 | | key_pos = val_end; |
183 | | if (key_pos != std::string::npos) { |
184 | | ++key_pos; |
185 | | } |
186 | | } |
187 | | |
188 | | return Status::OK(); |
189 | | } |
190 | | |
191 | | private: |
// Declaration; the definition follows the class body in this file.
// NOTE(review): `len` is `int` here while the public string_to_int(s, size_t len, result)
// wrapper takes size_t, so the length is implicitly narrowed at the call site.
192 | | // This is considerably faster than glibc's implementation. |
193 | | // In the case of overflow, the max/min value for the data type will be returned. |
194 | | // Assumes s represents a decimal number. |
195 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
196 | | template <typename T> |
197 | | static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result); |
198 | | |
// Declaration only; the definition is not visible in this part of the file.
// Called by string_to_unsigned_int(), first on the raw input and, if that fails, again after
// leading whitespace has been skipped.
199 | | // This is considerably faster than glibc's implementation. |
200 | | // In the case of overflow, the max/min value for the data type will be returned. |
201 | | // Assumes s represents a decimal number. |
202 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
203 | | template <typename T> |
204 | | static inline T string_to_unsigned_int_internal(const char* __restrict s, int len, |
205 | | ParseResult* result); |
206 | | |
// Declaration only; the definition is not visible in this part of the file.
// Overload taking an explicit radix (`base`); note that `len` is int64_t here, unlike the
// base-10 overload which takes `int`.
207 | | // Convert a string s representing a number in given base into a decimal number. |
208 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
209 | | template <typename T> |
210 | | static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base, |
211 | | ParseResult* result); |
212 | | |
// Declaration only; the definition is not visible in this part of the file.
// Used as the fast path of the base-10 string_to_int_internal(): it is called on the text
// after any sign character when that text is shorter than NumberTraits::max_ascii_len<T>(),
// and it is instantiated with the unsigned counterpart of the target type.
213 | | // Converts an ascii string to an integer of type T assuming it cannot overflow |
214 | | // and the number is positive. |
215 | | // Leading whitespace is not allowed. Trailing whitespace will be skipped. |
216 | | template <typename T> |
217 | | static inline T string_to_int_no_overflow(const char* __restrict s, int len, |
218 | | ParseResult* result); |
219 | | |
// Declaration only; the definition is not visible in this part of the file.
// NOTE(review): this header includes <fast_float/fast_float.h>; confirm whether the remarks
// below about falling back to strtod still describe the current implementation.
// NOTE(review): `len` is `int` here while the public string_to_float() wrapper takes size_t,
// so the length is implicitly narrowed at the call site.
220 | | // This is considerably faster than glibc's implementation (>100x why???) |
221 | | // No special case handling needs to be done for overflows, the floating point spec |
222 | | // already does it and will cap the values to -inf/inf |
223 | | // To avoid inaccurate conversions this function falls back to strtod for |
224 | | // scientific notation. |
225 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
226 | | // TODO: Investigate using intrinsics to speed up the slow strtod path. |
227 | | template <typename T> |
228 | | static inline T string_to_float_internal(const char* __restrict s, int len, |
229 | | ParseResult* result); |
230 | | |
// Declaration only; the definition is not visible in this part of the file.
// Called by string_to_bool(), first on the raw input and, if that fails, again after leading
// whitespace has been skipped.
231 | | // parses a string for 'true' or 'false', case insensitive |
232 | | // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed. |
233 | | static inline bool string_to_bool_internal(const char* __restrict s, int len, |
234 | | ParseResult* result); |
235 | | |
236 | | // Returns true if s only contains whitespace. |
237 | 25.4k | static inline bool is_all_whitespace(const char* __restrict s, int len) { |
238 | 100k | for (int i = 0; i < len; ++i) { |
239 | 75.4k | if (!LIKELY(is_whitespace(s[i]))) { |
240 | 618 | return false; |
241 | 618 | } |
242 | 75.4k | } |
243 | 24.8k | return true; |
244 | 25.4k | } |
245 | | |
246 | | // For strings like "3.0", "3.123", and "3.", can parse them as 3. |
247 | 339 | static inline bool is_float_suffix(const char* __restrict s, int len) { |
248 | 339 | return (s[0] == '.' && is_all_digit(s + 1, len - 1)); |
249 | 339 | } |
250 | | |
251 | 1 | static inline bool is_all_digit(const char* __restrict s, int len) { |
252 | 2 | for (int i = 0; i < len; ++i) { |
253 | 1 | if (!LIKELY(s[i] >= '0' && s[i] <= '9')) { |
254 | 0 | return false; |
255 | 0 | } |
256 | 1 | } |
257 | 1 | return true; |
258 | 1 | } |
259 | | |
260 | | // Returns the position of the first non-whitespace character in s. |
261 | 53.0k | static inline int skip_leading_whitespace(const char* __restrict s, int len) { |
262 | 53.0k | int i = 0; |
263 | 202k | while (i < len && is_whitespace(s[i])) { |
264 | 149k | ++i; |
265 | 149k | } |
266 | 53.0k | return i; |
267 | 53.0k | } |
268 | | |
269 | | // Our own definition of "isspace" that optimize on the ' ' branch. |
270 | 398k | static inline bool is_whitespace(const char& c) { |
271 | 398k | return LIKELY(c == ' ') || |
272 | 398k | UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'); |
273 | 398k | } |
274 | | |
275 | | }; // end of class StringParser |
276 | | |
277 | | template <typename T> |
278 | 54.5k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { |
279 | 54.5k | if (UNLIKELY(len <= 0)) { |
280 | 206 | *result = PARSE_FAILURE; |
281 | 206 | return 0; |
282 | 206 | } |
283 | | |
284 | 54.3k | typedef typename std::make_unsigned<T>::type UnsignedT; |
285 | 54.3k | UnsignedT val = 0; |
286 | 54.3k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
287 | 54.3k | bool negative = false; |
288 | 54.3k | int i = 0; |
289 | 54.3k | switch (*s) { |
290 | 14.3k | case '-': |
291 | 14.3k | negative = true; |
292 | 14.3k | max_val += 1; |
293 | 14.3k | [[fallthrough]]; |
294 | 14.6k | case '+': |
295 | 14.6k | ++i; |
296 | | // only one '+'/'-' char, so could return failure directly |
297 | 14.6k | if (UNLIKELY(len == 1)) { |
298 | 0 | *result = PARSE_FAILURE; |
299 | 0 | return 0; |
300 | 0 | } |
301 | 54.3k | } |
302 | | |
303 | | // This is the fast path where the string cannot overflow. |
304 | 54.3k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { |
305 | 3.42k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); |
306 | 3.42k | return static_cast<T>(negative ? -val : val); |
307 | 3.42k | } |
308 | | |
309 | 50.8k | const T max_div_10 = max_val / 10; |
310 | 50.8k | const T max_mod_10 = max_val % 10; |
311 | | |
312 | 50.8k | int first = i; |
313 | 114k | for (; i < len; ++i) { |
314 | 113k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
315 | 78.2k | T digit = s[i] - '0'; |
316 | | // This is a tricky check to see if adding this digit will cause an overflow. |
317 | 78.2k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
318 | 14.8k | *result = PARSE_OVERFLOW; |
319 | 14.8k | return negative ? -max_val : max_val; |
320 | 14.8k | } |
321 | 63.4k | val = val * 10 + digit; |
322 | 63.4k | } else { |
323 | 35.5k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && |
324 | 35.5k | !is_float_suffix(s + i, len - i))))) { |
325 | | // Reject the string because either the first char was not a digit, |
326 | | // or the remaining chars are not all whitespace |
327 | 24.0k | *result = PARSE_FAILURE; |
328 | 24.0k | return 0; |
329 | 24.0k | } |
330 | | // Returning here is slightly faster than breaking the loop. |
331 | 11.4k | *result = PARSE_SUCCESS; |
332 | 11.4k | return static_cast<T>(negative ? -val : val); |
333 | 35.5k | } |
334 | 113k | } |
335 | 504 | *result = PARSE_SUCCESS; |
336 | 504 | return static_cast<T>(negative ? -val : val); |
337 | 50.8k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 50.8k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 50.8k | if (UNLIKELY(len <= 0)) { | 280 | 206 | *result = PARSE_FAILURE; | 281 | 206 | return 0; | 282 | 206 | } | 283 | | | 284 | 50.6k | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 50.6k | UnsignedT val = 0; | 286 | 50.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 50.6k | bool negative = false; | 288 | 50.6k | int i = 0; | 289 | 50.6k | switch (*s) { | 290 | 13.8k | case '-': | 291 | 13.8k | negative = true; | 292 | 13.8k | max_val += 1; | 293 | 13.8k | [[fallthrough]]; | 294 | 13.9k | case '+': | 295 | 13.9k | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 13.9k | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 50.6k | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 50.6k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 1.43k | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 1.43k | return static_cast<T>(negative ? -val : val); | 307 | 1.43k | } | 308 | | | 309 | 49.1k | const T max_div_10 = max_val / 10; | 310 | 49.1k | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 49.1k | int first = i; | 313 | 104k | for (; i < len; ++i) { | 314 | 103k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 69.4k | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 69.4k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 14.5k | *result = PARSE_OVERFLOW; | 319 | 14.5k | return negative ? 
-max_val : max_val; | 320 | 14.5k | } | 321 | 54.9k | val = val * 10 + digit; | 322 | 54.9k | } else { | 323 | 34.2k | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 34.2k | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 23.2k | *result = PARSE_FAILURE; | 328 | 23.2k | return 0; | 329 | 23.2k | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 11.0k | *result = PARSE_SUCCESS; | 332 | 11.0k | return static_cast<T>(negative ? -val : val); | 333 | 34.2k | } | 334 | 103k | } | 335 | 433 | *result = PARSE_SUCCESS; | 336 | 433 | return static_cast<T>(negative ? -val : val); | 337 | 49.1k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 848 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 848 | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 848 | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 848 | UnsignedT val = 0; | 286 | 848 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 848 | bool negative = false; | 288 | 848 | int i = 0; | 289 | 848 | switch (*s) { | 290 | 209 | case '-': | 291 | 209 | negative = true; | 292 | 209 | max_val += 1; | 293 | 209 | [[fallthrough]]; | 294 | 258 | case '+': | 295 | 258 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 258 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 848 | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 848 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 112 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 112 | return static_cast<T>(negative ? -val : val); | 307 | 112 | } | 308 | | | 309 | 736 | const T max_div_10 = max_val / 10; | 310 | 736 | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 736 | int first = i; | 313 | 2.19k | for (; i < len; ++i) { | 314 | 2.16k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 1.58k | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 1.58k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 124 | *result = PARSE_OVERFLOW; | 319 | 124 | return negative ? 
-max_val : max_val; | 320 | 124 | } | 321 | 1.45k | val = val * 10 + digit; | 322 | 1.45k | } else { | 323 | 588 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 588 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 378 | *result = PARSE_FAILURE; | 328 | 378 | return 0; | 329 | 378 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 210 | *result = PARSE_SUCCESS; | 332 | 210 | return static_cast<T>(negative ? -val : val); | 333 | 588 | } | 334 | 2.16k | } | 335 | 24 | *result = PARSE_SUCCESS; | 336 | 24 | return static_cast<T>(negative ? -val : val); | 337 | 736 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 869 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 869 | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 869 | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 869 | UnsignedT val = 0; | 286 | 869 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 869 | bool negative = false; | 288 | 869 | int i = 0; | 289 | 869 | switch (*s) { | 290 | 160 | case '-': | 291 | 160 | negative = true; | 292 | 160 | max_val += 1; | 293 | 160 | [[fallthrough]]; | 294 | 258 | case '+': | 295 | 258 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 258 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 869 | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 869 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 392 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 392 | return static_cast<T>(negative ? -val : val); | 307 | 392 | } | 308 | | | 309 | 477 | const T max_div_10 = max_val / 10; | 310 | 477 | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 477 | int first = i; | 313 | 2.93k | for (; i < len; ++i) { | 314 | 2.92k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 2.58k | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 2.58k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 124 | *result = PARSE_OVERFLOW; | 319 | 124 | return negative ? 
-max_val : max_val; | 320 | 124 | } | 321 | 2.46k | val = val * 10 + digit; | 322 | 2.46k | } else { | 323 | 336 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 336 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 210 | *result = PARSE_FAILURE; | 328 | 210 | return 0; | 329 | 210 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 126 | *result = PARSE_SUCCESS; | 332 | 126 | return static_cast<T>(negative ? -val : val); | 333 | 336 | } | 334 | 2.92k | } | 335 | 17 | *result = PARSE_SUCCESS; | 336 | 17 | return static_cast<T>(negative ? -val : val); | 337 | 477 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 1.06k | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 1.06k | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 1.06k | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 1.06k | UnsignedT val = 0; | 286 | 1.06k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 1.06k | bool negative = false; | 288 | 1.06k | int i = 0; | 289 | 1.06k | switch (*s) { | 290 | 207 | case '-': | 291 | 207 | negative = true; | 292 | 207 | max_val += 1; | 293 | 207 | [[fallthrough]]; | 294 | 256 | case '+': | 295 | 256 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 256 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 1.06k | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 1.06k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 633 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 633 | return static_cast<T>(negative ? -val : val); | 307 | 633 | } | 308 | | | 309 | 433 | const T max_div_10 = max_val / 10; | 310 | 433 | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 433 | int first = i; | 313 | 4.66k | for (; i < len; ++i) { | 314 | 4.63k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 4.35k | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 4.35k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 118 | *result = PARSE_OVERFLOW; | 319 | 118 | return negative ? 
-max_val : max_val; | 320 | 118 | } | 321 | 4.23k | val = val * 10 + digit; | 322 | 4.23k | } else { | 323 | 288 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 288 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 204 | *result = PARSE_FAILURE; | 328 | 204 | return 0; | 329 | 204 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 84 | *result = PARSE_SUCCESS; | 332 | 84 | return static_cast<T>(negative ? -val : val); | 333 | 288 | } | 334 | 4.63k | } | 335 | 27 | *result = PARSE_SUCCESS; | 336 | 27 | return static_cast<T>(negative ? -val : val); | 337 | 433 | } |
_ZN5doris12StringParser22string_to_int_internalInEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 793 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 793 | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 793 | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 793 | UnsignedT val = 0; | 286 | 793 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 793 | bool negative = false; | 288 | 793 | int i = 0; | 289 | 793 | switch (*s) { | 290 | 0 | case '-': | 291 | 0 | negative = true; | 292 | 0 | max_val += 1; | 293 | 0 | [[fallthrough]]; | 294 | 0 | case '+': | 295 | 0 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 0 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 793 | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 793 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 778 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 778 | return static_cast<T>(negative ? -val : val); | 307 | 778 | } | 308 | | | 309 | 15 | const T max_div_10 = max_val / 10; | 310 | 15 | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 15 | int first = i; | 313 | 360 | for (; i < len; ++i) { | 314 | 357 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 351 | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 351 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 6 | *result = PARSE_OVERFLOW; | 319 | 6 | return negative ? 
-max_val : max_val; | 320 | 6 | } | 321 | 345 | val = val * 10 + digit; | 322 | 345 | } else { | 323 | 6 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 6 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 6 | *result = PARSE_FAILURE; | 328 | 6 | return 0; | 329 | 6 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 0 | *result = PARSE_SUCCESS; | 332 | 0 | return static_cast<T>(negative ? -val : val); | 333 | 6 | } | 334 | 357 | } | 335 | 3 | *result = PARSE_SUCCESS; | 336 | 3 | return static_cast<T>(negative ? -val : val); | 337 | 15 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIoEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 20 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 20 | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 20 | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 20 | UnsignedT val = 0; | 286 | 20 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 20 | bool negative = false; | 288 | 20 | int i = 0; | 289 | 20 | switch (*s) { | 290 | 0 | case '-': | 291 | 0 | negative = true; | 292 | 0 | max_val += 1; | 293 | 0 | [[fallthrough]]; | 294 | 0 | case '+': | 295 | 0 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 0 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 20 | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 20 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 20 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 20 | return static_cast<T>(negative ? -val : val); | 307 | 20 | } | 308 | | | 309 | 0 | const T max_div_10 = max_val / 10; | 310 | 0 | const T max_mod_10 = max_val % 10; | 311 | |
| 312 | 0 | int first = i; | 313 | 0 | for (; i < len; ++i) { | 314 | 0 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 0 | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 0 | *result = PARSE_OVERFLOW; | 319 | 0 | return negative ? -max_val : max_val; | 320 | 0 | } | 321 | 0 | val = val * 10 + digit; | 322 | 0 | } else { | 323 | 0 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 0 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 0 | *result = PARSE_FAILURE; | 328 | 0 | return 0; | 329 | 0 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 0 | *result = PARSE_SUCCESS; | 332 | 0 | return static_cast<T>(negative ? -val : val); | 333 | 0 | } | 334 | 0 | } | 335 | 0 | *result = PARSE_SUCCESS; | 336 | 0 | return static_cast<T>(negative ? -val : val); | 337 | 0 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjEET_PKciPNS0_11ParseResultE _ZN5doris12StringParser22string_to_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 278 | 91 | T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) { | 279 | 91 | if (UNLIKELY(len <= 0)) { | 280 | 0 | *result = PARSE_FAILURE; | 281 | 0 | return 0; | 282 | 0 | } | 283 | | | 284 | 91 | typedef typename std::make_unsigned<T>::type UnsignedT; | 285 | 91 | UnsignedT val = 0; | 286 | 91 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 287 | 91 | bool negative = false; | 288 | 91 | int i = 0; | 289 | 91 | switch (*s) { | 290 | 8 | case '-': | 291 | 8 | negative = true; | 292 | 8 | max_val += 1; | 293 | 8 | [[fallthrough]]; | 294 | 8 | case '+': | 295 | 8 | ++i; | 296 | | // only one '+'/'-' char, so could return failure directly | 297 | 8 | if (UNLIKELY(len == 1)) { | 298 | 0 | *result = PARSE_FAILURE; | 299 | 0 | return 0; | 300 | 0 | } | 301 | 91 | } | 302 | | | 303 | | // This is the fast path where the string cannot overflow. | 304 | 91 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) { | 305 | 53 | val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result); | 306 | 53 | return static_cast<T>(negative ? -val : val); | 307 | 53 | } | 308 | | | 309 | 38 | const T max_div_10 = max_val / 10; | 310 | 38 | const T max_mod_10 = max_val % 10; | 311 | | | 312 | 38 | int first = i; | 313 | 38 | for (; i < len; ++i) { | 314 | 38 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 315 | 0 | T digit = s[i] - '0'; | 316 | | // This is a tricky check to see if adding this digit will cause an overflow. | 317 | 0 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 318 | 0 | *result = PARSE_OVERFLOW; | 319 | 0 | return negative ? 
-max_val : max_val; | 320 | 0 | } | 321 | 0 | val = val * 10 + digit; | 322 | 38 | } else { | 323 | 38 | if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) && | 324 | 38 | !is_float_suffix(s + i, len - i))))) { | 325 | | // Reject the string because either the first char was not a digit, | 326 | | // or the remaining chars are not all whitespace | 327 | 38 | *result = PARSE_FAILURE; | 328 | 38 | return 0; | 329 | 38 | } | 330 | | // Returning here is slightly faster than breaking the loop. | 331 | 0 | *result = PARSE_SUCCESS; | 332 | 0 | return static_cast<T>(negative ? -val : val); | 333 | 38 | } | 334 | 38 | } | 335 | 0 | *result = PARSE_SUCCESS; | 336 | 0 | return static_cast<T>(negative ? -val : val); | 337 | 38 | } |
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalItEET_PKciPNS0_11ParseResultE |
338 | | |
339 | | template <typename T> |
340 | | T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len, |
341 | 2.66k | ParseResult* result) { |
342 | 2.66k | if (UNLIKELY(len <= 0)) { |
343 | 0 | *result = PARSE_FAILURE; |
344 | 0 | return 0; |
345 | 0 | } |
346 | | |
347 | 2.66k | T val = 0; |
348 | 2.66k | T max_val = std::numeric_limits<T>::max(); |
349 | 2.66k | int i = 0; |
350 | | |
351 | 2.66k | typedef typename std::make_signed<T>::type signedT; |
352 | | // This is the fast path where the string cannot overflow. |
353 | 2.66k | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { |
354 | 879 | val = string_to_int_no_overflow<T>(s + i, len - i, result); |
355 | 879 | return val; |
356 | 879 | } |
357 | | |
358 | 1.78k | const T max_div_10 = max_val / 10; |
359 | 1.78k | const T max_mod_10 = max_val % 10; |
360 | | |
361 | 1.78k | int first = i; |
362 | 6.54k | for (; i < len; ++i) { |
363 | 6.49k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
364 | 4.99k | T digit = s[i] - '0'; |
365 | | // This is a tricky check to see if adding this digit will cause an overflow. |
366 | 4.99k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { |
367 | 224 | *result = PARSE_OVERFLOW; |
368 | 224 | return max_val; |
369 | 224 | } |
370 | 4.76k | val = val * 10 + digit; |
371 | 4.76k | } else { |
372 | 1.50k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
373 | | // Reject the string because either the first char was not a digit, |
374 | | // or the remaining chars are not all whitespace |
375 | 1.13k | *result = PARSE_FAILURE; |
376 | 1.13k | return 0; |
377 | 1.13k | } |
378 | | // Returning here is slightly faster than breaking the loop. |
379 | 378 | *result = PARSE_SUCCESS; |
380 | 378 | return val; |
381 | 1.50k | } |
382 | 6.49k | } |
383 | 49 | *result = PARSE_SUCCESS; |
384 | 49 | return val; |
385 | 1.78k | } _ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 341 | 665 | ParseResult* result) { | 342 | 665 | if (UNLIKELY(len <= 0)) { | 343 | 0 | *result = PARSE_FAILURE; | 344 | 0 | return 0; | 345 | 0 | } | 346 | | | 347 | 665 | T val = 0; | 348 | 665 | T max_val = std::numeric_limits<T>::max(); | 349 | 665 | int i = 0; | 350 | | | 351 | 665 | typedef typename std::make_signed<T>::type signedT; | 352 | | // This is the fast path where the string cannot overflow. | 353 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 354 | 16 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 355 | 16 | return val; | 356 | 16 | } | 357 | | | 358 | 649 | const T max_div_10 = max_val / 10; | 359 | 649 | const T max_mod_10 = max_val % 10; | 360 | | | 361 | 649 | int first = i; | 362 | 1.20k | for (; i < len; ++i) { | 363 | 1.18k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 364 | 609 | T digit = s[i] - '0'; | 365 | | // This is a tricky check to see if adding this digit will cause an overflow. | 366 | 609 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 367 | 56 | *result = PARSE_OVERFLOW; | 368 | 56 | return max_val; | 369 | 56 | } | 370 | 553 | val = val * 10 + digit; | 371 | 572 | } else { | 372 | 572 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 373 | | // Reject the string because either the first char was not a digit, | 374 | | // or the remaining chars are not all whitespace | 375 | 446 | *result = PARSE_FAILURE; | 376 | 446 | return 0; | 377 | 446 | } | 378 | | // Returning here is slightly faster than breaking the loop. | 379 | 126 | *result = PARSE_SUCCESS; | 380 | 126 | return val; | 381 | 572 | } | 382 | 1.18k | } | 383 | 21 | *result = PARSE_SUCCESS; | 384 | 21 | return val; | 385 | 649 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE Line | Count | Source | 341 | 665 | ParseResult* result) { | 342 | 665 | if (UNLIKELY(len <= 0)) { | 343 | 0 | *result = PARSE_FAILURE; | 344 | 0 | return 0; | 345 | 0 | } | 346 | | | 347 | 665 | T val = 0; | 348 | 665 | T max_val = std::numeric_limits<T>::max(); | 349 | 665 | int i = 0; | 350 | | | 351 | 665 | typedef typename std::make_signed<T>::type signedT; | 352 | | // This is the fast path where the string cannot overflow. | 353 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 354 | 31 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 355 | 31 | return val; | 356 | 31 | } | 357 | | | 358 | 634 | const T max_div_10 = max_val / 10; | 359 | 634 | const T max_mod_10 = max_val % 10; | 360 | | | 361 | 634 | int first = i; | 362 | 1.47k | for (; i < len; ++i) { | 363 | 1.46k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 364 | 896 | T digit = s[i] - '0'; | 365 | | // This is a tricky check to see if adding this digit will cause an overflow. | 366 | 896 | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 367 | 56 | *result = PARSE_OVERFLOW; | 368 | 56 | return max_val; | 369 | 56 | } | 370 | 840 | val = val * 10 + digit; | 371 | 840 | } else { | 372 | 564 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 373 | | // Reject the string because either the first char was not a digit, | 374 | | // or the remaining chars are not all whitespace | 375 | 438 | *result = PARSE_FAILURE; | 376 | 438 | return 0; | 377 | 438 | } | 378 | | // Returning here is slightly faster than breaking the loop. | 379 | 126 | *result = PARSE_SUCCESS; | 380 | 126 | return val; | 381 | 564 | } | 382 | 1.46k | } | 383 | 14 | *result = PARSE_SUCCESS; | 384 | 14 | return val; | 385 | 634 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 341 | 665 | ParseResult* result) { | 342 | 665 | if (UNLIKELY(len <= 0)) { | 343 | 0 | *result = PARSE_FAILURE; | 344 | 0 | return 0; | 345 | 0 | } | 346 | | | 347 | 665 | T val = 0; | 348 | 665 | T max_val = std::numeric_limits<T>::max(); | 349 | 665 | int i = 0; | 350 | | | 351 | 665 | typedef typename std::make_signed<T>::type signedT; | 352 | | // This is the fast path where the string cannot overflow. | 353 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 354 | 392 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 355 | 392 | return val; | 356 | 392 | } | 357 | | | 358 | 273 | const T max_div_10 = max_val / 10; | 359 | 273 | const T max_mod_10 = max_val % 10; | 360 | | | 361 | 273 | int first = i; | 362 | 1.60k | for (; i < len; ++i) { | 363 | 1.59k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 364 | 1.38k | T digit = s[i] - '0'; | 365 | | // This is a tricky check to see if adding this digit will cause an overflow. | 366 | 1.38k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 367 | 56 | *result = PARSE_OVERFLOW; | 368 | 56 | return max_val; | 369 | 56 | } | 370 | 1.33k | val = val * 10 + digit; | 371 | 1.33k | } else { | 372 | 210 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 373 | | // Reject the string because either the first char was not a digit, | 374 | | // or the remaining chars are not all whitespace | 375 | 126 | *result = PARSE_FAILURE; | 376 | 126 | return 0; | 377 | 126 | } | 378 | | // Returning here is slightly faster than breaking the loop. | 379 | 84 | *result = PARSE_SUCCESS; | 380 | 84 | return val; | 381 | 210 | } | 382 | 1.59k | } | 383 | 7 | *result = PARSE_SUCCESS; | 384 | 7 | return val; | 385 | 273 | } |
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE Line | Count | Source | 341 | 665 | ParseResult* result) { | 342 | 665 | if (UNLIKELY(len <= 0)) { | 343 | 0 | *result = PARSE_FAILURE; | 344 | 0 | return 0; | 345 | 0 | } | 346 | | | 347 | 665 | T val = 0; | 348 | 665 | T max_val = std::numeric_limits<T>::max(); | 349 | 665 | int i = 0; | 350 | | | 351 | 665 | typedef typename std::make_signed<T>::type signedT; | 352 | | // This is the fast path where the string cannot overflow. | 353 | 665 | if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) { | 354 | 440 | val = string_to_int_no_overflow<T>(s + i, len - i, result); | 355 | 440 | return val; | 356 | 440 | } | 357 | | | 358 | 225 | const T max_div_10 = max_val / 10; | 359 | 225 | const T max_mod_10 = max_val % 10; | 360 | | | 361 | 225 | int first = i; | 362 | 2.26k | for (; i < len; ++i) { | 363 | 2.26k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 364 | 2.10k | T digit = s[i] - '0'; | 365 | | // This is a tricky check to see if adding this digit will cause an overflow. | 366 | 2.10k | if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) { | 367 | 56 | *result = PARSE_OVERFLOW; | 368 | 56 | return max_val; | 369 | 56 | } | 370 | 2.04k | val = val * 10 + digit; | 371 | 2.04k | } else { | 372 | 162 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 373 | | // Reject the string because either the first char was not a digit, | 374 | | // or the remaining chars are not all whitespace | 375 | 120 | *result = PARSE_FAILURE; | 376 | 120 | return 0; | 377 | 120 | } | 378 | | // Returning here is slightly faster than breaking the loop. | 379 | 42 | *result = PARSE_SUCCESS; | 380 | 42 | return val; | 381 | 162 | } | 382 | 2.26k | } | 383 | 7 | *result = PARSE_SUCCESS; | 384 | 7 | return val; | 385 | 225 | } |
|
386 | | |
387 | | template <typename T> |
388 | | T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base, |
389 | 53.6k | ParseResult* result) { |
390 | 53.6k | typedef typename std::make_unsigned<T>::type UnsignedT; |
391 | 53.6k | UnsignedT val = 0; |
392 | 53.6k | UnsignedT max_val = StringParser::numeric_limits<T>(false); |
393 | 53.6k | bool negative = false; |
394 | 53.6k | if (UNLIKELY(len <= 0)) { |
395 | 0 | *result = PARSE_FAILURE; |
396 | 0 | return 0; |
397 | 0 | } |
398 | 53.6k | int i = 0; |
399 | 53.6k | switch (*s) { |
400 | 14.3k | case '-': |
401 | 14.3k | negative = true; |
402 | 14.3k | max_val = StringParser::numeric_limits<T>(false) + 1; |
403 | 14.3k | [[fallthrough]]; |
404 | 14.6k | case '+': |
405 | 14.6k | i = 1; |
406 | 53.6k | } |
407 | | |
408 | 53.6k | const T max_div_base = max_val / base; |
409 | 53.6k | const T max_mod_base = max_val % base; |
410 | | |
411 | 53.6k | int first = i; |
412 | 120k | for (; i < len; ++i) { |
413 | 118k | T digit; |
414 | 118k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
415 | 81.6k | digit = s[i] - '0'; |
416 | 81.6k | } else if (s[i] >= 'a' && s[i] <= 'z') { |
417 | 639 | digit = (s[i] - 'a' + 10); |
418 | 36.4k | } else if (s[i] >= 'A' && s[i] <= 'Z') { |
419 | 98 | digit = (s[i] - 'A' + 10); |
420 | 36.3k | } else { |
421 | 36.3k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { |
422 | | // Reject the string because either the first char was not an alpha/digit, |
423 | | // or the remaining chars are not all whitespace |
424 | 24.0k | *result = PARSE_FAILURE; |
425 | 24.0k | return 0; |
426 | 24.0k | } |
427 | | // skip trailing whitespace. |
428 | 12.2k | break; |
429 | 36.3k | } |
430 | | |
431 | | // Bail, if we encounter a digit that is not available in base. |
432 | 82.4k | if (digit >= base) { |
433 | 392 | break; |
434 | 392 | } |
435 | | |
436 | | // This is a tricky check to see if adding this digit will cause an overflow. |
437 | 82.0k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { |
438 | 14.8k | *result = PARSE_OVERFLOW; |
439 | 14.8k | return static_cast<T>(negative ? -max_val : max_val); |
440 | 14.8k | } |
441 | 67.2k | val = val * base + digit; |
442 | 67.2k | } |
443 | 14.7k | *result = PARSE_SUCCESS; |
444 | 14.7k | return static_cast<T>(negative ? -val : val); |
445 | 53.6k | } _ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE Line | Count | Source | 389 | 51.0k | ParseResult* result) { | 390 | 51.0k | typedef typename std::make_unsigned<T>::type UnsignedT; | 391 | 51.0k | UnsignedT val = 0; | 392 | 51.0k | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 393 | 51.0k | bool negative = false; | 394 | 51.0k | if (UNLIKELY(len <= 0)) { | 395 | 0 | *result = PARSE_FAILURE; | 396 | 0 | return 0; | 397 | 0 | } | 398 | 51.0k | int i = 0; | 399 | 51.0k | switch (*s) { | 400 | 13.7k | case '-': | 401 | 13.7k | negative = true; | 402 | 13.7k | max_val = StringParser::numeric_limits<T>(false) + 1; | 403 | 13.7k | [[fallthrough]]; | 404 | 13.8k | case '+': | 405 | 13.8k | i = 1; | 406 | 51.0k | } | 407 | | | 408 | 51.0k | const T max_div_base = max_val / base; | 409 | 51.0k | const T max_mod_base = max_val % base; | 410 | | | 411 | 51.0k | int first = i; | 412 | 108k | for (; i < len; ++i) { | 413 | 107k | T digit; | 414 | 107k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 415 | 72.1k | digit = s[i] - '0'; | 416 | 72.1k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 417 | 539 | digit = (s[i] - 'a' + 10); | 418 | 34.3k | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 419 | 98 | digit = (s[i] - 'A' + 10); | 420 | 34.2k | } else { | 421 | 34.2k | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 422 | | // Reject the string because either the first char was not an alpha/digit, | 423 | | // or the remaining chars are not all whitespace | 424 | 22.8k | *result = PARSE_FAILURE; | 425 | 22.8k | return 0; | 426 | 22.8k | } | 427 | | // skip trailing whitespace. | 428 | 11.3k | break; | 429 | 34.2k | } | 430 | | | 431 | | // Bail, if we encounter a digit that is not available in base. | 432 | 72.7k | if (digit >= base) { | 433 | 392 | break; | 434 | 392 | } | 435 | | | 436 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 437 | 72.4k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 438 | 14.5k | *result = PARSE_OVERFLOW; | 439 | 14.5k | return static_cast<T>(negative ? -max_val : max_val); | 440 | 14.5k | } | 441 | 57.8k | val = val * base + digit; | 442 | 57.8k | } | 443 | 13.6k | *result = PARSE_SUCCESS; | 444 | 13.6k | return static_cast<T>(negative ? -val : val); | 445 | 51.0k | } |
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE Line | Count | Source | 389 | 924 | ParseResult* result) { | 390 | 924 | typedef typename std::make_unsigned<T>::type UnsignedT; | 391 | 924 | UnsignedT val = 0; | 392 | 924 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 393 | 924 | bool negative = false; | 394 | 924 | if (UNLIKELY(len <= 0)) { | 395 | 0 | *result = PARSE_FAILURE; | 396 | 0 | return 0; | 397 | 0 | } | 398 | 924 | int i = 0; | 399 | 924 | switch (*s) { | 400 | 203 | case '-': | 401 | 203 | negative = true; | 402 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 403 | 203 | [[fallthrough]]; | 404 | 252 | case '+': | 405 | 252 | i = 1; | 406 | 924 | } | 407 | | | 408 | 924 | const T max_div_base = max_val / base; | 409 | 924 | const T max_mod_base = max_val % base; | 410 | | | 411 | 924 | int first = i; | 412 | 2.59k | for (; i < len; ++i) { | 413 | 2.54k | T digit; | 414 | 2.54k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 415 | 1.68k | digit = s[i] - '0'; | 416 | 1.68k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 417 | 98 | digit = (s[i] - 'a' + 10); | 418 | 756 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 419 | 0 | digit = (s[i] - 'A' + 10); | 420 | 756 | } else { | 421 | 756 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 422 | | // Reject the string because either the first char was not an alpha/digit, | 423 | | // or the remaining chars are not all whitespace | 424 | 420 | *result = PARSE_FAILURE; | 425 | 420 | return 0; | 426 | 420 | } | 427 | | // skip trailing whitespace. | 428 | 336 | break; | 429 | 756 | } | 430 | | | 431 | | // Bail, if we encounter a digit that is not available in base. | 432 | 1.78k | if (digit >= base) { | 433 | 0 | break; | 434 | 0 | } | 435 | | | 436 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 437 | 1.78k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 438 | 112 | *result = PARSE_OVERFLOW; | 439 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 440 | 112 | } | 441 | 1.67k | val = val * base + digit; | 442 | 1.67k | } | 443 | 392 | *result = PARSE_SUCCESS; | 444 | 392 | return static_cast<T>(negative ? -val : val); | 445 | 924 | } |
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE Line | Count | Source | 389 | 833 | ParseResult* result) { | 390 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 391 | 833 | UnsignedT val = 0; | 392 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 393 | 833 | bool negative = false; | 394 | 833 | if (UNLIKELY(len <= 0)) { | 395 | 0 | *result = PARSE_FAILURE; | 396 | 0 | return 0; | 397 | 0 | } | 398 | 833 | int i = 0; | 399 | 833 | switch (*s) { | 400 | 154 | case '-': | 401 | 154 | negative = true; | 402 | 154 | max_val = StringParser::numeric_limits<T>(false) + 1; | 403 | 154 | [[fallthrough]]; | 404 | 252 | case '+': | 405 | 252 | i = 1; | 406 | 833 | } | 407 | | | 408 | 833 | const T max_div_base = max_val / base; | 409 | 833 | const T max_mod_base = max_val % base; | 410 | | | 411 | 833 | int first = i; | 412 | 3.55k | for (; i < len; ++i) { | 413 | 3.50k | T digit; | 414 | 3.50k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 415 | 2.83k | digit = s[i] - '0'; | 416 | 2.83k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 417 | 0 | digit = (s[i] - 'a' + 10); | 418 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 419 | 0 | digit = (s[i] - 'A' + 10); | 420 | 672 | } else { | 421 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 422 | | // Reject the string because either the first char was not an alpha/digit, | 423 | | // or the remaining chars are not all whitespace | 424 | 378 | *result = PARSE_FAILURE; | 425 | 378 | return 0; | 426 | 378 | } | 427 | | // skip trailing whitespace. | 428 | 294 | break; | 429 | 672 | } | 430 | | | 431 | | // Bail, if we encounter a digit that is not available in base. | 432 | 2.83k | if (digit >= base) { | 433 | 0 | break; | 434 | 0 | } | 435 | | | 436 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 437 | 2.83k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 438 | 112 | *result = PARSE_OVERFLOW; | 439 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 440 | 112 | } | 441 | 2.72k | val = val * base + digit; | 442 | 2.72k | } | 443 | 343 | *result = PARSE_SUCCESS; | 444 | 343 | return static_cast<T>(negative ? -val : val); | 445 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE Line | Count | Source | 389 | 833 | ParseResult* result) { | 390 | 833 | typedef typename std::make_unsigned<T>::type UnsignedT; | 391 | 833 | UnsignedT val = 0; | 392 | 833 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 393 | 833 | bool negative = false; | 394 | 833 | if (UNLIKELY(len <= 0)) { | 395 | 0 | *result = PARSE_FAILURE; | 396 | 0 | return 0; | 397 | 0 | } | 398 | 833 | int i = 0; | 399 | 833 | switch (*s) { | 400 | 203 | case '-': | 401 | 203 | negative = true; | 402 | 203 | max_val = StringParser::numeric_limits<T>(false) + 1; | 403 | 203 | [[fallthrough]]; | 404 | 252 | case '+': | 405 | 252 | i = 1; | 406 | 833 | } | 407 | | | 408 | 833 | const T max_div_base = max_val / base; | 409 | 833 | const T max_mod_base = max_val % base; | 410 | | | 411 | 833 | int first = i; | 412 | 5.74k | for (; i < len; ++i) { | 413 | 5.69k | T digit; | 414 | 5.69k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 415 | 5.01k | digit = s[i] - '0'; | 416 | 5.01k | } else if (s[i] >= 'a' && s[i] <= 'z') { | 417 | 0 | digit = (s[i] - 'a' + 10); | 418 | 672 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 419 | 0 | digit = (s[i] - 'A' + 10); | 420 | 672 | } else { | 421 | 672 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 422 | | // Reject the string because either the first char was not an alpha/digit, | 423 | | // or the remaining chars are not all whitespace | 424 | 378 | *result = PARSE_FAILURE; | 425 | 378 | return 0; | 426 | 378 | } | 427 | | // skip trailing whitespace. | 428 | 294 | break; | 429 | 672 | } | 430 | | | 431 | | // Bail, if we encounter a digit that is not available in base. | 432 | 5.01k | if (digit >= base) { | 433 | 0 | break; | 434 | 0 | } | 435 | | | 436 | | // This is a tricky check to see if adding this digit will cause an overflow. 
| 437 | 5.01k | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 438 | 112 | *result = PARSE_OVERFLOW; | 439 | 112 | return static_cast<T>(negative ? -max_val : max_val); | 440 | 112 | } | 441 | 4.90k | val = val * base + digit; | 442 | 4.90k | } | 443 | 343 | *result = PARSE_SUCCESS; | 444 | 343 | return static_cast<T>(negative ? -val : val); | 445 | 833 | } |
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE Line | Count | Source | 389 | 1 | ParseResult* result) { | 390 | 1 | typedef typename std::make_unsigned<T>::type UnsignedT; | 391 | 1 | UnsignedT val = 0; | 392 | 1 | UnsignedT max_val = StringParser::numeric_limits<T>(false); | 393 | 1 | bool negative = false; | 394 | 1 | if (UNLIKELY(len <= 0)) { | 395 | 0 | *result = PARSE_FAILURE; | 396 | 0 | return 0; | 397 | 0 | } | 398 | 1 | int i = 0; | 399 | 1 | switch (*s) { | 400 | 0 | case '-': | 401 | 0 | negative = true; | 402 | 0 | max_val = StringParser::numeric_limits<T>(false) + 1; | 403 | 0 | [[fallthrough]]; | 404 | 0 | case '+': | 405 | 0 | i = 1; | 406 | 1 | } | 407 | | | 408 | 1 | const T max_div_base = max_val / base; | 409 | 1 | const T max_mod_base = max_val % base; | 410 | | | 411 | 1 | int first = i; | 412 | 3 | for (; i < len; ++i) { | 413 | 2 | T digit; | 414 | 2 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 415 | 0 | digit = s[i] - '0'; | 416 | 2 | } else if (s[i] >= 'a' && s[i] <= 'z') { | 417 | 2 | digit = (s[i] - 'a' + 10); | 418 | 2 | } else if (s[i] >= 'A' && s[i] <= 'Z') { | 419 | 0 | digit = (s[i] - 'A' + 10); | 420 | 0 | } else { | 421 | 0 | if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) { | 422 | | // Reject the string because either the first char was not an alpha/digit, | 423 | | // or the remaining chars are not all whitespace | 424 | 0 | *result = PARSE_FAILURE; | 425 | 0 | return 0; | 426 | 0 | } | 427 | | // skip trailing whitespace. | 428 | 0 | break; | 429 | 0 | } | 430 | | | 431 | | // Bail, if we encounter a digit that is not available in base. | 432 | 2 | if (digit >= base) { | 433 | 0 | break; | 434 | 0 | } | 435 | | | 436 | | // This is a tricky check to see if adding this digit will cause an overflow. | 437 | 2 | if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) { | 438 | 0 | *result = PARSE_OVERFLOW; | 439 | 0 | return static_cast<T>(negative ? 
-max_val : max_val); | 440 | 0 | } | 441 | 2 | val = val * base + digit; | 442 | 2 | } | 443 | 1 | *result = PARSE_SUCCESS; | 444 | 1 | return static_cast<T>(negative ? -val : val); | 445 | 1 | } |
|
446 | | |
447 | | template <typename T> |
448 | 4.30k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { |
449 | 4.30k | T val = 0; |
450 | 4.30k | if (UNLIKELY(len == 0)) { |
451 | 0 | *result = PARSE_SUCCESS; |
452 | 0 | return val; |
453 | 0 | } |
454 | | // Factor out the first char for error handling speeds up the loop. |
455 | 4.30k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { |
456 | 3.20k | val = s[0] - '0'; |
457 | 3.20k | } else { |
458 | 1.09k | *result = PARSE_FAILURE; |
459 | 1.09k | return 0; |
460 | 1.09k | } |
461 | 10.2k | for (int i = 1; i < len; ++i) { |
462 | 7.61k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { |
463 | 7.00k | T digit = s[i] - '0'; |
464 | 7.00k | val = val * 10 + digit; |
465 | 7.00k | } else { |
466 | 605 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && |
467 | 605 | !is_float_suffix(s + i, len - i)))) { |
468 | 16 | *result = PARSE_FAILURE; |
469 | 16 | return 0; |
470 | 16 | } |
471 | 589 | *result = PARSE_SUCCESS; |
472 | 589 | return val; |
473 | 605 | } |
474 | 7.61k | } |
475 | 2.60k | *result = PARSE_SUCCESS; |
476 | 2.60k | return val; |
477 | 3.20k | } _ZN5doris12StringParser25string_to_int_no_overflowIhEET_PKciPNS0_11ParseResultE Line | Count | Source | 448 | 1.50k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 449 | 1.50k | T val = 0; | 450 | 1.50k | if (UNLIKELY(len == 0)) { | 451 | 0 | *result = PARSE_SUCCESS; | 452 | 0 | return val; | 453 | 0 | } | 454 | | // Factor out the first char for error handling speeds up the loop. | 455 | 1.50k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 456 | 1.45k | val = s[0] - '0'; | 457 | 1.45k | } else { | 458 | 50 | *result = PARSE_FAILURE; | 459 | 50 | return 0; | 460 | 50 | } | 461 | 2.72k | for (int i = 1; i < len; ++i) { | 462 | 1.27k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 463 | 1.27k | T digit = s[i] - '0'; | 464 | 1.27k | val = val * 10 + digit; | 465 | 1.27k | } else { | 466 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 467 | 0 | !is_float_suffix(s + i, len - i)))) { | 468 | 0 | *result = PARSE_FAILURE; | 469 | 0 | return 0; | 470 | 0 | } | 471 | 0 | *result = PARSE_SUCCESS; | 472 | 0 | return val; | 473 | 0 | } | 474 | 1.27k | } | 475 | 1.45k | *result = PARSE_SUCCESS; | 476 | 1.45k | return val; | 477 | 1.45k | } |
_ZN5doris12StringParser25string_to_int_no_overflowItEET_PKciPNS0_11ParseResultE Line | Count | Source | 448 | 143 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 449 | 143 | T val = 0; | 450 | 143 | if (UNLIKELY(len == 0)) { | 451 | 0 | *result = PARSE_SUCCESS; | 452 | 0 | return val; | 453 | 0 | } | 454 | | // Factor out the first char for error handling speeds up the loop. | 455 | 143 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 456 | 119 | val = s[0] - '0'; | 457 | 119 | } else { | 458 | 24 | *result = PARSE_FAILURE; | 459 | 24 | return 0; | 460 | 24 | } | 461 | 154 | for (int i = 1; i < len; ++i) { | 462 | 119 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 463 | 35 | T digit = s[i] - '0'; | 464 | 35 | val = val * 10 + digit; | 465 | 84 | } else { | 466 | 84 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 467 | 84 | !is_float_suffix(s + i, len - i)))) { | 468 | 0 | *result = PARSE_FAILURE; | 469 | 0 | return 0; | 470 | 0 | } | 471 | 84 | *result = PARSE_SUCCESS; | 472 | 84 | return val; | 473 | 84 | } | 474 | 119 | } | 475 | 35 | *result = PARSE_SUCCESS; | 476 | 35 | return val; | 477 | 119 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIjEET_PKciPNS0_11ParseResultE Line | Count | Source | 448 | 784 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 449 | 784 | T val = 0; | 450 | 784 | if (UNLIKELY(len == 0)) { | 451 | 0 | *result = PARSE_SUCCESS; | 452 | 0 | return val; | 453 | 0 | } | 454 | | // Factor out the first char for error handling speeds up the loop. | 455 | 784 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 456 | 280 | val = s[0] - '0'; | 457 | 504 | } else { | 458 | 504 | *result = PARSE_FAILURE; | 459 | 504 | return 0; | 460 | 504 | } | 461 | 641 | for (int i = 1; i < len; ++i) { | 462 | 571 | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 463 | 361 | T digit = s[i] - '0'; | 464 | 361 | val = val * 10 + digit; | 465 | 361 | } else { | 466 | 210 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 467 | 210 | !is_float_suffix(s + i, len - i)))) { | 468 | 0 | *result = PARSE_FAILURE; | 469 | 0 | return 0; | 470 | 0 | } | 471 | 210 | *result = PARSE_SUCCESS; | 472 | 210 | return val; | 473 | 210 | } | 474 | 571 | } | 475 | 70 | *result = PARSE_SUCCESS; | 476 | 70 | return val; | 477 | 280 | } |
_ZN5doris12StringParser25string_to_int_no_overflowImEET_PKciPNS0_11ParseResultE Line | Count | Source | 448 | 1.09k | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 449 | 1.09k | T val = 0; | 450 | 1.09k | if (UNLIKELY(len == 0)) { | 451 | 0 | *result = PARSE_SUCCESS; | 452 | 0 | return val; | 453 | 0 | } | 454 | | // Factor out the first char for error handling speeds up the loop. | 455 | 1.09k | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 456 | 575 | val = s[0] - '0'; | 457 | 575 | } else { | 458 | 518 | *result = PARSE_FAILURE; | 459 | 518 | return 0; | 460 | 518 | } | 461 | 2.97k | for (int i = 1; i < len; ++i) { | 462 | 2.71k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 463 | 2.39k | T digit = s[i] - '0'; | 464 | 2.39k | val = val * 10 + digit; | 465 | 2.39k | } else { | 466 | 311 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 467 | 311 | !is_float_suffix(s + i, len - i)))) { | 468 | 16 | *result = PARSE_FAILURE; | 469 | 16 | return 0; | 470 | 16 | } | 471 | 295 | *result = PARSE_SUCCESS; | 472 | 295 | return val; | 473 | 311 | } | 474 | 2.71k | } | 475 | 264 | *result = PARSE_SUCCESS; | 476 | 264 | return val; | 477 | 575 | } |
_ZN5doris12StringParser25string_to_int_no_overflowIoEET_PKciPNS0_11ParseResultE Line | Count | Source | 448 | 778 | T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) { | 449 | 778 | T val = 0; | 450 | 778 | if (UNLIKELY(len == 0)) { | 451 | 0 | *result = PARSE_SUCCESS; | 452 | 0 | return val; | 453 | 0 | } | 454 | | // Factor out the first char for error handling speeds up the loop. | 455 | 778 | if (LIKELY(s[0] >= '0' && s[0] <= '9')) { | 456 | 778 | val = s[0] - '0'; | 457 | 778 | } else { | 458 | 0 | *result = PARSE_FAILURE; | 459 | 0 | return 0; | 460 | 0 | } | 461 | 3.71k | for (int i = 1; i < len; ++i) { | 462 | 2.94k | if (LIKELY(s[i] >= '0' && s[i] <= '9')) { | 463 | 2.94k | T digit = s[i] - '0'; | 464 | 2.94k | val = val * 10 + digit; | 465 | 2.94k | } else { | 466 | 0 | if ((UNLIKELY(!is_all_whitespace(s + i, len - i) && | 467 | 0 | !is_float_suffix(s + i, len - i)))) { | 468 | 0 | *result = PARSE_FAILURE; | 469 | 0 | return 0; | 470 | 0 | } | 471 | 0 | *result = PARSE_SUCCESS; | 472 | 0 | return val; | 473 | 0 | } | 474 | 2.94k | } | 475 | 778 | *result = PARSE_SUCCESS; | 476 | 778 | return val; | 477 | 778 | } |
Unexecuted instantiation: _ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEEEET_PKciPNS0_11ParseResultE |
478 | | |
479 | | template <typename T> |
480 | 19.2k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { |
481 | 19.2k | int i = 0; |
482 | | // skip leading spaces |
483 | 60.1k | for (; i < len; ++i) { |
484 | 60.1k | if (!is_whitespace(s[i])) { |
485 | 19.2k | break; |
486 | 19.2k | } |
487 | 60.1k | } |
488 | | |
489 | | // skip back spaces |
490 | 19.2k | int j = len - 1; |
491 | 60.4k | for (; j >= i; j--) { |
492 | 60.4k | if (!is_whitespace(s[j])) { |
493 | 19.2k | break; |
494 | 19.2k | } |
495 | 60.4k | } |
496 | | |
497 | | // skip leading '+', from_chars can handle '-' |
498 | 19.2k | if (i < len && s[i] == '+') { |
499 | 5.29k | i++; |
500 | 5.29k | } |
501 | 19.2k | if (UNLIKELY(i > j)) { |
502 | 3 | *result = PARSE_FAILURE; |
503 | 3 | return 0; |
504 | 3 | } |
505 | | |
506 | | // Use double here to not lose precision while accumulating the result |
507 | 19.2k | double val = 0; |
508 | 19.2k | auto res = fast_float::from_chars(s + i, s + j + 1, val); |
509 | | |
510 | 19.2k | if (res.ec == std::errc() && res.ptr == s + j + 1) { |
511 | 15.4k | if (abs(val) == std::numeric_limits<T>::infinity()) { |
512 | 886 | auto contain_inf = false; |
513 | 1.27k | for (int k = i; k < j + 1; k++) { |
514 | 1.27k | if (s[k] == 'i' || s[k] == 'I') { |
515 | 882 | contain_inf = true; |
516 | 882 | break; |
517 | 882 | } |
518 | 1.27k | } |
519 | | |
520 | 886 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; |
521 | 14.5k | } else { |
522 | 14.5k | *result = PARSE_SUCCESS; |
523 | 14.5k | } |
524 | 15.4k | return val; |
525 | 15.4k | } else { |
526 | 3.86k | *result = PARSE_FAILURE; |
527 | 3.86k | } |
528 | 3.86k | return 0; |
529 | 19.2k | } _ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 8.25k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 8.25k | int i = 0; | 482 | | // skip leading spaces | 483 | 28.6k | for (; i < len; ++i) { | 484 | 28.6k | if (!is_whitespace(s[i])) { | 485 | 8.25k | break; | 486 | 8.25k | } | 487 | 28.6k | } | 488 | | | 489 | | // skip back spaces | 490 | 8.25k | int j = len - 1; | 491 | 28.8k | for (; j >= i; j--) { | 492 | 28.8k | if (!is_whitespace(s[j])) { | 493 | 8.25k | break; | 494 | 8.25k | } | 495 | 28.8k | } | 496 | | | 497 | | // skip leading '+', from_chars can handle '-' | 498 | 8.25k | if (i < len && s[i] == '+') { | 499 | 2.64k | i++; | 500 | 2.64k | } | 501 | 8.25k | if (UNLIKELY(i > j)) { | 502 | 0 | *result = PARSE_FAILURE; | 503 | 0 | return 0; | 504 | 0 | } | 505 | | | 506 | | // Use double here to not lose precision while accumulating the result | 507 | 8.25k | double val = 0; | 508 | 8.25k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 509 | | | 510 | 8.25k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 511 | 6.32k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 512 | 443 | auto contain_inf = false; | 513 | 629 | for (int k = i; k < j + 1; k++) { | 514 | 627 | if (s[k] == 'i' || s[k] == 'I') { | 515 | 441 | contain_inf = true; | 516 | 441 | break; | 517 | 441 | } | 518 | 627 | } | 519 | | | 520 | 443 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 521 | 5.88k | } else { | 522 | 5.88k | *result = PARSE_SUCCESS; | 523 | 5.88k | } | 524 | 6.32k | return val; | 525 | 6.32k | } else { | 526 | 1.92k | *result = PARSE_FAILURE; | 527 | 1.92k | } | 528 | 1.92k | return 0; | 529 | 8.25k | } |
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE Line | Count | Source | 480 | 11.0k | T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) { | 481 | 11.0k | int i = 0; | 482 | | // skip leading spaces | 483 | 31.4k | for (; i < len; ++i) { | 484 | 31.4k | if (!is_whitespace(s[i])) { | 485 | 11.0k | break; | 486 | 11.0k | } | 487 | 31.4k | } | 488 | | | 489 | | // skip back spaces | 490 | 11.0k | int j = len - 1; | 491 | 31.5k | for (; j >= i; j--) { | 492 | 31.5k | if (!is_whitespace(s[j])) { | 493 | 11.0k | break; | 494 | 11.0k | } | 495 | 31.5k | } | 496 | | | 497 | | // skip leading '+', from_chars can handle '-' | 498 | 11.0k | if (i < len && s[i] == '+') { | 499 | 2.64k | i++; | 500 | 2.64k | } | 501 | 11.0k | if (UNLIKELY(i > j)) { | 502 | 3 | *result = PARSE_FAILURE; | 503 | 3 | return 0; | 504 | 3 | } | 505 | | | 506 | | // Use double here to not lose precision while accumulating the result | 507 | 11.0k | double val = 0; | 508 | 11.0k | auto res = fast_float::from_chars(s + i, s + j + 1, val); | 509 | | | 510 | 11.0k | if (res.ec == std::errc() && res.ptr == s + j + 1) { | 511 | 9.08k | if (abs(val) == std::numeric_limits<T>::infinity()) { | 512 | 443 | auto contain_inf = false; | 513 | 647 | for (int k = i; k < j + 1; k++) { | 514 | 645 | if (s[k] == 'i' || s[k] == 'I') { | 515 | 441 | contain_inf = true; | 516 | 441 | break; | 517 | 441 | } | 518 | 645 | } | 519 | | | 520 | 443 | *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW; | 521 | 8.64k | } else { | 522 | 8.64k | *result = PARSE_SUCCESS; | 523 | 8.64k | } | 524 | 9.08k | return val; | 525 | 9.08k | } else { | 526 | 1.93k | *result = PARSE_FAILURE; | 527 | 1.93k | } | 528 | 1.93k | return 0; | 529 | 11.0k | } |
|
530 | | |
531 | | inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len, |
532 | 618 | ParseResult* result) { |
533 | 618 | *result = PARSE_SUCCESS; |
534 | | |
535 | 618 | if (len >= 4 && (s[0] == 't' || s[0] == 'T')) { |
536 | 170 | bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') && |
537 | 170 | (s[3] == 'e' || s[3] == 'E'); |
538 | 170 | if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) { |
539 | 58 | return true; |
540 | 58 | } |
541 | 448 | } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) { |
542 | 170 | bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') && |
543 | 170 | (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E'); |
544 | 170 | if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) { |
545 | 52 | return false; |
546 | 52 | } |
547 | 170 | } |
548 | | |
549 | 508 | *result = PARSE_FAILURE; |
550 | 508 | return false; |
551 | 618 | } |
552 | | |
553 | | template <PrimitiveType P, typename T, typename DecimalType> |
554 | | T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision, |
555 | 203 | int type_scale, ParseResult* result) { |
556 | 203 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || |
557 | 203 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, |
558 | 203 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " |
559 | 203 | "wide::Int256."); |
560 | | // Special cases: |
561 | | // 1) '' == Fail, an empty string fails to parse. |
562 | | // 2) ' # ' == #, leading and trailing white space is ignored. |
563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). |
564 | | // 4) '#.' == '#', a trailing dot is ignored. |
565 | | |
566 | | // Ignore leading and trailing spaces. |
567 | 203 | while (len > 0 && is_whitespace(*s)) { |
568 | 0 | ++s; |
569 | 0 | --len; |
570 | 0 | } |
571 | 203 | while (len > 0 && is_whitespace(s[len - 1])) { |
572 | 0 | --len; |
573 | 0 | } |
574 | | |
575 | 203 | bool is_negative = false; |
576 | 203 | if (len > 0) { |
577 | 203 | switch (*s) { |
578 | 26 | case '-': |
579 | 26 | is_negative = true; |
580 | 26 | [[fallthrough]]; |
581 | 26 | case '+': |
582 | 26 | ++s; |
583 | 26 | --len; |
584 | 203 | } |
585 | 203 | } |
586 | | |
587 | | // Ignore leading zeros. |
588 | 203 | bool found_value = false; |
589 | 229 | while (len > 0 && UNLIKELY(*s == '0')) { |
590 | 26 | found_value = true; |
591 | 26 | ++s; |
592 | 26 | --len; |
593 | 26 | } |
594 | | |
595 | | // Ignore leading zeros even after a dot. This allows for differentiating between |
596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would |
597 | | // overflow. |
598 | 203 | int scale = 0; |
599 | 203 | int found_dot = 0; |
600 | 203 | if (len > 0 && *s == '.') { |
601 | 9 | found_dot = 1; |
602 | 9 | ++s; |
603 | 9 | --len; |
604 | 14 | while (len > 0 && UNLIKELY(*s == '0')) { |
605 | 5 | found_value = true; |
606 | 5 | ++scale; |
607 | 5 | ++s; |
608 | 5 | --len; |
609 | 5 | } |
610 | 9 | } |
611 | | |
612 | 203 | int precision = 0; |
613 | 203 | int max_digit = type_precision - type_scale; |
614 | 203 | int cur_digit = 0; |
615 | 203 | bool found_exponent = false; |
616 | 203 | int8_t exponent = 0; |
617 | 203 | T value = 0; |
618 | 203 | bool has_round = false; |
619 | 3.85k | for (int i = 0; i < len; ++i) { |
620 | 3.66k | const char& c = s[i]; |
621 | 3.66k | if (LIKELY('0' <= c && c <= '9')) { |
622 | 3.48k | found_value = true; |
623 | | // Ignore digits once the type's precision limit is reached. This avoids |
624 | | // overflowing the underlying storage while handling a string like |
625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and |
626 | | // an exponent will be made later. |
627 | 3.48k | if (LIKELY(type_precision > precision) && !has_round) { |
628 | 3.45k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... |
629 | 3.45k | ++precision; |
630 | 3.45k | scale += found_dot; |
631 | 3.45k | cur_digit = precision - scale; |
632 | 3.45k | } else if (!found_dot && max_digit < (precision - scale)) { |
633 | 0 | *result = StringParser::PARSE_OVERFLOW; |
634 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); |
636 | 0 | return value; |
637 | 34 | } else if (found_dot && scale >= type_scale && !has_round) { |
638 | | // make rounding cases |
639 | 20 | if (c > '4') { |
640 | 8 | value += 1; |
641 | 8 | } |
642 | 20 | has_round = true; |
643 | 20 | continue; |
644 | 20 | } else if (!found_dot) { |
645 | 0 | ++cur_digit; |
646 | 0 | } |
647 | 3.46k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. |
648 | 3.46k | } else if (c == '.' && LIKELY(!found_dot)) { |
649 | 164 | found_dot = 1; |
650 | 164 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { |
651 | 0 | found_exponent = true; |
652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); |
653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { |
654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { |
655 | 0 | *result = StringParser::PARSE_UNDERFLOW; |
656 | 0 | } |
657 | 0 | return 0; |
658 | 0 | } |
659 | 0 | break; |
660 | 15 | } else { |
661 | 15 | if (value == 0) { |
662 | 9 | *result = StringParser::PARSE_FAILURE; |
663 | 9 | return 0; |
664 | 9 | } |
665 | | // here to handle |
666 | 6 | *result = StringParser::PARSE_SUCCESS; |
667 | 6 | if (type_scale >= scale) { |
668 | 6 | value *= get_scale_multiplier<T>(type_scale - scale); |
669 | | // here meet non-valid character, should return the value, keep going to meet |
670 | | // the E/e character because we make right user-given type_precision |
671 | | // not max number type_precision |
672 | 6 | if (!is_numeric_ascii(c)) { |
673 | 6 | if (cur_digit > type_precision) { |
674 | 0 | *result = StringParser::PARSE_OVERFLOW; |
675 | 0 | value = is_negative |
676 | 0 | ? vectorized::min_decimal_value<DecimalType>(type_precision) |
677 | 0 | : vectorized::max_decimal_value<DecimalType>( |
678 | 0 | type_precision); |
679 | 0 | return value; |
680 | 0 | } |
681 | 6 | return is_negative ? T(-value) : T(value); |
682 | 6 | } |
683 | 6 | } |
684 | | |
685 | 0 | return is_negative ? T(-value) : T(value); |
686 | 6 | } |
687 | 3.66k | } |
688 | | |
689 | | // Find the number of truncated digits before adjusting the precision for an exponent. |
690 | 188 | if (exponent > scale) { |
691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the |
692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. |
693 | 0 | precision += exponent - scale; |
694 | |
|
695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); |
696 | 0 | scale = 0; |
697 | 188 | } else { |
698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, |
699 | | // the precision must also be set to 4 but that will be done below for the |
700 | | // non-exponent case anyways. |
701 | 188 | scale -= exponent; |
702 | 188 | } |
703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros |
704 | | // were ignored during previous parsing. |
705 | 188 | if (scale > precision) { |
706 | 3 | precision = scale; |
707 | 3 | } |
708 | | |
709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower |
710 | | // than just letting the function run out. |
711 | 127 | *result = StringParser::PARSE_SUCCESS; |
712 | 127 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { |
713 | 19 | *result = StringParser::PARSE_OVERFLOW; |
714 | 19 | if constexpr (TYPE_DECIMALV2 != P) { |
715 | | // decimalv3 overflow will return max min value for type precision |
716 | 10 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) |
717 | 10 | : vectorized::max_decimal_value<DecimalType>(type_precision); |
718 | 10 | return value; |
719 | 10 | } |
720 | 169 | } else if (UNLIKELY(scale > type_scale)) { |
721 | 29 | *result = StringParser::PARSE_UNDERFLOW; |
722 | 29 | int shift = scale - type_scale; |
723 | 29 | T divisor = get_scale_multiplier<T>(shift); |
724 | 29 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { |
725 | 0 | value = 0; |
726 | 29 | } else { |
727 | 29 | T remainder = value % divisor; |
728 | 29 | value /= divisor; |
729 | 29 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { |
730 | 25 | value += 1; |
731 | 25 | } |
732 | 29 | } |
733 | 29 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. |
734 | 140 | } else if (UNLIKELY(!found_value && !found_dot)) { |
735 | 0 | *result = StringParser::PARSE_FAILURE; |
736 | 0 | } |
737 | | |
738 | 178 | if (type_scale > scale) { |
739 | 86 | value *= get_scale_multiplier<T>(type_scale - scale); |
740 | 86 | } |
741 | | |
742 | 178 | return is_negative ? T(-value) : T(value); |
743 | 61 | } _ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EiNS_10vectorized7DecimalIiEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 555 | 16 | int type_scale, ParseResult* result) { | 556 | 16 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 557 | 16 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 558 | 16 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 559 | 16 | "wide::Int256."); | 560 | | // Special cases: | 561 | | // 1) '' == Fail, an empty string fails to parse. | 562 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 564 | | // 4) '#.' == '#', a trailing dot is ignored. | 565 | | | 566 | | // Ignore leading and trailing spaces. | 567 | 16 | while (len > 0 && is_whitespace(*s)) { | 568 | 0 | ++s; | 569 | 0 | --len; | 570 | 0 | } | 571 | 16 | while (len > 0 && is_whitespace(s[len - 1])) { | 572 | 0 | --len; | 573 | 0 | } | 574 | | | 575 | 16 | bool is_negative = false; | 576 | 16 | if (len > 0) { | 577 | 16 | switch (*s) { | 578 | 0 | case '-': | 579 | 0 | is_negative = true; | 580 | 0 | [[fallthrough]]; | 581 | 0 | case '+': | 582 | 0 | ++s; | 583 | 0 | --len; | 584 | 16 | } | 585 | 16 | } | 586 | | | 587 | | // Ignore leading zeros. | 588 | 16 | bool found_value = false; | 589 | 16 | while (len > 0 && UNLIKELY(*s == '0')) { | 590 | 0 | found_value = true; | 591 | 0 | ++s; | 592 | 0 | --len; | 593 | 0 | } | 594 | | | 595 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 597 | | // overflow. 
| 598 | 16 | int scale = 0; | 599 | 16 | int found_dot = 0; | 600 | 16 | if (len > 0 && *s == '.') { | 601 | 0 | found_dot = 1; | 602 | 0 | ++s; | 603 | 0 | --len; | 604 | 0 | while (len > 0 && UNLIKELY(*s == '0')) { | 605 | 0 | found_value = true; | 606 | 0 | ++scale; | 607 | 0 | ++s; | 608 | 0 | --len; | 609 | 0 | } | 610 | 0 | } | 611 | | | 612 | 16 | int precision = 0; | 613 | 16 | int max_digit = type_precision - type_scale; | 614 | 16 | int cur_digit = 0; | 615 | 16 | bool found_exponent = false; | 616 | 16 | int8_t exponent = 0; | 617 | 16 | T value = 0; | 618 | 16 | bool has_round = false; | 619 | 190 | for (int i = 0; i < len; ++i) { | 620 | 174 | const char& c = s[i]; | 621 | 174 | if (LIKELY('0' <= c && c <= '9')) { | 622 | 158 | found_value = true; | 623 | | // Ignore digits once the type's precision limit is reached. This avoids | 624 | | // overflowing the underlying storage while handling a string like | 625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 626 | | // an exponent will be made later. | 627 | 158 | if (LIKELY(type_precision > precision) && !has_round) { | 628 | 144 | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 629 | 144 | ++precision; | 630 | 144 | scale += found_dot; | 631 | 144 | cur_digit = precision - scale; | 632 | 144 | } else if (!found_dot && max_digit < (precision - scale)) { | 633 | 0 | *result = StringParser::PARSE_OVERFLOW; | 634 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 636 | 0 | return value; | 637 | 14 | } else if (found_dot && scale >= type_scale && !has_round) { | 638 | | // make rounding cases | 639 | 4 | if (c > '4') { | 640 | 0 | value += 1; | 641 | 0 | } | 642 | 4 | has_round = true; | 643 | 4 | continue; | 644 | 10 | } else if (!found_dot) { | 645 | 0 | ++cur_digit; | 646 | 0 | } | 647 | 154 | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 648 | 154 | } else if (c == '.' && LIKELY(!found_dot)) { | 649 | 16 | found_dot = 1; | 650 | 16 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 651 | 0 | found_exponent = true; | 652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 655 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 656 | 0 | } | 657 | 0 | return 0; | 658 | 0 | } | 659 | 0 | break; | 660 | 0 | } else { | 661 | 0 | if (value == 0) { | 662 | 0 | *result = StringParser::PARSE_FAILURE; | 663 | 0 | return 0; | 664 | 0 | } | 665 | | // here to handle | 666 | 0 | *result = StringParser::PARSE_SUCCESS; | 667 | 0 | if (type_scale >= scale) { | 668 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 669 | | // here meet non-valid character, should return the value, keep going to meet | 670 | | // the E/e character because we make right user-given type_precision | 671 | | // not max number type_precision | 672 | 0 | if (!is_numeric_ascii(c)) { | 673 | 0 | if (cur_digit > type_precision) { | 674 | 0 | *result = StringParser::PARSE_OVERFLOW; | 675 | 0 | value = is_negative | 676 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 677 | 0 | : vectorized::max_decimal_value<DecimalType>( | 678 | 0 | type_precision); | 679 | 0 | return value; | 680 | 0 | } | 681 | 0 | return is_negative ? T(-value) : T(value); | 682 | 0 | } | 683 | 0 | } | 684 | | | 685 | 0 | return is_negative ? T(-value) : T(value); | 686 | 0 | } | 687 | 174 | } | 688 | | | 689 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 690 | 16 | if (exponent > scale) { | 691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 693 | 0 | precision += exponent - scale; | 694 | |
| 695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 696 | 0 | scale = 0; | 697 | 16 | } else { | 698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 699 | | // the precision must also be set to 4 but that will be done below for the | 700 | | // non-exponent case anyways. | 701 | 16 | scale -= exponent; | 702 | 16 | } | 703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 704 | | // were ignored during previous parsing. | 705 | 16 | if (scale > precision) { | 706 | 0 | precision = scale; | 707 | 0 | } | 708 | | | 709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 710 | | // than just letting the function run out. | 711 | 16 | *result = StringParser::PARSE_SUCCESS; | 712 | 16 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 713 | 4 | *result = StringParser::PARSE_OVERFLOW; | 714 | 4 | if constexpr (TYPE_DECIMALV2 != P) { | 715 | | // decimalv3 overflow will return max min value for type precision | 716 | 4 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 717 | 4 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 718 | 4 | return value; | 719 | 4 | } | 720 | 12 | } else if (UNLIKELY(scale > type_scale)) { | 721 | 4 | *result = StringParser::PARSE_UNDERFLOW; | 722 | 4 | int shift = scale - type_scale; | 723 | 4 | T divisor = get_scale_multiplier<T>(shift); | 724 | 4 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 725 | 0 | value = 0; | 726 | 4 | } else { | 727 | 4 | T remainder = value % divisor; | 728 | 4 | value /= divisor; | 729 | 4 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 730 | 0 | value += 1; | 731 | 0 | } | 732 | 4 | } | 733 | 4 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 734 | 8 | } else if (UNLIKELY(!found_value && !found_dot)) { | 735 | 0 | *result = StringParser::PARSE_FAILURE; | 736 | 0 | } | 737 | | | 738 | 12 | if (type_scale > scale) { | 739 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 740 | 0 | } | 741 | | | 742 | 12 | return is_negative ? T(-value) : T(value); | 743 | 16 | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29ElNS_10vectorized7DecimalIlEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 555 | 16 | int type_scale, ParseResult* result) { | 556 | 16 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 557 | 16 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 558 | 16 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 559 | 16 | "wide::Int256."); | 560 | | // Special cases: | 561 | | // 1) '' == Fail, an empty string fails to parse. | 562 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 564 | | // 4) '#.' == '#', a trailing dot is ignored. | 565 | | | 566 | | // Ignore leading and trailing spaces. | 567 | 16 | while (len > 0 && is_whitespace(*s)) { | 568 | 0 | ++s; | 569 | 0 | --len; | 570 | 0 | } | 571 | 16 | while (len > 0 && is_whitespace(s[len - 1])) { | 572 | 0 | --len; | 573 | 0 | } | 574 | | | 575 | 16 | bool is_negative = false; | 576 | 16 | if (len > 0) { | 577 | 16 | switch (*s) { | 578 | 0 | case '-': | 579 | 0 | is_negative = true; | 580 | 0 | [[fallthrough]]; | 581 | 0 | case '+': | 582 | 0 | ++s; | 583 | 0 | --len; | 584 | 16 | } | 585 | 16 | } | 586 | | | 587 | | // Ignore leading zeros. | 588 | 16 | bool found_value = false; | 589 | 16 | while (len > 0 && UNLIKELY(*s == '0')) { | 590 | 0 | found_value = true; | 591 | 0 | ++s; | 592 | 0 | --len; | 593 | 0 | } | 594 | | | 595 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 597 | | // overflow. 
| 598 | 16 | int scale = 0; | 599 | 16 | int found_dot = 0; | 600 | 16 | if (len > 0 && *s == '.') { | 601 | 0 | found_dot = 1; | 602 | 0 | ++s; | 603 | 0 | --len; | 604 | 0 | while (len > 0 && UNLIKELY(*s == '0')) { | 605 | 0 | found_value = true; | 606 | 0 | ++scale; | 607 | 0 | ++s; | 608 | 0 | --len; | 609 | 0 | } | 610 | 0 | } | 611 | | | 612 | 16 | int precision = 0; | 613 | 16 | int max_digit = type_precision - type_scale; | 614 | 16 | int cur_digit = 0; | 615 | 16 | bool found_exponent = false; | 616 | 16 | int8_t exponent = 0; | 617 | 16 | T value = 0; | 618 | 16 | bool has_round = false; | 619 | 276 | for (int i = 0; i < len; ++i) { | 620 | 260 | const char& c = s[i]; | 621 | 260 | if (LIKELY('0' <= c && c <= '9')) { | 622 | 244 | found_value = true; | 623 | | // Ignore digits once the type's precision limit is reached. This avoids | 624 | | // overflowing the underlying storage while handling a string like | 625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 626 | | // an exponent will be made later. | 627 | 244 | if (LIKELY(type_precision > precision) && !has_round) { | 628 | 236 | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 629 | 236 | ++precision; | 630 | 236 | scale += found_dot; | 631 | 236 | cur_digit = precision - scale; | 632 | 236 | } else if (!found_dot && max_digit < (precision - scale)) { | 633 | 0 | *result = StringParser::PARSE_OVERFLOW; | 634 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 636 | 0 | return value; | 637 | 8 | } else if (found_dot && scale >= type_scale && !has_round) { | 638 | | // make rounding cases | 639 | 4 | if (c > '4') { | 640 | 4 | value += 1; | 641 | 4 | } | 642 | 4 | has_round = true; | 643 | 4 | continue; | 644 | 4 | } else if (!found_dot) { | 645 | 0 | ++cur_digit; | 646 | 0 | } | 647 | 240 | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 648 | 240 | } else if (c == '.' && LIKELY(!found_dot)) { | 649 | 16 | found_dot = 1; | 650 | 16 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 651 | 0 | found_exponent = true; | 652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 655 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 656 | 0 | } | 657 | 0 | return 0; | 658 | 0 | } | 659 | 0 | break; | 660 | 0 | } else { | 661 | 0 | if (value == 0) { | 662 | 0 | *result = StringParser::PARSE_FAILURE; | 663 | 0 | return 0; | 664 | 0 | } | 665 | | // here to handle | 666 | 0 | *result = StringParser::PARSE_SUCCESS; | 667 | 0 | if (type_scale >= scale) { | 668 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 669 | | // here meet non-valid character, should return the value, keep going to meet | 670 | | // the E/e character because we make right user-given type_precision | 671 | | // not max number type_precision | 672 | 0 | if (!is_numeric_ascii(c)) { | 673 | 0 | if (cur_digit > type_precision) { | 674 | 0 | *result = StringParser::PARSE_OVERFLOW; | 675 | 0 | value = is_negative | 676 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 677 | 0 | : vectorized::max_decimal_value<DecimalType>( | 678 | 0 | type_precision); | 679 | 0 | return value; | 680 | 0 | } | 681 | 0 | return is_negative ? T(-value) : T(value); | 682 | 0 | } | 683 | 0 | } | 684 | | | 685 | 0 | return is_negative ? T(-value) : T(value); | 686 | 0 | } | 687 | 260 | } | 688 | | | 689 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 690 | 16 | if (exponent > scale) { | 691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 693 | 0 | precision += exponent - scale; | 694 | |
| 695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 696 | 0 | scale = 0; | 697 | 16 | } else { | 698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 699 | | // the precision must also be set to 4 but that will be done below for the | 700 | | // non-exponent case anyways. | 701 | 16 | scale -= exponent; | 702 | 16 | } | 703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 704 | | // were ignored during previous parsing. | 705 | 16 | if (scale > precision) { | 706 | 0 | precision = scale; | 707 | 0 | } | 708 | | | 709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 710 | | // than just letting the function run out. | 711 | 16 | *result = StringParser::PARSE_SUCCESS; | 712 | 16 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 713 | 4 | *result = StringParser::PARSE_OVERFLOW; | 714 | 4 | if constexpr (TYPE_DECIMALV2 != P) { | 715 | | // decimalv3 overflow will return max min value for type precision | 716 | 4 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 717 | 4 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 718 | 4 | return value; | 719 | 4 | } | 720 | 12 | } else if (UNLIKELY(scale > type_scale)) { | 721 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 722 | 0 | int shift = scale - type_scale; | 723 | 0 | T divisor = get_scale_multiplier<T>(shift); | 724 | 0 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 725 | 0 | value = 0; | 726 | 0 | } else { | 727 | 0 | T remainder = value % divisor; | 728 | 0 | value /= divisor; | 729 | 0 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 730 | 0 | value += 1; | 731 | 0 | } | 732 | 0 | } | 733 | 0 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 734 | 12 | } else if (UNLIKELY(!found_value && !found_dot)) { | 735 | 0 | *result = StringParser::PARSE_FAILURE; | 736 | 0 | } | 737 | | | 738 | 12 | if (type_scale > scale) { | 739 | 4 | value *= get_scale_multiplier<T>(type_scale - scale); | 740 | 4 | } | 741 | | | 742 | 12 | return is_negative ? T(-value) : T(value); | 743 | 16 | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EnNS_10vectorized12Decimal128V3EEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 555 | 28 | int type_scale, ParseResult* result) { | 556 | 28 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 557 | 28 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 558 | 28 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 559 | 28 | "wide::Int256."); | 560 | | // Special cases: | 561 | | // 1) '' == Fail, an empty string fails to parse. | 562 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 564 | | // 4) '#.' == '#', a trailing dot is ignored. | 565 | | | 566 | | // Ignore leading and trailing spaces. | 567 | 28 | while (len > 0 && is_whitespace(*s)) { | 568 | 0 | ++s; | 569 | 0 | --len; | 570 | 0 | } | 571 | 28 | while (len > 0 && is_whitespace(s[len - 1])) { | 572 | 0 | --len; | 573 | 0 | } | 574 | | | 575 | 28 | bool is_negative = false; | 576 | 28 | if (len > 0) { | 577 | 28 | switch (*s) { | 578 | 1 | case '-': | 579 | 1 | is_negative = true; | 580 | 1 | [[fallthrough]]; | 581 | 1 | case '+': | 582 | 1 | ++s; | 583 | 1 | --len; | 584 | 28 | } | 585 | 28 | } | 586 | | | 587 | | // Ignore leading zeros. | 588 | 28 | bool found_value = false; | 589 | 32 | while (len > 0 && UNLIKELY(*s == '0')) { | 590 | 4 | found_value = true; | 591 | 4 | ++s; | 592 | 4 | --len; | 593 | 4 | } | 594 | | | 595 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 597 | | // overflow. 
| 598 | 28 | int scale = 0; | 599 | 28 | int found_dot = 0; | 600 | 28 | if (len > 0 && *s == '.') { | 601 | 0 | found_dot = 1; | 602 | 0 | ++s; | 603 | 0 | --len; | 604 | 0 | while (len > 0 && UNLIKELY(*s == '0')) { | 605 | 0 | found_value = true; | 606 | 0 | ++scale; | 607 | 0 | ++s; | 608 | 0 | --len; | 609 | 0 | } | 610 | 0 | } | 611 | | | 612 | 28 | int precision = 0; | 613 | 28 | int max_digit = type_precision - type_scale; | 614 | 28 | int cur_digit = 0; | 615 | 28 | bool found_exponent = false; | 616 | 28 | int8_t exponent = 0; | 617 | 28 | T value = 0; | 618 | 28 | bool has_round = false; | 619 | 1.07k | for (int i = 0; i < len; ++i) { | 620 | 1.04k | const char& c = s[i]; | 621 | 1.04k | if (LIKELY('0' <= c && c <= '9')) { | 622 | 1.01k | found_value = true; | 623 | | // Ignore digits once the type's precision limit is reached. This avoids | 624 | | // overflowing the underlying storage while handling a string like | 625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 626 | | // an exponent will be made later. | 627 | 1.01k | if (LIKELY(type_precision > precision) && !has_round) { | 628 | 1.01k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 629 | 1.01k | ++precision; | 630 | 1.01k | scale += found_dot; | 631 | 1.01k | cur_digit = precision - scale; | 632 | 1.01k | } else if (!found_dot && max_digit < (precision - scale)) { | 633 | 0 | *result = StringParser::PARSE_OVERFLOW; | 634 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 636 | 0 | return value; | 637 | 4 | } else if (found_dot && scale >= type_scale && !has_round) { | 638 | | // make rounding cases | 639 | 4 | if (c > '4') { | 640 | 4 | value += 1; | 641 | 4 | } | 642 | 4 | has_round = true; | 643 | 4 | continue; | 644 | 4 | } else if (!found_dot) { | 645 | 0 | ++cur_digit; | 646 | 0 | } | 647 | 1.01k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 648 | 1.01k | } else if (c == '.' && LIKELY(!found_dot)) { | 649 | 26 | found_dot = 1; | 650 | 26 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 651 | 0 | found_exponent = true; | 652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 655 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 656 | 0 | } | 657 | 0 | return 0; | 658 | 0 | } | 659 | 0 | break; | 660 | 0 | } else { | 661 | 0 | if (value == 0) { | 662 | 0 | *result = StringParser::PARSE_FAILURE; | 663 | 0 | return 0; | 664 | 0 | } | 665 | | // here to handle | 666 | 0 | *result = StringParser::PARSE_SUCCESS; | 667 | 0 | if (type_scale >= scale) { | 668 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 669 | | // here meet non-valid character, should return the value, keep going to meet | 670 | | // the E/e character because we make right user-given type_precision | 671 | | // not max number type_precision | 672 | 0 | if (!is_numeric_ascii(c)) { | 673 | 0 | if (cur_digit > type_precision) { | 674 | 0 | *result = StringParser::PARSE_OVERFLOW; | 675 | 0 | value = is_negative | 676 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 677 | 0 | : vectorized::max_decimal_value<DecimalType>( | 678 | 0 | type_precision); | 679 | 0 | return value; | 680 | 0 | } | 681 | 0 | return is_negative ? T(-value) : T(value); | 682 | 0 | } | 683 | 0 | } | 684 | | | 685 | 0 | return is_negative ? T(-value) : T(value); | 686 | 0 | } | 687 | 1.04k | } | 688 | | | 689 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 690 | 28 | if (exponent > scale) { | 691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 693 | 0 | precision += exponent - scale; | 694 | |
| 695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 696 | 0 | scale = 0; | 697 | 28 | } else { | 698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 699 | | // the precision must also be set to 4 but that will be done below for the | 700 | | // non-exponent case anyways. | 701 | 28 | scale -= exponent; | 702 | 28 | } | 703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 704 | | // were ignored during previous parsing. | 705 | 28 | if (scale > precision) { | 706 | 0 | precision = scale; | 707 | 0 | } | 708 | | | 709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 710 | | // than just letting the function run out. | 711 | 28 | *result = StringParser::PARSE_SUCCESS; | 712 | 28 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 713 | 2 | *result = StringParser::PARSE_OVERFLOW; | 714 | 2 | if constexpr (TYPE_DECIMALV2 != P) { | 715 | | // decimalv3 overflow will return max min value for type precision | 716 | 2 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 717 | 2 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 718 | 2 | return value; | 719 | 2 | } | 720 | 26 | } else if (UNLIKELY(scale > type_scale)) { | 721 | 8 | *result = StringParser::PARSE_UNDERFLOW; | 722 | 8 | int shift = scale - type_scale; | 723 | 8 | T divisor = get_scale_multiplier<T>(shift); | 724 | 8 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 725 | 0 | value = 0; | 726 | 8 | } else { | 727 | 8 | T remainder = value % divisor; | 728 | 8 | value /= divisor; | 729 | 8 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 730 | 8 | value += 1; | 731 | 8 | } | 732 | 8 | } | 733 | 8 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 734 | 18 | } else if (UNLIKELY(!found_value && !found_dot)) { | 735 | 0 | *result = StringParser::PARSE_FAILURE; | 736 | 0 | } | 737 | | | 738 | 26 | if (type_scale > scale) { | 739 | 4 | value *= get_scale_multiplier<T>(type_scale - scale); | 740 | 4 | } | 741 | | | 742 | 26 | return is_negative ? T(-value) : T(value); | 743 | 28 | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EN4wide7integerILm256EiEENS_10vectorized7DecimalIS5_EEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 555 | 1 | int type_scale, ParseResult* result) { | 556 | 1 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 557 | 1 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 558 | 1 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 559 | 1 | "wide::Int256."); | 560 | | // Special cases: | 561 | | // 1) '' == Fail, an empty string fails to parse. | 562 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 564 | | // 4) '#.' == '#', a trailing dot is ignored. | 565 | | | 566 | | // Ignore leading and trailing spaces. | 567 | 1 | while (len > 0 && is_whitespace(*s)) { | 568 | 0 | ++s; | 569 | 0 | --len; | 570 | 0 | } | 571 | 1 | while (len > 0 && is_whitespace(s[len - 1])) { | 572 | 0 | --len; | 573 | 0 | } | 574 | | | 575 | 1 | bool is_negative = false; | 576 | 1 | if (len > 0) { | 577 | 1 | switch (*s) { | 578 | 0 | case '-': | 579 | 0 | is_negative = true; | 580 | 0 | [[fallthrough]]; | 581 | 0 | case '+': | 582 | 0 | ++s; | 583 | 0 | --len; | 584 | 1 | } | 585 | 1 | } | 586 | | | 587 | | // Ignore leading zeros. | 588 | 1 | bool found_value = false; | 589 | 1 | while (len > 0 && UNLIKELY(*s == '0')) { | 590 | 0 | found_value = true; | 591 | 0 | ++s; | 592 | 0 | --len; | 593 | 0 | } | 594 | | | 595 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 597 | | // overflow. 
| 598 | 1 | int scale = 0; | 599 | 1 | int found_dot = 0; | 600 | 1 | if (len > 0 && *s == '.') { | 601 | 0 | found_dot = 1; | 602 | 0 | ++s; | 603 | 0 | --len; | 604 | 0 | while (len > 0 && UNLIKELY(*s == '0')) { | 605 | 0 | found_value = true; | 606 | 0 | ++scale; | 607 | 0 | ++s; | 608 | 0 | --len; | 609 | 0 | } | 610 | 0 | } | 611 | | | 612 | 1 | int precision = 0; | 613 | 1 | int max_digit = type_precision - type_scale; | 614 | 1 | int cur_digit = 0; | 615 | 1 | bool found_exponent = false; | 616 | 1 | int8_t exponent = 0; | 617 | 1 | T value = 0; | 618 | 1 | bool has_round = false; | 619 | 78 | for (int i = 0; i < len; ++i) { | 620 | 77 | const char& c = s[i]; | 621 | 77 | if (LIKELY('0' <= c && c <= '9')) { | 622 | 76 | found_value = true; | 623 | | // Ignore digits once the type's precision limit is reached. This avoids | 624 | | // overflowing the underlying storage while handling a string like | 625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 626 | | // an exponent will be made later. | 627 | 76 | if (LIKELY(type_precision > precision) && !has_round) { | 628 | 76 | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 629 | 76 | ++precision; | 630 | 76 | scale += found_dot; | 631 | 76 | cur_digit = precision - scale; | 632 | 76 | } else if (!found_dot && max_digit < (precision - scale)) { | 633 | 0 | *result = StringParser::PARSE_OVERFLOW; | 634 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 636 | 0 | return value; | 637 | 0 | } else if (found_dot && scale >= type_scale && !has_round) { | 638 | | // make rounding cases | 639 | 0 | if (c > '4') { | 640 | 0 | value += 1; | 641 | 0 | } | 642 | 0 | has_round = true; | 643 | 0 | continue; | 644 | 0 | } else if (!found_dot) { | 645 | 0 | ++cur_digit; | 646 | 0 | } | 647 | 76 | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 648 | 76 | } else if (c == '.' && LIKELY(!found_dot)) { | 649 | 1 | found_dot = 1; | 650 | 1 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 651 | 0 | found_exponent = true; | 652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 655 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 656 | 0 | } | 657 | 0 | return 0; | 658 | 0 | } | 659 | 0 | break; | 660 | 0 | } else { | 661 | 0 | if (value == 0) { | 662 | 0 | *result = StringParser::PARSE_FAILURE; | 663 | 0 | return 0; | 664 | 0 | } | 665 | | // here to handle | 666 | 0 | *result = StringParser::PARSE_SUCCESS; | 667 | 0 | if (type_scale >= scale) { | 668 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 669 | | // here meet non-valid character, should return the value, keep going to meet | 670 | | // the E/e character because we make right user-given type_precision | 671 | | // not max number type_precision | 672 | 0 | if (!is_numeric_ascii(c)) { | 673 | 0 | if (cur_digit > type_precision) { | 674 | 0 | *result = StringParser::PARSE_OVERFLOW; | 675 | 0 | value = is_negative | 676 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 677 | 0 | : vectorized::max_decimal_value<DecimalType>( | 678 | 0 | type_precision); | 679 | 0 | return value; | 680 | 0 | } | 681 | 0 | return is_negative ? T(-value) : T(value); | 682 | 0 | } | 683 | 0 | } | 684 | | | 685 | 0 | return is_negative ? T(-value) : T(value); | 686 | 0 | } | 687 | 77 | } | 688 | | | 689 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 690 | 1 | if (exponent > scale) { | 691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 693 | 0 | precision += exponent - scale; | 694 | |
| 695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 696 | 0 | scale = 0; | 697 | 1 | } else { | 698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 699 | | // the precision must also be set to 4 but that will be done below for the | 700 | | // non-exponent case anyways. | 701 | 1 | scale -= exponent; | 702 | 1 | } | 703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 704 | | // were ignored during previous parsing. | 705 | 1 | if (scale > precision) { | 706 | 0 | precision = scale; | 707 | 0 | } | 708 | | | 709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 710 | | // than just letting the function run out. | 711 | 1 | *result = StringParser::PARSE_SUCCESS; | 712 | 1 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 713 | 0 | *result = StringParser::PARSE_OVERFLOW; | 714 | 0 | if constexpr (TYPE_DECIMALV2 != P) { | 715 | | // decimalv3 overflow will return max min value for type precision | 716 | 0 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 717 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 718 | 0 | return value; | 719 | 0 | } | 720 | 1 | } else if (UNLIKELY(scale > type_scale)) { | 721 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 722 | 0 | int shift = scale - type_scale; | 723 | 0 | T divisor = get_scale_multiplier<T>(shift); | 724 | 0 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 725 | 0 | value = 0; | 726 | 0 | } else { | 727 | 0 | T remainder = value % divisor; | 728 | 0 | value /= divisor; | 729 | 0 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 730 | 0 | value += 1; | 731 | 0 | } | 732 | 0 | } | 733 | 0 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 734 | 1 | } else if (UNLIKELY(!found_value && !found_dot)) { | 735 | 0 | *result = StringParser::PARSE_FAILURE; | 736 | 0 | } | 737 | | | 738 | 1 | if (type_scale > scale) { | 739 | 0 | value *= get_scale_multiplier<T>(type_scale - scale); | 740 | 0 | } | 741 | | | 742 | 1 | return is_negative ? T(-value) : T(value); | 743 | 1 | } |
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EnNS_10vectorized7DecimalInEEEET0_PKciiiPNS0_11ParseResultE Line | Count | Source | 555 | 142 | int type_scale, ParseResult* result) { | 556 | 142 | static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || | 557 | 142 | std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>, | 558 | 142 | "Cast string to decimal only support target type int32_t, int64_t, __int128 or " | 559 | 142 | "wide::Int256."); | 560 | | // Special cases: | 561 | | // 1) '' == Fail, an empty string fails to parse. | 562 | | // 2) ' # ' == #, leading and trailing white space is ignored. | 563 | | // 3) '.' == 0, a single dot parses as zero (for consistency with other types). | 564 | | // 4) '#.' == '#', a trailing dot is ignored. | 565 | | | 566 | | // Ignore leading and trailing spaces. | 567 | 142 | while (len > 0 && is_whitespace(*s)) { | 568 | 0 | ++s; | 569 | 0 | --len; | 570 | 0 | } | 571 | 142 | while (len > 0 && is_whitespace(s[len - 1])) { | 572 | 0 | --len; | 573 | 0 | } | 574 | | | 575 | 142 | bool is_negative = false; | 576 | 142 | if (len > 0) { | 577 | 142 | switch (*s) { | 578 | 25 | case '-': | 579 | 25 | is_negative = true; | 580 | 25 | [[fallthrough]]; | 581 | 25 | case '+': | 582 | 25 | ++s; | 583 | 25 | --len; | 584 | 142 | } | 585 | 142 | } | 586 | | | 587 | | // Ignore leading zeros. | 588 | 142 | bool found_value = false; | 589 | 164 | while (len > 0 && UNLIKELY(*s == '0')) { | 590 | 22 | found_value = true; | 591 | 22 | ++s; | 592 | 22 | --len; | 593 | 22 | } | 594 | | | 595 | | // Ignore leading zeros even after a dot. This allows for differentiating between | 596 | | // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would | 597 | | // overflow. 
| 598 | 142 | int scale = 0; | 599 | 142 | int found_dot = 0; | 600 | 142 | if (len > 0 && *s == '.') { | 601 | 9 | found_dot = 1; | 602 | 9 | ++s; | 603 | 9 | --len; | 604 | 14 | while (len > 0 && UNLIKELY(*s == '0')) { | 605 | 5 | found_value = true; | 606 | 5 | ++scale; | 607 | 5 | ++s; | 608 | 5 | --len; | 609 | 5 | } | 610 | 9 | } | 611 | | | 612 | 142 | int precision = 0; | 613 | 142 | int max_digit = type_precision - type_scale; | 614 | 142 | int cur_digit = 0; | 615 | 142 | bool found_exponent = false; | 616 | 142 | int8_t exponent = 0; | 617 | 142 | T value = 0; | 618 | 142 | bool has_round = false; | 619 | 2.24k | for (int i = 0; i < len; ++i) { | 620 | 2.11k | const char& c = s[i]; | 621 | 2.11k | if (LIKELY('0' <= c && c <= '9')) { | 622 | 1.99k | found_value = true; | 623 | | // Ignore digits once the type's precision limit is reached. This avoids | 624 | | // overflowing the underlying storage while handling a string like | 625 | | // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and | 626 | | // an exponent will be made later. | 627 | 1.99k | if (LIKELY(type_precision > precision) && !has_round) { | 628 | 1.98k | value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis... | 629 | 1.98k | ++precision; | 630 | 1.98k | scale += found_dot; | 631 | 1.98k | cur_digit = precision - scale; | 632 | 1.98k | } else if (!found_dot && max_digit < (precision - scale)) { | 633 | 0 | *result = StringParser::PARSE_OVERFLOW; | 634 | 0 | value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 635 | 0 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 636 | 0 | return value; | 637 | 8 | } else if (found_dot && scale >= type_scale && !has_round) { | 638 | | // make rounding cases | 639 | 8 | if (c > '4') { | 640 | 0 | value += 1; | 641 | 0 | } | 642 | 8 | has_round = true; | 643 | 8 | continue; | 644 | 8 | } else if (!found_dot) { | 645 | 0 | ++cur_digit; | 646 | 0 | } | 647 | 1.98k | DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128. | 648 | 1.98k | } else if (c == '.' && LIKELY(!found_dot)) { | 649 | 105 | found_dot = 1; | 650 | 105 | } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) { | 651 | 0 | found_exponent = true; | 652 | 0 | exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result); | 653 | 0 | if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) { | 654 | 0 | if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) { | 655 | 0 | *result = StringParser::PARSE_UNDERFLOW; | 656 | 0 | } | 657 | 0 | return 0; | 658 | 0 | } | 659 | 0 | break; | 660 | 15 | } else { | 661 | 15 | if (value == 0) { | 662 | 9 | *result = StringParser::PARSE_FAILURE; | 663 | 9 | return 0; | 664 | 9 | } | 665 | | // here to handle | 666 | 6 | *result = StringParser::PARSE_SUCCESS; | 667 | 6 | if (type_scale >= scale) { | 668 | 6 | value *= get_scale_multiplier<T>(type_scale - scale); | 669 | | // here meet non-valid character, should return the value, keep going to meet | 670 | | // the E/e character because we make right user-given type_precision | 671 | | // not max number type_precision | 672 | 6 | if (!is_numeric_ascii(c)) { | 673 | 6 | if (cur_digit > type_precision) { | 674 | 0 | *result = StringParser::PARSE_OVERFLOW; | 675 | 0 | value = is_negative | 676 | 0 | ? 
vectorized::min_decimal_value<DecimalType>(type_precision) | 677 | 0 | : vectorized::max_decimal_value<DecimalType>( | 678 | 0 | type_precision); | 679 | 0 | return value; | 680 | 0 | } | 681 | 6 | return is_negative ? T(-value) : T(value); | 682 | 6 | } | 683 | 6 | } | 684 | | | 685 | 0 | return is_negative ? T(-value) : T(value); | 686 | 6 | } | 687 | 2.11k | } | 688 | | | 689 | | // Find the number of truncated digits before adjusting the precision for an exponent. | 690 | 127 | if (exponent > scale) { | 691 | | // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the | 692 | | // scale must be set to 0 and the value set to 100 which means a precision of 3. | 693 | 0 | precision += exponent - scale; | 694 | |
| 695 | 0 | value *= get_scale_multiplier<T>(exponent - scale); | 696 | 0 | scale = 0; | 697 | 127 | } else { | 698 | | // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed, | 699 | | // the precision must also be set to 4 but that will be done below for the | 700 | | // non-exponent case anyways. | 701 | 127 | scale -= exponent; | 702 | 127 | } | 703 | | // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros | 704 | | // were ignored during previous parsing. | 705 | 127 | if (scale > precision) { | 706 | 3 | precision = scale; | 707 | 3 | } | 708 | | | 709 | | // Microbenchmarks show that beyond this point, returning on parse failure is slower | 710 | | // than just letting the function run out. | 711 | 127 | *result = StringParser::PARSE_SUCCESS; | 712 | 127 | if (UNLIKELY(precision - scale > type_precision - type_scale)) { | 713 | 9 | *result = StringParser::PARSE_OVERFLOW; | 714 | 9 | if constexpr (TYPE_DECIMALV2 != P) { | 715 | | // decimalv3 overflow will return max min value for type precision | 716 | 9 | value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision) | 717 | 9 | : vectorized::max_decimal_value<DecimalType>(type_precision); | 718 | 9 | return value; | 719 | 9 | } | 720 | 118 | } else if (UNLIKELY(scale > type_scale)) { | 721 | 17 | *result = StringParser::PARSE_UNDERFLOW; | 722 | 17 | int shift = scale - type_scale; | 723 | 17 | T divisor = get_scale_multiplier<T>(shift); | 724 | 17 | if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { | 725 | 0 | value = 0; | 726 | 17 | } else { | 727 | 17 | T remainder = value % divisor; | 728 | 17 | value /= divisor; | 729 | 17 | if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { | 730 | 17 | value += 1; | 731 | 17 | } | 732 | 17 | } | 733 | 17 | DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128. 
| 734 | 101 | } else if (UNLIKELY(!found_value && !found_dot)) { | 735 | 0 | *result = StringParser::PARSE_FAILURE; | 736 | 0 | } | 737 | | | 738 | 127 | if (type_scale > scale) { | 739 | 78 | value *= get_scale_multiplier<T>(type_scale - scale); | 740 | 78 | } | 741 | | | 742 | 127 | return is_negative ? T(-value) : T(value); | 743 | 142 | } |
|
744 | | |
745 | | } // end namespace doris |