Coverage Report

Created: 2024-11-20 15:53

/root/doris/be/src/util/string_parser.hpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
#include <stdlib.h>
27
28
// IWYU pragma: no_include <bits/std_abs.h>
29
#include <cmath> // IWYU pragma: keep
30
#include <cstdint>
31
#include <limits>
32
#include <map>
33
#include <string>
34
#include <system_error>
35
#include <type_traits>
36
#include <utility>
37
38
#include "common/compiler_util.h" // IWYU pragma: keep
39
#include "common/status.h"
40
#include "runtime/large_int_value.h"
41
#include "runtime/primitive_type.h"
42
#include "vec/common/int_exp.h"
43
#include "vec/core/extended_types.h"
44
#include "vec/core/wide_integer.h"
45
#include "vec/data_types/data_type_decimal.h"
46
#include "vec/data_types/number_traits.h"
47
48
namespace doris {
49
namespace vectorized {
50
template <DecimalNativeTypeConcept T>
51
struct Decimal;
52
} // namespace vectorized
53
54
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
55
// this is significantly faster than libc (atoi/strtol and atof/strtod).
56
//
57
// Strings with leading and trailing whitespaces are accepted.
58
// Branching is heavily optimized for the non-whitespace successful case.
59
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
60
// If that first attempt was unsuccessful, these functions retry the parsing after removing
61
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
62
//
63
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
64
// data type.  This is different from hive, which returns NULL for overflow slots for int types
65
// and inf/-inf for float types.
66
//
67
// Things we tried that did not work:
68
//  - lookup table for converting character to digit
69
// Improvements (TODO):
70
//  - Validate input using _sidd_compare_ranges
71
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
72
class StringParser {
73
public:
74
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
75
76
    template <typename T>
77
122k
    static T numeric_limits(bool negative) {
78
122k
        if constexpr (std::is_same_v<T, __int128>) {
79
121k
            return negative ? MIN_INT128 : MAX_INT128;
80
121k
        } else {
81
121k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
121k
        }
83
122k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
77
115k
    static T numeric_limits(bool negative) {
78
115k
        if constexpr (std::is_same_v<T, __int128>) {
79
115k
            return negative ? MIN_INT128 : MAX_INT128;
80
115k
        } else {
81
115k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
115k
        }
83
115k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
77
1.97k
    static T numeric_limits(bool negative) {
78
1.97k
        if constexpr (std::is_same_v<T, __int128>) {
79
1.97k
            return negative ? MIN_INT128 : MAX_INT128;
80
1.97k
        } else {
81
1.97k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
1.97k
        }
83
1.97k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
77
1.85k
    static T numeric_limits(bool negative) {
78
1.85k
        if constexpr (std::is_same_v<T, __int128>) {
79
1.85k
            return negative ? MIN_INT128 : MAX_INT128;
80
1.85k
        } else {
81
1.85k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
1.85k
        }
83
1.85k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
77
2.10k
    static T numeric_limits(bool negative) {
78
2.10k
        if constexpr (std::is_same_v<T, __int128>) {
79
2.10k
            return negative ? MIN_INT128 : MAX_INT128;
80
2.10k
        } else {
81
2.10k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
2.10k
        }
83
2.10k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
77
725
    static T numeric_limits(bool negative) {
78
725
        if constexpr (std::is_same_v<T, __int128>) {
79
725
            return negative ? MIN_INT128 : MAX_INT128;
80
725
        } else {
81
725
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
725
        }
83
725
    }
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
77
21
    static T numeric_limits(bool negative) {
78
21
        if constexpr (std::is_same_v<T, __int128>) {
79
21
            return negative ? MIN_INT128 : MAX_INT128;
80
21
        } else {
81
21
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
21
        }
83
21
    }
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIjEET_b
_ZN5doris12StringParser14numeric_limitsIhEET_b
Line
Count
Source
77
91
    static T numeric_limits(bool negative) {
78
91
        if constexpr (std::is_same_v<T, __int128>) {
79
91
            return negative ? MIN_INT128 : MAX_INT128;
80
91
        } else {
81
91
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
82
91
        }
83
91
    }
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsItEET_b
Unexecuted instantiation: _ZN5doris12StringParser14numeric_limitsIoEET_b
84
85
    template <typename T>
86
129
    static T get_scale_multiplier(int scale) {
87
129
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
88
129
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
89
129
                      "You can only instantiate as int32_t, int64_t, __int128.");
90
129
        if constexpr (std::is_same_v<T, int32_t>) {
91
119
            return common::exp10_i32(scale);
92
119
        } else if constexpr (std::is_same_v<T, int64_t>) {
93
115
            return common::exp10_i64(scale);
94
115
        } else if constexpr (std::is_same_v<T, __int128>) {
95
0
            return common::exp10_i128(scale);
96
0
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
97
0
            return common::exp10_i256(scale);
98
0
        }
99
129
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
86
10
    static T get_scale_multiplier(int scale) {
87
10
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
88
10
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
89
10
                      "You can only instantiate as int32_t, int64_t, __int128.");
90
10
        if constexpr (std::is_same_v<T, int32_t>) {
91
10
            return common::exp10_i32(scale);
92
10
        } else if constexpr (std::is_same_v<T, int64_t>) {
93
10
            return common::exp10_i64(scale);
94
10
        } else if constexpr (std::is_same_v<T, __int128>) {
95
10
            return common::exp10_i128(scale);
96
10
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
97
10
            return common::exp10_i256(scale);
98
10
        }
99
10
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
86
4
    static T get_scale_multiplier(int scale) {
87
4
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
88
4
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
89
4
                      "You can only instantiate as int32_t, int64_t, __int128.");
90
4
        if constexpr (std::is_same_v<T, int32_t>) {
91
4
            return common::exp10_i32(scale);
92
4
        } else if constexpr (std::is_same_v<T, int64_t>) {
93
4
            return common::exp10_i64(scale);
94
4
        } else if constexpr (std::is_same_v<T, __int128>) {
95
4
            return common::exp10_i128(scale);
96
4
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
97
4
            return common::exp10_i256(scale);
98
4
        }
99
4
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
86
115
    static T get_scale_multiplier(int scale) {
87
115
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
88
115
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
89
115
                      "You can only instantiate as int32_t, int64_t, __int128.");
90
115
        if constexpr (std::is_same_v<T, int32_t>) {
91
115
            return common::exp10_i32(scale);
92
115
        } else if constexpr (std::is_same_v<T, int64_t>) {
93
115
            return common::exp10_i64(scale);
94
115
        } else if constexpr (std::is_same_v<T, __int128>) {
95
115
            return common::exp10_i128(scale);
96
115
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
97
115
            return common::exp10_i256(scale);
98
115
        }
99
115
    }
Unexecuted instantiation: _ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
100
101
    // This is considerably faster than glibc's implementation (25x).
102
    // In the case of overflow, the max/min value for the data type will be returned.
103
    // Assumes s represents a decimal number.
104
    template <typename T>
105
28.7k
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
28.7k
        T ans = string_to_int_internal<T>(s, len, result);
107
28.7k
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
3.00k
            return ans;
109
3.00k
        }
110
111
25.7k
        int i = skip_leading_whitespace(s, len);
112
25.7k
        return string_to_int_internal<T>(s + i, len - i, result);
113
28.7k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
26.3k
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
26.3k
        T ans = string_to_int_internal<T>(s, len, result);
107
26.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
1.85k
            return ans;
109
1.85k
        }
110
111
24.4k
        int i = skip_leading_whitespace(s, len);
112
24.4k
        return string_to_int_internal<T>(s + i, len - i, result);
113
26.3k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
450
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
450
        T ans = string_to_int_internal<T>(s, len, result);
107
450
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
52
            return ans;
109
52
        }
110
111
398
        int i = skip_leading_whitespace(s, len);
112
398
        return string_to_int_internal<T>(s + i, len - i, result);
113
450
    }
_ZN5doris12StringParser13string_to_intIiEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
471
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
471
        T ans = string_to_int_internal<T>(s, len, result);
107
471
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
73
            return ans;
109
73
        }
110
111
398
        int i = skip_leading_whitespace(s, len);
112
398
        return string_to_int_internal<T>(s + i, len - i, result);
113
471
    }
_ZN5doris12StringParser13string_to_intIlEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
662
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
662
        T ans = string_to_int_internal<T>(s, len, result);
107
662
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
258
            return ans;
109
258
        }
110
111
404
        int i = skip_leading_whitespace(s, len);
112
404
        return string_to_int_internal<T>(s + i, len - i, result);
113
662
    }
_ZN5doris12StringParser13string_to_intInEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
719
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
719
        T ans = string_to_int_internal<T>(s, len, result);
107
719
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
713
            return ans;
109
713
        }
110
111
6
        int i = skip_leading_whitespace(s, len);
112
6
        return string_to_int_internal<T>(s + i, len - i, result);
113
719
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser13string_to_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
20
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
20
        T ans = string_to_int_internal<T>(s, len, result);
107
20
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
20
            return ans;
109
20
        }
110
111
0
        int i = skip_leading_whitespace(s, len);
112
0
        return string_to_int_internal<T>(s + i, len - i, result);
113
20
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIjEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser13string_to_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
105
63
    static inline T string_to_int(const char* __restrict s, int len, ParseResult* result) {
106
63
        T ans = string_to_int_internal<T>(s, len, result);
107
63
        if (LIKELY(*result == PARSE_SUCCESS)) {
108
35
            return ans;
109
35
        }
110
111
28
        int i = skip_leading_whitespace(s, len);
112
28
        return string_to_int_internal<T>(s + i, len - i, result);
113
63
    }
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intItEET_PKciPNS0_11ParseResultE
Unexecuted instantiation: _ZN5doris12StringParser13string_to_intIoEET_PKciPNS0_11ParseResultE
114
115
    // This is considerably faster than glibc's implementation.
116
    // In the case of overflow, the max/min value for the data type will be returned.
117
    // Assumes s represents a decimal number.
118
    template <typename T>
119
1.37k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
120
1.37k
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
121
1.37k
        if (LIKELY(*result == PARSE_SUCCESS)) {
122
84
            return ans;
123
84
        }
124
125
1.28k
        int i = skip_leading_whitespace(s, len);
126
1.28k
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
127
1.37k
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
119
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
120
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
121
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
122
21
            return ans;
123
21
        }
124
125
322
        int i = skip_leading_whitespace(s, len);
126
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
127
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
119
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
120
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
121
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
122
21
            return ans;
123
21
        }
124
125
322
        int i = skip_leading_whitespace(s, len);
126
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
127
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
119
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
120
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
121
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
122
21
            return ans;
123
21
        }
124
125
322
        int i = skip_leading_whitespace(s, len);
126
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
127
343
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
119
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
120
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
121
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
122
21
            return ans;
123
21
        }
124
125
322
        int i = skip_leading_whitespace(s, len);
126
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
127
343
    }
128
129
    // Convert a string s representing a number in given base into a decimal number.
130
    template <typename T>
131
    static inline T string_to_int(const char* __restrict s, int len, int base,
132
27.8k
                                  ParseResult* result) {
133
27.8k
        T ans = string_to_int_internal<T>(s, len, base, result);
134
27.8k
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
2.06k
            return ans;
136
2.06k
        }
137
138
25.7k
        int i = skip_leading_whitespace(s, len);
139
25.7k
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKciiPNS0_11ParseResultE
Line
Count
Source
132
26.4k
                                  ParseResult* result) {
133
26.4k
        T ans = string_to_int_internal<T>(s, len, base, result);
134
26.4k
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
1.91k
            return ans;
136
1.91k
        }
137
138
24.5k
        int i = skip_leading_whitespace(s, len);
139
24.5k
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKciiPNS0_11ParseResultE
Line
Count
Source
132
490
                                  ParseResult* result) {
133
490
        T ans = string_to_int_internal<T>(s, len, base, result);
134
490
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
56
            return ans;
136
56
        }
137
138
434
        int i = skip_leading_whitespace(s, len);
139
434
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKciiPNS0_11ParseResultE
Line
Count
Source
132
441
                                  ParseResult* result) {
133
441
        T ans = string_to_int_internal<T>(s, len, base, result);
134
441
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
49
            return ans;
136
49
        }
137
138
392
        int i = skip_leading_whitespace(s, len);
139
392
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKciiPNS0_11ParseResultE
Line
Count
Source
132
441
                                  ParseResult* result) {
133
441
        T ans = string_to_int_internal<T>(s, len, base, result);
134
441
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
49
            return ans;
136
49
        }
137
138
392
        int i = skip_leading_whitespace(s, len);
139
392
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKciiPNS0_11ParseResultE
Line
Count
Source
132
1
                                  ParseResult* result) {
133
1
        T ans = string_to_int_internal<T>(s, len, base, result);
134
1
        if (LIKELY(*result == PARSE_SUCCESS)) {
135
1
            return ans;
136
1
        }
137
138
0
        int i = skip_leading_whitespace(s, len);
139
0
        return string_to_int_internal<T>(s + i, len - i, base, result);
140
1
    }
141
142
    template <typename T>
143
19.1k
    static inline T string_to_float(const char* __restrict s, int len, ParseResult* result) {
144
19.1k
        return string_to_float_internal<T>(s, len, result);
145
19.1k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
143
8.25k
    static inline T string_to_float(const char* __restrict s, int len, ParseResult* result) {
144
8.25k
        return string_to_float_internal<T>(s, len, result);
145
8.25k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
143
10.9k
    static inline T string_to_float(const char* __restrict s, int len, ParseResult* result) {
144
10.9k
        return string_to_float_internal<T>(s, len, result);
145
10.9k
    }
146
147
    // Parses a string for 'true' or 'false', case insensitive.
148
322
    static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) {
149
322
        bool ans = string_to_bool_internal(s, len, result);
150
322
        if (LIKELY(*result == PARSE_SUCCESS)) {
151
26
            return ans;
152
26
        }
153
154
296
        int i = skip_leading_whitespace(s, len);
155
296
        return string_to_bool_internal(s + i, len - i, result);
156
322
    }
157
158
    template <PrimitiveType P, typename T = PrimitiveTypeTraits<P>::CppType::NativeType,
159
              typename DecimalType = PrimitiveTypeTraits<P>::ColumnType::value_type>
160
    static inline T string_to_decimal(const char* __restrict s, int len, int type_precision,
161
                                      int type_scale, ParseResult* result);
162
163
    template <typename T>
164
    static Status split_string_to_map(const std::string& base, const T element_separator,
165
                                      const T key_value_separator,
166
                                      std::map<std::string, std::string>* result) {
167
        int key_pos = 0;
168
        int key_end;
169
        int val_pos;
170
        int val_end;
171
172
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
173
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
174
                std::string::npos) {
175
                break;
176
            }
177
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
178
                val_end = base.size();
179
            }
180
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
181
                                          base.substr(val_pos, val_end - val_pos)));
182
            key_pos = val_end;
183
            if (key_pos != std::string::npos) {
184
                ++key_pos;
185
            }
186
        }
187
188
        return Status::OK();
189
    }
190
191
private:
192
    // This is considerably faster than glibc's implementation.
193
    // In the case of overflow, the max/min value for the data type will be returned.
194
    // Assumes s represents a decimal number.
195
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
196
    template <typename T>
197
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
198
199
    // This is considerably faster than glibc's implementation.
200
    // In the case of overflow, the max/min value for the data type will be returned.
201
    // Assumes s represents a decimal number.
202
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
203
    template <typename T>
204
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
205
                                                    ParseResult* result);
206
207
    // Convert a string s representing a number in given base into a decimal number.
208
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
209
    template <typename T>
210
    static inline T string_to_int_internal(const char* __restrict s, int len, int base,
211
                                           ParseResult* result);
212
213
    // Converts an ascii string to an integer of type T assuming it cannot overflow
214
    // and the number is positive.
215
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
216
    template <typename T>
217
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
218
                                              ParseResult* result);
219
220
    // This is considerably faster than glibc's implementation (>100x why???)
221
    // No special case handling needs to be done for overflows, the floating point spec
222
    // already does it and will cap the values to -inf/inf
223
    // To avoid inaccurate conversions this function falls back to strtod for
224
    // scientific notation.
225
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
226
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
227
    template <typename T>
228
    static inline T string_to_float_internal(const char* __restrict s, int len,
229
                                             ParseResult* result);
230
231
    // parses a string for 'true' or 'false', case insensitive
232
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
233
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
234
                                               ParseResult* result);
235
236
    // Returns true if s only contains whitespace.
237
25.4k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
238
100k
        for (int i = 0; i < len; ++i) {
239
75.4k
            if (!LIKELY(is_whitespace(s[i]))) {
240
618
                return false;
241
618
            }
242
75.4k
        }
243
24.8k
        return true;
244
25.4k
    }
245
246
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
247
339
    static inline bool is_float_suffix(const char* __restrict s, int len) {
248
339
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
249
339
    }
250
251
1
    static inline bool is_all_digit(const char* __restrict s, int len) {
252
2
        for (int i = 0; i < len; ++i) {
253
1
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
254
0
                return false;
255
0
            }
256
1
        }
257
1
        return true;
258
1
    }
259
260
    // Returns the position of the first non-whitespace character in s.
261
53.0k
    static inline int skip_leading_whitespace(const char* __restrict s, int len) {
262
53.0k
        int i = 0;
263
202k
        while (i < len && is_whitespace(s[i])) {
264
149k
            ++i;
265
149k
        }
266
53.0k
        return i;
267
53.0k
    }
268
269
    // Our own definition of "isspace" that optimize on the ' ' branch.
270
398k
    static inline bool is_whitespace(const char& c) {
271
398k
        return LIKELY(c == ' ') ||
272
398k
               UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r');
273
398k
    }
274
275
}; // end of class StringParser
276
277
template <typename T>
278
54.4k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
54.4k
    if (UNLIKELY(len <= 0)) {
280
206
        *result = PARSE_FAILURE;
281
206
        return 0;
282
206
    }
283
284
54.2k
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
54.2k
    UnsignedT val = 0;
286
54.2k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
54.2k
    bool negative = false;
288
54.2k
    int i = 0;
289
54.2k
    switch (*s) {
290
14.3k
    case '-':
291
14.3k
        negative = true;
292
14.3k
        max_val += 1;
293
14.3k
        [[fallthrough]];
294
14.6k
    case '+':
295
14.6k
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
14.6k
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
54.2k
    }
302
303
    // This is the fast path where the string cannot overflow.
304
54.2k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
3.35k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
3.35k
        return static_cast<T>(negative ? -val : val);
307
3.35k
    }
308
309
50.8k
    const T max_div_10 = max_val / 10;
310
50.8k
    const T max_mod_10 = max_val % 10;
311
312
50.8k
    int first = i;
313
114k
    for (; i < len; ++i) {
314
113k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
78.2k
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
78.2k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
14.8k
                *result = PARSE_OVERFLOW;
319
14.8k
                return negative ? -max_val : max_val;
320
14.8k
            }
321
63.4k
            val = val * 10 + digit;
322
63.4k
        } else {
323
35.5k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
35.5k
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
24.0k
                *result = PARSE_FAILURE;
328
24.0k
                return 0;
329
24.0k
            }
330
            // Returning here is slightly faster than breaking the loop.
331
11.4k
            *result = PARSE_SUCCESS;
332
11.4k
            return static_cast<T>(negative ? -val : val);
333
35.5k
        }
334
113k
    }
335
504
    *result = PARSE_SUCCESS;
336
504
    return static_cast<T>(negative ? -val : val);
337
50.8k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
50.8k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
50.8k
    if (UNLIKELY(len <= 0)) {
280
206
        *result = PARSE_FAILURE;
281
206
        return 0;
282
206
    }
283
284
50.6k
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
50.6k
    UnsignedT val = 0;
286
50.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
50.6k
    bool negative = false;
288
50.6k
    int i = 0;
289
50.6k
    switch (*s) {
290
13.8k
    case '-':
291
13.8k
        negative = true;
292
13.8k
        max_val += 1;
293
13.8k
        [[fallthrough]];
294
13.9k
    case '+':
295
13.9k
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
13.9k
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
50.6k
    }
302
303
    // This is the fast path where the string cannot overflow.
304
50.6k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
1.43k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
1.43k
        return static_cast<T>(negative ? -val : val);
307
1.43k
    }
308
309
49.1k
    const T max_div_10 = max_val / 10;
310
49.1k
    const T max_mod_10 = max_val % 10;
311
312
49.1k
    int first = i;
313
104k
    for (; i < len; ++i) {
314
103k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
69.4k
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
69.4k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
14.5k
                *result = PARSE_OVERFLOW;
319
14.5k
                return negative ? -max_val : max_val;
320
14.5k
            }
321
54.9k
            val = val * 10 + digit;
322
54.9k
        } else {
323
34.2k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
34.2k
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
23.2k
                *result = PARSE_FAILURE;
328
23.2k
                return 0;
329
23.2k
            }
330
            // Returning here is slightly faster than breaking the loop.
331
11.0k
            *result = PARSE_SUCCESS;
332
11.0k
            return static_cast<T>(negative ? -val : val);
333
34.2k
        }
334
103k
    }
335
433
    *result = PARSE_SUCCESS;
336
433
    return static_cast<T>(negative ? -val : val);
337
49.1k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
848
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
848
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
848
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
848
    UnsignedT val = 0;
286
848
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
848
    bool negative = false;
288
848
    int i = 0;
289
848
    switch (*s) {
290
209
    case '-':
291
209
        negative = true;
292
209
        max_val += 1;
293
209
        [[fallthrough]];
294
258
    case '+':
295
258
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
258
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
848
    }
302
303
    // This is the fast path where the string cannot overflow.
304
848
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
112
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
112
        return static_cast<T>(negative ? -val : val);
307
112
    }
308
309
736
    const T max_div_10 = max_val / 10;
310
736
    const T max_mod_10 = max_val % 10;
311
312
736
    int first = i;
313
2.19k
    for (; i < len; ++i) {
314
2.16k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
1.58k
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
1.58k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
124
                *result = PARSE_OVERFLOW;
319
124
                return negative ? -max_val : max_val;
320
124
            }
321
1.45k
            val = val * 10 + digit;
322
1.45k
        } else {
323
588
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
588
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
378
                *result = PARSE_FAILURE;
328
378
                return 0;
329
378
            }
330
            // Returning here is slightly faster than breaking the loop.
331
210
            *result = PARSE_SUCCESS;
332
210
            return static_cast<T>(negative ? -val : val);
333
588
        }
334
2.16k
    }
335
24
    *result = PARSE_SUCCESS;
336
24
    return static_cast<T>(negative ? -val : val);
337
736
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
869
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
869
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
869
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
869
    UnsignedT val = 0;
286
869
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
869
    bool negative = false;
288
869
    int i = 0;
289
869
    switch (*s) {
290
160
    case '-':
291
160
        negative = true;
292
160
        max_val += 1;
293
160
        [[fallthrough]];
294
258
    case '+':
295
258
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
258
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
869
    }
302
303
    // This is the fast path where the string cannot overflow.
304
869
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
392
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
392
        return static_cast<T>(negative ? -val : val);
307
392
    }
308
309
477
    const T max_div_10 = max_val / 10;
310
477
    const T max_mod_10 = max_val % 10;
311
312
477
    int first = i;
313
2.93k
    for (; i < len; ++i) {
314
2.92k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
2.58k
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
2.58k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
124
                *result = PARSE_OVERFLOW;
319
124
                return negative ? -max_val : max_val;
320
124
            }
321
2.46k
            val = val * 10 + digit;
322
2.46k
        } else {
323
336
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
336
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
210
                *result = PARSE_FAILURE;
328
210
                return 0;
329
210
            }
330
            // Returning here is slightly faster than breaking the loop.
331
126
            *result = PARSE_SUCCESS;
332
126
            return static_cast<T>(negative ? -val : val);
333
336
        }
334
2.92k
    }
335
17
    *result = PARSE_SUCCESS;
336
17
    return static_cast<T>(negative ? -val : val);
337
477
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
1.06k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
1.06k
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
1.06k
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
1.06k
    UnsignedT val = 0;
286
1.06k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
1.06k
    bool negative = false;
288
1.06k
    int i = 0;
289
1.06k
    switch (*s) {
290
207
    case '-':
291
207
        negative = true;
292
207
        max_val += 1;
293
207
        [[fallthrough]];
294
256
    case '+':
295
256
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
256
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
1.06k
    }
302
303
    // This is the fast path where the string cannot overflow.
304
1.06k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
633
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
633
        return static_cast<T>(negative ? -val : val);
307
633
    }
308
309
433
    const T max_div_10 = max_val / 10;
310
433
    const T max_mod_10 = max_val % 10;
311
312
433
    int first = i;
313
4.66k
    for (; i < len; ++i) {
314
4.63k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
4.35k
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
4.35k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
118
                *result = PARSE_OVERFLOW;
319
118
                return negative ? -max_val : max_val;
320
118
            }
321
4.23k
            val = val * 10 + digit;
322
4.23k
        } else {
323
288
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
288
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
204
                *result = PARSE_FAILURE;
328
204
                return 0;
329
204
            }
330
            // Returning here is slightly faster than breaking the loop.
331
84
            *result = PARSE_SUCCESS;
332
84
            return static_cast<T>(negative ? -val : val);
333
288
        }
334
4.63k
    }
335
27
    *result = PARSE_SUCCESS;
336
27
    return static_cast<T>(negative ? -val : val);
337
433
}
_ZN5doris12StringParser22string_to_int_internalInEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
725
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
725
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
725
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
725
    UnsignedT val = 0;
286
725
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
725
    bool negative = false;
288
725
    int i = 0;
289
725
    switch (*s) {
290
0
    case '-':
291
0
        negative = true;
292
0
        max_val += 1;
293
0
        [[fallthrough]];
294
0
    case '+':
295
0
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
0
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
725
    }
302
303
    // This is the fast path where the string cannot overflow.
304
725
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
710
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
710
        return static_cast<T>(negative ? -val : val);
307
710
    }
308
309
15
    const T max_div_10 = max_val / 10;
310
15
    const T max_mod_10 = max_val % 10;
311
312
15
    int first = i;
313
360
    for (; i < len; ++i) {
314
357
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
351
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
351
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
6
                *result = PARSE_OVERFLOW;
319
6
                return negative ? -max_val : max_val;
320
6
            }
321
345
            val = val * 10 + digit;
322
345
        } else {
323
6
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
6
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
6
                *result = PARSE_FAILURE;
328
6
                return 0;
329
6
            }
330
            // Returning here is slightly faster than breaking the loop.
331
0
            *result = PARSE_SUCCESS;
332
0
            return static_cast<T>(negative ? -val : val);
333
6
        }
334
357
    }
335
3
    *result = PARSE_SUCCESS;
336
3
    return static_cast<T>(negative ? -val : val);
337
15
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser22string_to_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
20
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
20
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
20
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
20
    UnsignedT val = 0;
286
20
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
20
    bool negative = false;
288
20
    int i = 0;
289
20
    switch (*s) {
290
0
    case '-':
291
0
        negative = true;
292
0
        max_val += 1;
293
0
        [[fallthrough]];
294
0
    case '+':
295
0
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
0
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
20
    }
302
303
    // This is the fast path where the string cannot overflow.
304
20
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
20
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
20
        return static_cast<T>(negative ? -val : val);
307
20
    }
308
309
0
    const T max_div_10 = max_val / 10;
310
0
    const T max_mod_10 = max_val % 10;
311
312
0
    int first = i;
313
0
    for (; i < len; ++i) {
314
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
0
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
0
                *result = PARSE_OVERFLOW;
319
0
                return negative ? -max_val : max_val;
320
0
            }
321
0
            val = val * 10 + digit;
322
0
        } else {
323
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
0
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
0
                *result = PARSE_FAILURE;
328
0
                return 0;
329
0
            }
330
            // Returning here is slightly faster than breaking the loop.
331
0
            *result = PARSE_SUCCESS;
332
0
            return static_cast<T>(negative ? -val : val);
333
0
        }
334
0
    }
335
0
    *result = PARSE_SUCCESS;
336
0
    return static_cast<T>(negative ? -val : val);
337
0
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIjEET_PKciPNS0_11ParseResultE
_ZN5doris12StringParser22string_to_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
278
91
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
279
91
    if (UNLIKELY(len <= 0)) {
280
0
        *result = PARSE_FAILURE;
281
0
        return 0;
282
0
    }
283
284
91
    typedef typename std::make_unsigned<T>::type UnsignedT;
285
91
    UnsignedT val = 0;
286
91
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
287
91
    bool negative = false;
288
91
    int i = 0;
289
91
    switch (*s) {
290
8
    case '-':
291
8
        negative = true;
292
8
        max_val += 1;
293
8
        [[fallthrough]];
294
8
    case '+':
295
8
        ++i;
296
        // only one '+'/'-' char, so could return failure directly
297
8
        if (UNLIKELY(len == 1)) {
298
0
            *result = PARSE_FAILURE;
299
0
            return 0;
300
0
        }
301
91
    }
302
303
    // This is the fast path where the string cannot overflow.
304
91
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
305
53
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
306
53
        return static_cast<T>(negative ? -val : val);
307
53
    }
308
309
38
    const T max_div_10 = max_val / 10;
310
38
    const T max_mod_10 = max_val % 10;
311
312
38
    int first = i;
313
38
    for (; i < len; ++i) {
314
38
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
315
0
            T digit = s[i] - '0';
316
            // This is a tricky check to see if adding this digit will cause an overflow.
317
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
318
0
                *result = PARSE_OVERFLOW;
319
0
                return negative ? -max_val : max_val;
320
0
            }
321
0
            val = val * 10 + digit;
322
38
        } else {
323
38
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
324
38
                                         !is_float_suffix(s + i, len - i))))) {
325
                // Reject the string because either the first char was not a digit,
326
                // or the remaining chars are not all whitespace
327
38
                *result = PARSE_FAILURE;
328
38
                return 0;
329
38
            }
330
            // Returning here is slightly faster than breaking the loop.
331
0
            *result = PARSE_SUCCESS;
332
0
            return static_cast<T>(negative ? -val : val);
333
38
        }
334
38
    }
335
0
    *result = PARSE_SUCCESS;
336
0
    return static_cast<T>(negative ? -val : val);
337
38
}
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalItEET_PKciPNS0_11ParseResultE
Unexecuted instantiation: _ZN5doris12StringParser22string_to_int_internalIoEET_PKciPNS0_11ParseResultE
338
339
template <typename T>
340
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
341
2.66k
                                                ParseResult* result) {
342
2.66k
    if (UNLIKELY(len <= 0)) {
343
0
        *result = PARSE_FAILURE;
344
0
        return 0;
345
0
    }
346
347
2.66k
    T val = 0;
348
2.66k
    T max_val = std::numeric_limits<T>::max();
349
2.66k
    int i = 0;
350
351
2.66k
    typedef typename std::make_signed<T>::type signedT;
352
    // This is the fast path where the string cannot overflow.
353
2.66k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
354
879
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
355
879
        return val;
356
879
    }
357
358
1.78k
    const T max_div_10 = max_val / 10;
359
1.78k
    const T max_mod_10 = max_val % 10;
360
361
1.78k
    int first = i;
362
6.54k
    for (; i < len; ++i) {
363
6.49k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
364
4.99k
            T digit = s[i] - '0';
365
            // This is a tricky check to see if adding this digit will cause an overflow.
366
4.99k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
367
224
                *result = PARSE_OVERFLOW;
368
224
                return max_val;
369
224
            }
370
4.76k
            val = val * 10 + digit;
371
4.76k
        } else {
372
1.50k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
373
                // Reject the string because either the first char was not a digit,
374
                // or the remaining chars are not all whitespace
375
1.13k
                *result = PARSE_FAILURE;
376
1.13k
                return 0;
377
1.13k
            }
378
            // Returning here is slightly faster than breaking the loop.
379
378
            *result = PARSE_SUCCESS;
380
378
            return val;
381
1.50k
        }
382
6.49k
    }
383
49
    *result = PARSE_SUCCESS;
384
49
    return val;
385
1.78k
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
341
665
                                                ParseResult* result) {
342
665
    if (UNLIKELY(len <= 0)) {
343
0
        *result = PARSE_FAILURE;
344
0
        return 0;
345
0
    }
346
347
665
    T val = 0;
348
665
    T max_val = std::numeric_limits<T>::max();
349
665
    int i = 0;
350
351
665
    typedef typename std::make_signed<T>::type signedT;
352
    // This is the fast path where the string cannot overflow.
353
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
354
16
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
355
16
        return val;
356
16
    }
357
358
649
    const T max_div_10 = max_val / 10;
359
649
    const T max_mod_10 = max_val % 10;
360
361
649
    int first = i;
362
1.20k
    for (; i < len; ++i) {
363
1.18k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
364
609
            T digit = s[i] - '0';
365
            // This is a tricky check to see if adding this digit will cause an overflow.
366
609
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
367
56
                *result = PARSE_OVERFLOW;
368
56
                return max_val;
369
56
            }
370
553
            val = val * 10 + digit;
371
572
        } else {
372
572
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
373
                // Reject the string because either the first char was not a digit,
374
                // or the remaining chars are not all whitespace
375
446
                *result = PARSE_FAILURE;
376
446
                return 0;
377
446
            }
378
            // Returning here is slightly faster than breaking the loop.
379
126
            *result = PARSE_SUCCESS;
380
126
            return val;
381
572
        }
382
1.18k
    }
383
21
    *result = PARSE_SUCCESS;
384
21
    return val;
385
649
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
341
665
                                                ParseResult* result) {
342
665
    if (UNLIKELY(len <= 0)) {
343
0
        *result = PARSE_FAILURE;
344
0
        return 0;
345
0
    }
346
347
665
    T val = 0;
348
665
    T max_val = std::numeric_limits<T>::max();
349
665
    int i = 0;
350
351
665
    typedef typename std::make_signed<T>::type signedT;
352
    // This is the fast path where the string cannot overflow.
353
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
354
31
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
355
31
        return val;
356
31
    }
357
358
634
    const T max_div_10 = max_val / 10;
359
634
    const T max_mod_10 = max_val % 10;
360
361
634
    int first = i;
362
1.47k
    for (; i < len; ++i) {
363
1.46k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
364
896
            T digit = s[i] - '0';
365
            // This is a tricky check to see if adding this digit will cause an overflow.
366
896
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
367
56
                *result = PARSE_OVERFLOW;
368
56
                return max_val;
369
56
            }
370
840
            val = val * 10 + digit;
371
840
        } else {
372
564
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
373
                // Reject the string because either the first char was not a digit,
374
                // or the remaining chars are not all whitespace
375
438
                *result = PARSE_FAILURE;
376
438
                return 0;
377
438
            }
378
            // Returning here is slightly faster than breaking the loop.
379
126
            *result = PARSE_SUCCESS;
380
126
            return val;
381
564
        }
382
1.46k
    }
383
14
    *result = PARSE_SUCCESS;
384
14
    return val;
385
634
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
341
665
                                                ParseResult* result) {
342
665
    if (UNLIKELY(len <= 0)) {
343
0
        *result = PARSE_FAILURE;
344
0
        return 0;
345
0
    }
346
347
665
    T val = 0;
348
665
    T max_val = std::numeric_limits<T>::max();
349
665
    int i = 0;
350
351
665
    typedef typename std::make_signed<T>::type signedT;
352
    // This is the fast path where the string cannot overflow.
353
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
354
392
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
355
392
        return val;
356
392
    }
357
358
273
    const T max_div_10 = max_val / 10;
359
273
    const T max_mod_10 = max_val % 10;
360
361
273
    int first = i;
362
1.60k
    for (; i < len; ++i) {
363
1.59k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
364
1.38k
            T digit = s[i] - '0';
365
            // This is a tricky check to see if adding this digit will cause an overflow.
366
1.38k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
367
56
                *result = PARSE_OVERFLOW;
368
56
                return max_val;
369
56
            }
370
1.33k
            val = val * 10 + digit;
371
1.33k
        } else {
372
210
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
373
                // Reject the string because either the first char was not a digit,
374
                // or the remaining chars are not all whitespace
375
126
                *result = PARSE_FAILURE;
376
126
                return 0;
377
126
            }
378
            // Returning here is slightly faster than breaking the loop.
379
84
            *result = PARSE_SUCCESS;
380
84
            return val;
381
210
        }
382
1.59k
    }
383
7
    *result = PARSE_SUCCESS;
384
7
    return val;
385
273
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
341
665
                                                ParseResult* result) {
342
665
    if (UNLIKELY(len <= 0)) {
343
0
        *result = PARSE_FAILURE;
344
0
        return 0;
345
0
    }
346
347
665
    T val = 0;
348
665
    T max_val = std::numeric_limits<T>::max();
349
665
    int i = 0;
350
351
665
    typedef typename std::make_signed<T>::type signedT;
352
    // This is the fast path where the string cannot overflow.
353
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
354
440
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
355
440
        return val;
356
440
    }
357
358
225
    const T max_div_10 = max_val / 10;
359
225
    const T max_mod_10 = max_val % 10;
360
361
225
    int first = i;
362
2.26k
    for (; i < len; ++i) {
363
2.26k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
364
2.10k
            T digit = s[i] - '0';
365
            // This is a tricky check to see if adding this digit will cause an overflow.
366
2.10k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
367
56
                *result = PARSE_OVERFLOW;
368
56
                return max_val;
369
56
            }
370
2.04k
            val = val * 10 + digit;
371
2.04k
        } else {
372
162
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
373
                // Reject the string because either the first char was not a digit,
374
                // or the remaining chars are not all whitespace
375
120
                *result = PARSE_FAILURE;
376
120
                return 0;
377
120
            }
378
            // Returning here is slightly faster than breaking the loop.
379
42
            *result = PARSE_SUCCESS;
380
42
            return val;
381
162
        }
382
2.26k
    }
383
7
    *result = PARSE_SUCCESS;
384
7
    return val;
385
225
}
386
387
template <typename T>
388
T StringParser::string_to_int_internal(const char* __restrict s, int len, int base,
389
53.6k
                                       ParseResult* result) {
390
53.6k
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
53.6k
    UnsignedT val = 0;
392
53.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
53.6k
    bool negative = false;
394
53.6k
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
53.6k
    int i = 0;
399
53.6k
    switch (*s) {
400
14.3k
    case '-':
401
14.3k
        negative = true;
402
14.3k
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
14.3k
        [[fallthrough]];
404
14.6k
    case '+':
405
14.6k
        i = 1;
406
53.6k
    }
407
408
53.6k
    const T max_div_base = max_val / base;
409
53.6k
    const T max_mod_base = max_val % base;
410
411
53.6k
    int first = i;
412
120k
    for (; i < len; ++i) {
413
118k
        T digit;
414
118k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
81.6k
            digit = s[i] - '0';
416
81.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
639
            digit = (s[i] - 'a' + 10);
418
36.4k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
98
            digit = (s[i] - 'A' + 10);
420
36.3k
        } else {
421
36.3k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
24.0k
                *result = PARSE_FAILURE;
425
24.0k
                return 0;
426
24.0k
            }
427
            // skip trailing whitespace.
428
12.2k
            break;
429
36.3k
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
82.4k
        if (digit >= base) {
433
392
            break;
434
392
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
82.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
14.8k
            *result = PARSE_OVERFLOW;
439
14.8k
            return static_cast<T>(negative ? -max_val : max_val);
440
14.8k
        }
441
67.2k
        val = val * base + digit;
442
67.2k
    }
443
14.7k
    *result = PARSE_SUCCESS;
444
14.7k
    return static_cast<T>(negative ? -val : val);
445
53.6k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKciiPNS0_11ParseResultE
Line
Count
Source
389
51.0k
                                       ParseResult* result) {
390
51.0k
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
51.0k
    UnsignedT val = 0;
392
51.0k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
51.0k
    bool negative = false;
394
51.0k
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
51.0k
    int i = 0;
399
51.0k
    switch (*s) {
400
13.7k
    case '-':
401
13.7k
        negative = true;
402
13.7k
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
13.7k
        [[fallthrough]];
404
13.8k
    case '+':
405
13.8k
        i = 1;
406
51.0k
    }
407
408
51.0k
    const T max_div_base = max_val / base;
409
51.0k
    const T max_mod_base = max_val % base;
410
411
51.0k
    int first = i;
412
108k
    for (; i < len; ++i) {
413
107k
        T digit;
414
107k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
72.1k
            digit = s[i] - '0';
416
72.1k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
539
            digit = (s[i] - 'a' + 10);
418
34.3k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
98
            digit = (s[i] - 'A' + 10);
420
34.2k
        } else {
421
34.2k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
22.8k
                *result = PARSE_FAILURE;
425
22.8k
                return 0;
426
22.8k
            }
427
            // skip trailing whitespace.
428
11.3k
            break;
429
34.2k
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
72.7k
        if (digit >= base) {
433
392
            break;
434
392
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
72.4k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
14.5k
            *result = PARSE_OVERFLOW;
439
14.5k
            return static_cast<T>(negative ? -max_val : max_val);
440
14.5k
        }
441
57.8k
        val = val * base + digit;
442
57.8k
    }
443
13.6k
    *result = PARSE_SUCCESS;
444
13.6k
    return static_cast<T>(negative ? -val : val);
445
51.0k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciiPNS0_11ParseResultE
Line
Count
Source
389
924
                                       ParseResult* result) {
390
924
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
924
    UnsignedT val = 0;
392
924
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
924
    bool negative = false;
394
924
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
924
    int i = 0;
399
924
    switch (*s) {
400
203
    case '-':
401
203
        negative = true;
402
203
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
203
        [[fallthrough]];
404
252
    case '+':
405
252
        i = 1;
406
924
    }
407
408
924
    const T max_div_base = max_val / base;
409
924
    const T max_mod_base = max_val % base;
410
411
924
    int first = i;
412
2.59k
    for (; i < len; ++i) {
413
2.54k
        T digit;
414
2.54k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
1.68k
            digit = s[i] - '0';
416
1.68k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
98
            digit = (s[i] - 'a' + 10);
418
756
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
0
            digit = (s[i] - 'A' + 10);
420
756
        } else {
421
756
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
420
                *result = PARSE_FAILURE;
425
420
                return 0;
426
420
            }
427
            // skip trailing whitespace.
428
336
            break;
429
756
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
1.78k
        if (digit >= base) {
433
0
            break;
434
0
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
1.78k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
112
            *result = PARSE_OVERFLOW;
439
112
            return static_cast<T>(negative ? -max_val : max_val);
440
112
        }
441
1.67k
        val = val * base + digit;
442
1.67k
    }
443
392
    *result = PARSE_SUCCESS;
444
392
    return static_cast<T>(negative ? -val : val);
445
924
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciiPNS0_11ParseResultE
Line
Count
Source
389
833
                                       ParseResult* result) {
390
833
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
833
    UnsignedT val = 0;
392
833
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
833
    bool negative = false;
394
833
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
833
    int i = 0;
399
833
    switch (*s) {
400
154
    case '-':
401
154
        negative = true;
402
154
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
154
        [[fallthrough]];
404
252
    case '+':
405
252
        i = 1;
406
833
    }
407
408
833
    const T max_div_base = max_val / base;
409
833
    const T max_mod_base = max_val % base;
410
411
833
    int first = i;
412
3.55k
    for (; i < len; ++i) {
413
3.50k
        T digit;
414
3.50k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
2.83k
            digit = s[i] - '0';
416
2.83k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
0
            digit = (s[i] - 'a' + 10);
418
672
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
0
            digit = (s[i] - 'A' + 10);
420
672
        } else {
421
672
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
378
                *result = PARSE_FAILURE;
425
378
                return 0;
426
378
            }
427
            // skip trailing whitespace.
428
294
            break;
429
672
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
2.83k
        if (digit >= base) {
433
0
            break;
434
0
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
2.83k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
112
            *result = PARSE_OVERFLOW;
439
112
            return static_cast<T>(negative ? -max_val : max_val);
440
112
        }
441
2.72k
        val = val * base + digit;
442
2.72k
    }
443
343
    *result = PARSE_SUCCESS;
444
343
    return static_cast<T>(negative ? -val : val);
445
833
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciiPNS0_11ParseResultE
Line
Count
Source
389
833
                                       ParseResult* result) {
390
833
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
833
    UnsignedT val = 0;
392
833
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
833
    bool negative = false;
394
833
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
833
    int i = 0;
399
833
    switch (*s) {
400
203
    case '-':
401
203
        negative = true;
402
203
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
203
        [[fallthrough]];
404
252
    case '+':
405
252
        i = 1;
406
833
    }
407
408
833
    const T max_div_base = max_val / base;
409
833
    const T max_mod_base = max_val % base;
410
411
833
    int first = i;
412
5.74k
    for (; i < len; ++i) {
413
5.69k
        T digit;
414
5.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
5.01k
            digit = s[i] - '0';
416
5.01k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
0
            digit = (s[i] - 'a' + 10);
418
672
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
0
            digit = (s[i] - 'A' + 10);
420
672
        } else {
421
672
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
378
                *result = PARSE_FAILURE;
425
378
                return 0;
426
378
            }
427
            // skip trailing whitespace.
428
294
            break;
429
672
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
5.01k
        if (digit >= base) {
433
0
            break;
434
0
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
5.01k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
112
            *result = PARSE_OVERFLOW;
439
112
            return static_cast<T>(negative ? -max_val : max_val);
440
112
        }
441
4.90k
        val = val * base + digit;
442
4.90k
    }
443
343
    *result = PARSE_SUCCESS;
444
343
    return static_cast<T>(negative ? -val : val);
445
833
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKciiPNS0_11ParseResultE
Line
Count
Source
389
1
                                       ParseResult* result) {
390
1
    typedef typename std::make_unsigned<T>::type UnsignedT;
391
1
    UnsignedT val = 0;
392
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
393
1
    bool negative = false;
394
1
    if (UNLIKELY(len <= 0)) {
395
0
        *result = PARSE_FAILURE;
396
0
        return 0;
397
0
    }
398
1
    int i = 0;
399
1
    switch (*s) {
400
0
    case '-':
401
0
        negative = true;
402
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
403
0
        [[fallthrough]];
404
0
    case '+':
405
0
        i = 1;
406
1
    }
407
408
1
    const T max_div_base = max_val / base;
409
1
    const T max_mod_base = max_val % base;
410
411
1
    int first = i;
412
3
    for (; i < len; ++i) {
413
2
        T digit;
414
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
415
0
            digit = s[i] - '0';
416
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
417
2
            digit = (s[i] - 'a' + 10);
418
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
419
0
            digit = (s[i] - 'A' + 10);
420
0
        } else {
421
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
422
                // Reject the string because either the first char was not an alpha/digit,
423
                // or the remaining chars are not all whitespace
424
0
                *result = PARSE_FAILURE;
425
0
                return 0;
426
0
            }
427
            // skip trailing whitespace.
428
0
            break;
429
0
        }
430
431
        // Bail, if we encounter a digit that is not available in base.
432
2
        if (digit >= base) {
433
0
            break;
434
0
        }
435
436
        // This is a tricky check to see if adding this digit will cause an overflow.
437
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
438
0
            *result = PARSE_OVERFLOW;
439
0
            return static_cast<T>(negative ? -max_val : max_val);
440
0
        }
441
2
        val = val * base + digit;
442
2
    }
443
1
    *result = PARSE_SUCCESS;
444
1
    return static_cast<T>(negative ? -val : val);
445
1
}
446
447
template <typename T>
448
4.23k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
4.23k
    T val = 0;
450
4.23k
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
4.23k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
3.14k
        val = s[0] - '0';
457
3.14k
    } else {
458
1.09k
        *result = PARSE_FAILURE;
459
1.09k
        return 0;
460
1.09k
    }
461
9.83k
    for (int i = 1; i < len; ++i) {
462
7.30k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
6.69k
            T digit = s[i] - '0';
464
6.69k
            val = val * 10 + digit;
465
6.69k
        } else {
466
605
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
605
                          !is_float_suffix(s + i, len - i)))) {
468
16
                *result = PARSE_FAILURE;
469
16
                return 0;
470
16
            }
471
589
            *result = PARSE_SUCCESS;
472
589
            return val;
473
605
        }
474
7.30k
    }
475
2.53k
    *result = PARSE_SUCCESS;
476
2.53k
    return val;
477
3.14k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
448
1.50k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
1.50k
    T val = 0;
450
1.50k
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
1.50k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
1.45k
        val = s[0] - '0';
457
1.45k
    } else {
458
50
        *result = PARSE_FAILURE;
459
50
        return 0;
460
50
    }
461
2.72k
    for (int i = 1; i < len; ++i) {
462
1.27k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
1.27k
            T digit = s[i] - '0';
464
1.27k
            val = val * 10 + digit;
465
1.27k
        } else {
466
0
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
0
                          !is_float_suffix(s + i, len - i)))) {
468
0
                *result = PARSE_FAILURE;
469
0
                return 0;
470
0
            }
471
0
            *result = PARSE_SUCCESS;
472
0
            return val;
473
0
        }
474
1.27k
    }
475
1.45k
    *result = PARSE_SUCCESS;
476
1.45k
    return val;
477
1.45k
}
_ZN5doris12StringParser25string_to_int_no_overflowItEET_PKciPNS0_11ParseResultE
Line
Count
Source
448
143
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
143
    T val = 0;
450
143
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
143
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
119
        val = s[0] - '0';
457
119
    } else {
458
24
        *result = PARSE_FAILURE;
459
24
        return 0;
460
24
    }
461
154
    for (int i = 1; i < len; ++i) {
462
119
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
35
            T digit = s[i] - '0';
464
35
            val = val * 10 + digit;
465
84
        } else {
466
84
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
84
                          !is_float_suffix(s + i, len - i)))) {
468
0
                *result = PARSE_FAILURE;
469
0
                return 0;
470
0
            }
471
84
            *result = PARSE_SUCCESS;
472
84
            return val;
473
84
        }
474
119
    }
475
35
    *result = PARSE_SUCCESS;
476
35
    return val;
477
119
}
_ZN5doris12StringParser25string_to_int_no_overflowIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
448
784
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
784
    T val = 0;
450
784
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
784
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
280
        val = s[0] - '0';
457
504
    } else {
458
504
        *result = PARSE_FAILURE;
459
504
        return 0;
460
504
    }
461
641
    for (int i = 1; i < len; ++i) {
462
571
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
361
            T digit = s[i] - '0';
464
361
            val = val * 10 + digit;
465
361
        } else {
466
210
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
210
                          !is_float_suffix(s + i, len - i)))) {
468
0
                *result = PARSE_FAILURE;
469
0
                return 0;
470
0
            }
471
210
            *result = PARSE_SUCCESS;
472
210
            return val;
473
210
        }
474
571
    }
475
70
    *result = PARSE_SUCCESS;
476
70
    return val;
477
280
}
_ZN5doris12StringParser25string_to_int_no_overflowImEET_PKciPNS0_11ParseResultE
Line
Count
Source
448
1.09k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
1.09k
    T val = 0;
450
1.09k
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
1.09k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
575
        val = s[0] - '0';
457
575
    } else {
458
518
        *result = PARSE_FAILURE;
459
518
        return 0;
460
518
    }
461
2.96k
    for (int i = 1; i < len; ++i) {
462
2.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
2.38k
            T digit = s[i] - '0';
464
2.38k
            val = val * 10 + digit;
465
2.38k
        } else {
466
311
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
311
                          !is_float_suffix(s + i, len - i)))) {
468
16
                *result = PARSE_FAILURE;
469
16
                return 0;
470
16
            }
471
295
            *result = PARSE_SUCCESS;
472
295
            return val;
473
311
        }
474
2.69k
    }
475
264
    *result = PARSE_SUCCESS;
476
264
    return val;
477
575
}
_ZN5doris12StringParser25string_to_int_no_overflowIoEET_PKciPNS0_11ParseResultE
Line
Count
Source
448
710
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
449
710
    T val = 0;
450
710
    if (UNLIKELY(len == 0)) {
451
0
        *result = PARSE_SUCCESS;
452
0
        return val;
453
0
    }
454
    // Factor out the first char for error handling speeds up the loop.
455
710
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
456
710
        val = s[0] - '0';
457
710
    } else {
458
0
        *result = PARSE_FAILURE;
459
0
        return 0;
460
0
    }
461
3.35k
    for (int i = 1; i < len; ++i) {
462
2.64k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
463
2.64k
            T digit = s[i] - '0';
464
2.64k
            val = val * 10 + digit;
465
2.64k
        } else {
466
0
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
467
0
                          !is_float_suffix(s + i, len - i)))) {
468
0
                *result = PARSE_FAILURE;
469
0
                return 0;
470
0
            }
471
0
            *result = PARSE_SUCCESS;
472
0
            return val;
473
0
        }
474
2.64k
    }
475
710
    *result = PARSE_SUCCESS;
476
710
    return val;
477
710
}
Unexecuted instantiation: _ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEEEET_PKciPNS0_11ParseResultE
478
479
template <typename T>
480
19.1k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
481
19.1k
    int i = 0;
482
    // skip leading spaces
483
60.0k
    for (; i < len; ++i) {
484
60.0k
        if (!is_whitespace(s[i])) {
485
19.1k
            break;
486
19.1k
        }
487
60.0k
    }
488
489
    // skip back spaces
490
19.1k
    int j = len - 1;
491
60.3k
    for (; j >= i; j--) {
492
60.3k
        if (!is_whitespace(s[j])) {
493
19.1k
            break;
494
19.1k
        }
495
60.3k
    }
496
497
    // skip leading '+', from_chars can handle '-'
498
19.1k
    if (i < len && s[i] == '+') {
499
5.29k
        i++;
500
5.29k
    }
501
19.1k
    if (UNLIKELY(i > j)) {
502
3
        *result = PARSE_FAILURE;
503
3
        return 0;
504
3
    }
505
506
    // Use double here to not lose precision while accumulating the result
507
19.1k
    double val = 0;
508
19.1k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
509
510
19.1k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
511
15.3k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
512
886
            auto contain_inf = false;
513
1.27k
            for (int k = i; k < j + 1; k++) {
514
1.27k
                if (s[k] == 'i' || s[k] == 'I') {
515
882
                    contain_inf = true;
516
882
                    break;
517
882
                }
518
1.27k
            }
519
520
886
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
521
14.4k
        } else {
522
14.4k
            *result = PARSE_SUCCESS;
523
14.4k
        }
524
15.3k
        return val;
525
15.3k
    } else {
526
3.86k
        *result = PARSE_FAILURE;
527
3.86k
    }
528
3.86k
    return 0;
529
19.1k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
8.25k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
481
8.25k
    int i = 0;
482
    // skip leading spaces
483
28.6k
    for (; i < len; ++i) {
484
28.6k
        if (!is_whitespace(s[i])) {
485
8.25k
            break;
486
8.25k
        }
487
28.6k
    }
488
489
    // skip back spaces
490
8.25k
    int j = len - 1;
491
28.8k
    for (; j >= i; j--) {
492
28.8k
        if (!is_whitespace(s[j])) {
493
8.25k
            break;
494
8.25k
        }
495
28.8k
    }
496
497
    // skip leading '+', from_chars can handle '-'
498
8.25k
    if (i < len && s[i] == '+') {
499
2.64k
        i++;
500
2.64k
    }
501
8.25k
    if (UNLIKELY(i > j)) {
502
0
        *result = PARSE_FAILURE;
503
0
        return 0;
504
0
    }
505
506
    // Use double here to not lose precision while accumulating the result
507
8.25k
    double val = 0;
508
8.25k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
509
510
8.25k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
511
6.32k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
512
443
            auto contain_inf = false;
513
629
            for (int k = i; k < j + 1; k++) {
514
627
                if (s[k] == 'i' || s[k] == 'I') {
515
441
                    contain_inf = true;
516
441
                    break;
517
441
                }
518
627
            }
519
520
443
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
521
5.88k
        } else {
522
5.88k
            *result = PARSE_SUCCESS;
523
5.88k
        }
524
6.32k
        return val;
525
6.32k
    } else {
526
1.92k
        *result = PARSE_FAILURE;
527
1.92k
    }
528
1.92k
    return 0;
529
8.25k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
480
10.9k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
481
10.9k
    int i = 0;
482
    // skip leading spaces
483
31.3k
    for (; i < len; ++i) {
484
31.3k
        if (!is_whitespace(s[i])) {
485
10.9k
            break;
486
10.9k
        }
487
31.3k
    }
488
489
    // skip back spaces
490
10.9k
    int j = len - 1;
491
31.5k
    for (; j >= i; j--) {
492
31.5k
        if (!is_whitespace(s[j])) {
493
10.9k
            break;
494
10.9k
        }
495
31.5k
    }
496
497
    // skip leading '+', from_chars can handle '-'
498
10.9k
    if (i < len && s[i] == '+') {
499
2.64k
        i++;
500
2.64k
    }
501
10.9k
    if (UNLIKELY(i > j)) {
502
3
        *result = PARSE_FAILURE;
503
3
        return 0;
504
3
    }
505
506
    // Use double here to not lose precision while accumulating the result
507
10.9k
    double val = 0;
508
10.9k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
509
510
10.9k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
511
9.00k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
512
443
            auto contain_inf = false;
513
647
            for (int k = i; k < j + 1; k++) {
514
645
                if (s[k] == 'i' || s[k] == 'I') {
515
441
                    contain_inf = true;
516
441
                    break;
517
441
                }
518
645
            }
519
520
443
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
521
8.56k
        } else {
522
8.56k
            *result = PARSE_SUCCESS;
523
8.56k
        }
524
9.00k
        return val;
525
9.00k
    } else {
526
1.93k
        *result = PARSE_FAILURE;
527
1.93k
    }
528
1.93k
    return 0;
529
10.9k
}
530
531
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
532
618
                                                  ParseResult* result) {
533
618
    *result = PARSE_SUCCESS;
534
535
618
    if (len >= 4 && (s[0] == 't' || s[0] == 'T')) {
536
170
        bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') &&
537
170
                     (s[3] == 'e' || s[3] == 'E');
538
170
        if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) {
539
58
            return true;
540
58
        }
541
448
    } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) {
542
170
        bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') &&
543
170
                     (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E');
544
170
        if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) {
545
52
            return false;
546
52
        }
547
170
    }
548
549
508
    *result = PARSE_FAILURE;
550
508
    return false;
551
618
}
552
553
// Parses a decimal literal into the unscaled integer that represents it at
// `type_scale` fractional digits.
//
// Input form, as read by the code below: optional surrounding whitespace, an
// optional '+' or '-' sign, digits with at most one '.', and an optional 'e'/'E'
// exponent that is parsed with string_to_int_internal<int8_t>.
//
// Template parameters:
//   P           - primitive type tag. It is only consulted on overflow: for every
//                 P other than TYPE_DECIMALV2 the clamped min/max value is returned
//                 immediately; for TYPE_DECIMALV2 execution continues with *result
//                 left at PARSE_OVERFLOW.
//   T           - integer storage type; restricted by the static_assert below.
//   DecimalType - forwarded to vectorized::min_decimal_value/max_decimal_value to
//                 obtain the clamped result on overflow.
// Parameters:
//   s, len         - input characters and their count.
//   type_precision - precision of the target decimal type (total digits).
//   type_scale     - scale of the target decimal type (fractional digits).
//   result         - out-parameter, set to one of PARSE_SUCCESS, PARSE_FAILURE,
//                    PARSE_OVERFLOW or PARSE_UNDERFLOW (fractional digits beyond
//                    type_scale were rounded away, or the exponent parse
//                    overflowed with a negative exponent).
// Returns the parsed value, negated when the input started with '-'. Several
// failure paths return 0; callers have to inspect *result.
template <PrimitiveType P, typename T, typename DecimalType>
554
T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision,
555
218
                                  int type_scale, ParseResult* result) {
556
218
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
218
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
218
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
218
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
218
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
218
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
218
    bool is_negative = false;
576
218
    if (len > 0) {
577
218
        switch (*s) {
578
32
        case '-':
579
32
            is_negative = true;
580
32
            [[fallthrough]];
581
32
        case '+':
582
32
            ++s;
583
32
            --len;
584
218
        }
585
218
    }
586
587
    // Ignore leading zeros.
588
218
    bool found_value = false;
589
244
    while (len > 0 && UNLIKELY(*s == '0')) {
590
26
        found_value = true;
591
26
        ++s;
592
26
        --len;
593
26
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
218
    int scale = 0;
599
218
    int found_dot = 0; // kept as int (0/1) because it is added to `scale` for every digit
600
218
    if (len > 0 && *s == '.') {
601
9
        found_dot = 1;
602
9
        ++s;
603
9
        --len;
604
14
        while (len > 0 && UNLIKELY(*s == '0')) {
605
5
            found_value = true;
606
5
            ++scale;
607
5
            ++s;
608
5
            --len;
609
5
        }
610
9
    }
611
612
218
    int precision = 0; // number of digits accumulated into `value`
613
218
    int max_digit = type_precision - type_scale; // integer digits the target type can hold
614
218
    int cur_digit = 0; // integer digits seen so far, including ones not accumulated
615
218
    bool found_exponent = false;
616
218
    int8_t exponent = 0;
617
218
    T value = 0;
618
218
    bool has_round = false; // set once a digit has been consumed for rounding
619
4.07k
    for (int i = 0; i < len; ++i) {
620
3.88k
        const char& c = s[i];
621
3.88k
        if (LIKELY('0' <= c && c <= '9')) {
622
3.68k
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
3.68k
            if (LIKELY(type_precision > precision) && !has_round) {
628
3.64k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
3.64k
                ++precision;
630
3.64k
                scale += found_dot;
631
3.64k
                cur_digit = precision - scale;
632
3.64k
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
34
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // First fractional digit that is not accumulated: round up when it is 5 or more.
639
20
                if (c > '4') {
640
8
                    value += 1;
641
8
                }
642
20
                has_round = true; // every later digit fails all branches above and is dropped
643
20
                continue;
644
20
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
3.66k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
3.66k
        } else if (c == '.' && LIKELY(!found_dot)) {
649
179
            found_dot = 1;
650
179
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break; // the rest of the input was handed to the exponent parse above
660
21
        } else {
661
21
            if (value == 0) {
662
9
                *result = StringParser::PARSE_FAILURE;
663
9
                return 0;
664
9
            }
665
            // A non-zero value was accumulated before this unexpected character: report success.
666
12
            *result = StringParser::PARSE_SUCCESS;
667
12
            if (type_scale >= scale) {
668
12
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // c is an unexpected character, so stop here and return what was parsed so far.
670
                // Overflow is judged against the caller-supplied type_precision (via cur_digit),
671
                // rather than against the maximum precision of the storage type.
672
12
                if (!is_numeric_ascii(c)) {
673
12
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
12
                    return is_negative ? T(-value) : T(value);
682
12
                }
683
12
            }
684
685
            // NOTE(review): when scale > type_scale, value is returned as accumulated, i.e. not rescaled to type_scale - confirm this is intended.
0
            return is_negative ? T(-value) : T(value);
686
12
        }
687
3.88k
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
197
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
197
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
197
        scale -= exponent;
702
197
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
197
    if (scale > precision) {
706
3
        precision = scale;
707
3
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
130
    *result = StringParser::PARSE_SUCCESS;
712
130
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
19
        *result = StringParser::PARSE_OVERFLOW;
714
19
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
10
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
10
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
10
            return value;
719
10
        }
720
178
    } else if (UNLIKELY(scale > type_scale)) {
721
29
        *result = StringParser::PARSE_UNDERFLOW;
722
29
        int shift = scale - type_scale; // surplus fractional digits to divide away
723
29
        T divisor = get_scale_multiplier<T>(shift);
724
29
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) { // presumably an out-of-range marker from get_scale_multiplier - TODO confirm
725
0
            value = 0;
726
29
        } else {
727
29
            T remainder = value % divisor;
728
29
            value /= divisor;
729
29
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) { // round up when |remainder| is at least half of divisor
730
25
                value += 1;
731
25
            }
732
29
        }
733
29
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
149
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
187
    if (type_scale > scale) { // pad with zeros up to the target scale
739
88
        value *= get_scale_multiplier<T>(type_scale - scale);
740
88
    }
741
742
187
    return is_negative ? T(-value) : T(value);
743
67
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EiNS_10vectorized7DecimalIiEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
555
22
                                  int type_scale, ParseResult* result) {
556
22
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
22
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
22
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
22
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
22
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
22
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
22
    bool is_negative = false;
576
22
    if (len > 0) {
577
22
        switch (*s) {
578
3
        case '-':
579
3
            is_negative = true;
580
3
            [[fallthrough]];
581
3
        case '+':
582
3
            ++s;
583
3
            --len;
584
22
        }
585
22
    }
586
587
    // Ignore leading zeros.
588
22
    bool found_value = false;
589
22
    while (len > 0 && UNLIKELY(*s == '0')) {
590
0
        found_value = true;
591
0
        ++s;
592
0
        --len;
593
0
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
22
    int scale = 0;
599
22
    int found_dot = 0;
600
22
    if (len > 0 && *s == '.') {
601
0
        found_dot = 1;
602
0
        ++s;
603
0
        --len;
604
0
        while (len > 0 && UNLIKELY(*s == '0')) {
605
0
            found_value = true;
606
0
            ++scale;
607
0
            ++s;
608
0
            --len;
609
0
        }
610
0
    }
611
612
22
    int precision = 0;
613
22
    int max_digit = type_precision - type_scale;
614
22
    int cur_digit = 0;
615
22
    bool found_exponent = false;
616
22
    int8_t exponent = 0;
617
22
    T value = 0;
618
22
    bool has_round = false;
619
256
    for (int i = 0; i < len; ++i) {
620
240
        const char& c = s[i];
621
240
        if (LIKELY('0' <= c && c <= '9')) {
622
212
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
212
            if (LIKELY(type_precision > precision) && !has_round) {
628
198
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
198
                ++precision;
630
198
                scale += found_dot;
631
198
                cur_digit = precision - scale;
632
198
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
14
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // make rounding cases
639
4
                if (c > '4') {
640
0
                    value += 1;
641
0
                }
642
4
                has_round = true;
643
4
                continue;
644
10
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
208
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
208
        } else if (c == '.' && LIKELY(!found_dot)) {
649
22
            found_dot = 1;
650
22
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break;
660
6
        } else {
661
6
            if (value == 0) {
662
0
                *result = StringParser::PARSE_FAILURE;
663
0
                return 0;
664
0
            }
665
            // here to handle
666
6
            *result = StringParser::PARSE_SUCCESS;
667
6
            if (type_scale >= scale) {
668
6
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // here meet non-valid character, should return the value, keep going to meet
670
                // the E/e character because we make right user-given type_precision
671
                // not max number type_precision
672
6
                if (!is_numeric_ascii(c)) {
673
6
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
6
                    return is_negative ? T(-value) : T(value);
682
6
                }
683
6
            }
684
685
0
            return is_negative ? T(-value) : T(value);
686
6
        }
687
240
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
16
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
16
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
16
        scale -= exponent;
702
16
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
16
    if (scale > precision) {
706
0
        precision = scale;
707
0
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
16
    *result = StringParser::PARSE_SUCCESS;
712
16
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
4
        *result = StringParser::PARSE_OVERFLOW;
714
4
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
4
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
4
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
4
            return value;
719
4
        }
720
12
    } else if (UNLIKELY(scale > type_scale)) {
721
4
        *result = StringParser::PARSE_UNDERFLOW;
722
4
        int shift = scale - type_scale;
723
4
        T divisor = get_scale_multiplier<T>(shift);
724
4
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
725
0
            value = 0;
726
4
        } else {
727
4
            T remainder = value % divisor;
728
4
            value /= divisor;
729
4
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
730
0
                value += 1;
731
0
            }
732
4
        }
733
4
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
8
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
12
    if (type_scale > scale) {
739
0
        value *= get_scale_multiplier<T>(type_scale - scale);
740
0
    }
741
742
12
    return is_negative ? T(-value) : T(value);
743
16
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29ElNS_10vectorized7DecimalIlEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
555
22
                                  int type_scale, ParseResult* result) {
556
22
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
22
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
22
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
22
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
22
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
22
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
22
    bool is_negative = false;
576
22
    if (len > 0) {
577
22
        switch (*s) {
578
3
        case '-':
579
3
            is_negative = true;
580
3
            [[fallthrough]];
581
3
        case '+':
582
3
            ++s;
583
3
            --len;
584
22
        }
585
22
    }
586
587
    // Ignore leading zeros.
588
22
    bool found_value = false;
589
22
    while (len > 0 && UNLIKELY(*s == '0')) {
590
0
        found_value = true;
591
0
        ++s;
592
0
        --len;
593
0
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
22
    int scale = 0;
599
22
    int found_dot = 0;
600
22
    if (len > 0 && *s == '.') {
601
0
        found_dot = 1;
602
0
        ++s;
603
0
        --len;
604
0
        while (len > 0 && UNLIKELY(*s == '0')) {
605
0
            found_value = true;
606
0
            ++scale;
607
0
            ++s;
608
0
            --len;
609
0
        }
610
0
    }
611
612
22
    int precision = 0;
613
22
    int max_digit = type_precision - type_scale;
614
22
    int cur_digit = 0;
615
22
    bool found_exponent = false;
616
22
    int8_t exponent = 0;
617
22
    T value = 0;
618
22
    bool has_round = false;
619
396
    for (int i = 0; i < len; ++i) {
620
374
        const char& c = s[i];
621
374
        if (LIKELY('0' <= c && c <= '9')) {
622
352
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
352
            if (LIKELY(type_precision > precision) && !has_round) {
628
344
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
344
                ++precision;
630
344
                scale += found_dot;
631
344
                cur_digit = precision - scale;
632
344
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
8
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // make rounding cases
639
4
                if (c > '4') {
640
4
                    value += 1;
641
4
                }
642
4
                has_round = true;
643
4
                continue;
644
4
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
348
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
348
        } else if (c == '.' && LIKELY(!found_dot)) {
649
22
            found_dot = 1;
650
22
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break;
660
0
        } else {
661
0
            if (value == 0) {
662
0
                *result = StringParser::PARSE_FAILURE;
663
0
                return 0;
664
0
            }
665
            // here to handle
666
0
            *result = StringParser::PARSE_SUCCESS;
667
0
            if (type_scale >= scale) {
668
0
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // here meet non-valid character, should return the value, keep going to meet
670
                // the E/e character because we make right user-given type_precision
671
                // not max number type_precision
672
0
                if (!is_numeric_ascii(c)) {
673
0
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
0
                    return is_negative ? T(-value) : T(value);
682
0
                }
683
0
            }
684
685
0
            return is_negative ? T(-value) : T(value);
686
0
        }
687
374
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
22
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
22
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
22
        scale -= exponent;
702
22
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
22
    if (scale > precision) {
706
0
        precision = scale;
707
0
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
22
    *result = StringParser::PARSE_SUCCESS;
712
22
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
4
        *result = StringParser::PARSE_OVERFLOW;
714
4
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
4
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
4
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
4
            return value;
719
4
        }
720
18
    } else if (UNLIKELY(scale > type_scale)) {
721
0
        *result = StringParser::PARSE_UNDERFLOW;
722
0
        int shift = scale - type_scale;
723
0
        T divisor = get_scale_multiplier<T>(shift);
724
0
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
725
0
            value = 0;
726
0
        } else {
727
0
            T remainder = value % divisor;
728
0
            value /= divisor;
729
0
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
730
0
                value += 1;
731
0
            }
732
0
        }
733
0
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
18
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
18
    if (type_scale > scale) {
739
4
        value *= get_scale_multiplier<T>(type_scale - scale);
740
4
    }
741
742
18
    return is_negative ? T(-value) : T(value);
743
22
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EnNS_10vectorized12Decimal128V3EEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
555
28
                                  int type_scale, ParseResult* result) {
556
28
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
28
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
28
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
28
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
28
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
28
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
28
    bool is_negative = false;
576
28
    if (len > 0) {
577
28
        switch (*s) {
578
1
        case '-':
579
1
            is_negative = true;
580
1
            [[fallthrough]];
581
1
        case '+':
582
1
            ++s;
583
1
            --len;
584
28
        }
585
28
    }
586
587
    // Ignore leading zeros.
588
28
    bool found_value = false;
589
32
    while (len > 0 && UNLIKELY(*s == '0')) {
590
4
        found_value = true;
591
4
        ++s;
592
4
        --len;
593
4
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
28
    int scale = 0;
599
28
    int found_dot = 0;
600
28
    if (len > 0 && *s == '.') {
601
0
        found_dot = 1;
602
0
        ++s;
603
0
        --len;
604
0
        while (len > 0 && UNLIKELY(*s == '0')) {
605
0
            found_value = true;
606
0
            ++scale;
607
0
            ++s;
608
0
            --len;
609
0
        }
610
0
    }
611
612
28
    int precision = 0;
613
28
    int max_digit = type_precision - type_scale;
614
28
    int cur_digit = 0;
615
28
    bool found_exponent = false;
616
28
    int8_t exponent = 0;
617
28
    T value = 0;
618
28
    bool has_round = false;
619
1.07k
    for (int i = 0; i < len; ++i) {
620
1.04k
        const char& c = s[i];
621
1.04k
        if (LIKELY('0' <= c && c <= '9')) {
622
1.01k
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
1.01k
            if (LIKELY(type_precision > precision) && !has_round) {
628
1.01k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
1.01k
                ++precision;
630
1.01k
                scale += found_dot;
631
1.01k
                cur_digit = precision - scale;
632
1.01k
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
4
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // make rounding cases
639
4
                if (c > '4') {
640
4
                    value += 1;
641
4
                }
642
4
                has_round = true;
643
4
                continue;
644
4
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
1.01k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
1.01k
        } else if (c == '.' && LIKELY(!found_dot)) {
649
26
            found_dot = 1;
650
26
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break;
660
0
        } else {
661
0
            if (value == 0) {
662
0
                *result = StringParser::PARSE_FAILURE;
663
0
                return 0;
664
0
            }
665
            // here to handle
666
0
            *result = StringParser::PARSE_SUCCESS;
667
0
            if (type_scale >= scale) {
668
0
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // here meet non-valid character, should return the value, keep going to meet
670
                // the E/e character because we make right user-given type_precision
671
                // not max number type_precision
672
0
                if (!is_numeric_ascii(c)) {
673
0
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
0
                    return is_negative ? T(-value) : T(value);
682
0
                }
683
0
            }
684
685
0
            return is_negative ? T(-value) : T(value);
686
0
        }
687
1.04k
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
28
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
28
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
28
        scale -= exponent;
702
28
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
28
    if (scale > precision) {
706
0
        precision = scale;
707
0
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
28
    *result = StringParser::PARSE_SUCCESS;
712
28
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
2
        *result = StringParser::PARSE_OVERFLOW;
714
2
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
2
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
2
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
2
            return value;
719
2
        }
720
26
    } else if (UNLIKELY(scale > type_scale)) {
721
8
        *result = StringParser::PARSE_UNDERFLOW;
722
8
        int shift = scale - type_scale;
723
8
        T divisor = get_scale_multiplier<T>(shift);
724
8
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
725
0
            value = 0;
726
8
        } else {
727
8
            T remainder = value % divisor;
728
8
            value /= divisor;
729
8
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
730
8
                value += 1;
731
8
            }
732
8
        }
733
8
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
18
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
26
    if (type_scale > scale) {
739
4
        value *= get_scale_multiplier<T>(type_scale - scale);
740
4
    }
741
742
26
    return is_negative ? T(-value) : T(value);
743
28
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EN4wide7integerILm256EiEENS_10vectorized7DecimalIS5_EEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
555
1
                                  int type_scale, ParseResult* result) {
556
1
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
1
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
1
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
1
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
1
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
1
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
1
    bool is_negative = false;
576
1
    if (len > 0) {
577
1
        switch (*s) {
578
0
        case '-':
579
0
            is_negative = true;
580
0
            [[fallthrough]];
581
0
        case '+':
582
0
            ++s;
583
0
            --len;
584
1
        }
585
1
    }
586
587
    // Ignore leading zeros.
588
1
    bool found_value = false;
589
1
    while (len > 0 && UNLIKELY(*s == '0')) {
590
0
        found_value = true;
591
0
        ++s;
592
0
        --len;
593
0
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
1
    int scale = 0;
599
1
    int found_dot = 0;
600
1
    if (len > 0 && *s == '.') {
601
0
        found_dot = 1;
602
0
        ++s;
603
0
        --len;
604
0
        while (len > 0 && UNLIKELY(*s == '0')) {
605
0
            found_value = true;
606
0
            ++scale;
607
0
            ++s;
608
0
            --len;
609
0
        }
610
0
    }
611
612
1
    int precision = 0;
613
1
    int max_digit = type_precision - type_scale;
614
1
    int cur_digit = 0;
615
1
    bool found_exponent = false;
616
1
    int8_t exponent = 0;
617
1
    T value = 0;
618
1
    bool has_round = false;
619
78
    for (int i = 0; i < len; ++i) {
620
77
        const char& c = s[i];
621
77
        if (LIKELY('0' <= c && c <= '9')) {
622
76
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
76
            if (LIKELY(type_precision > precision) && !has_round) {
628
76
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
76
                ++precision;
630
76
                scale += found_dot;
631
76
                cur_digit = precision - scale;
632
76
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
0
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // make rounding cases
639
0
                if (c > '4') {
640
0
                    value += 1;
641
0
                }
642
0
                has_round = true;
643
0
                continue;
644
0
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
76
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
76
        } else if (c == '.' && LIKELY(!found_dot)) {
649
1
            found_dot = 1;
650
1
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break;
660
0
        } else {
661
0
            if (value == 0) {
662
0
                *result = StringParser::PARSE_FAILURE;
663
0
                return 0;
664
0
            }
665
            // here to handle
666
0
            *result = StringParser::PARSE_SUCCESS;
667
0
            if (type_scale >= scale) {
668
0
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // here meet non-valid character, should return the value, keep going to meet
670
                // the E/e character because we make right user-given type_precision
671
                // not max number type_precision
672
0
                if (!is_numeric_ascii(c)) {
673
0
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
0
                    return is_negative ? T(-value) : T(value);
682
0
                }
683
0
            }
684
685
0
            return is_negative ? T(-value) : T(value);
686
0
        }
687
77
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
1
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
1
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
1
        scale -= exponent;
702
1
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
1
    if (scale > precision) {
706
0
        precision = scale;
707
0
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
1
    *result = StringParser::PARSE_SUCCESS;
712
1
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
0
        *result = StringParser::PARSE_OVERFLOW;
714
0
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
0
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
0
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
0
            return value;
719
0
        }
720
1
    } else if (UNLIKELY(scale > type_scale)) {
721
0
        *result = StringParser::PARSE_UNDERFLOW;
722
0
        int shift = scale - type_scale;
723
0
        T divisor = get_scale_multiplier<T>(shift);
724
0
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
725
0
            value = 0;
726
0
        } else {
727
0
            T remainder = value % divisor;
728
0
            value /= divisor;
729
0
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
730
0
                value += 1;
731
0
            }
732
0
        }
733
0
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
1
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
1
    if (type_scale > scale) {
739
0
        value *= get_scale_multiplier<T>(type_scale - scale);
740
0
    }
741
742
1
    return is_negative ? T(-value) : T(value);
743
1
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EnNS_10vectorized7DecimalInEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
555
145
                                  int type_scale, ParseResult* result) {
556
145
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
557
145
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
558
145
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
559
145
                  "wide::Int256.");
560
    // Special cases:
561
    //   1) '' == Fail, an empty string fails to parse.
562
    //   2) '   #   ' == #, leading and trailing white space is ignored.
563
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
564
    //   4) '#.' == '#', a trailing dot is ignored.
565
566
    // Ignore leading and trailing spaces.
567
145
    while (len > 0 && is_whitespace(*s)) {
568
0
        ++s;
569
0
        --len;
570
0
    }
571
145
    while (len > 0 && is_whitespace(s[len - 1])) {
572
0
        --len;
573
0
    }
574
575
145
    bool is_negative = false;
576
145
    if (len > 0) {
577
145
        switch (*s) {
578
25
        case '-':
579
25
            is_negative = true;
580
25
            [[fallthrough]];
581
25
        case '+':
582
25
            ++s;
583
25
            --len;
584
145
        }
585
145
    }
586
587
    // Ignore leading zeros.
588
145
    bool found_value = false;
589
167
    while (len > 0 && UNLIKELY(*s == '0')) {
590
22
        found_value = true;
591
22
        ++s;
592
22
        --len;
593
22
    }
594
595
    // Ignore leading zeros even after a dot. This allows for differentiating between
596
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
597
    // overflow.
598
145
    int scale = 0;
599
145
    int found_dot = 0;
600
145
    if (len > 0 && *s == '.') {
601
9
        found_dot = 1;
602
9
        ++s;
603
9
        --len;
604
14
        while (len > 0 && UNLIKELY(*s == '0')) {
605
5
            found_value = true;
606
5
            ++scale;
607
5
            ++s;
608
5
            --len;
609
5
        }
610
9
    }
611
612
145
    int precision = 0;
613
145
    int max_digit = type_precision - type_scale;
614
145
    int cur_digit = 0;
615
145
    bool found_exponent = false;
616
145
    int8_t exponent = 0;
617
145
    T value = 0;
618
145
    bool has_round = false;
619
2.27k
    for (int i = 0; i < len; ++i) {
620
2.14k
        const char& c = s[i];
621
2.14k
        if (LIKELY('0' <= c && c <= '9')) {
622
2.02k
            found_value = true;
623
            // Ignore digits once the type's precision limit is reached. This avoids
624
            // overflowing the underlying storage while handling a string like
625
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
626
            // an exponent will be made later.
627
2.02k
            if (LIKELY(type_precision > precision) && !has_round) {
628
2.01k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
629
2.01k
                ++precision;
630
2.01k
                scale += found_dot;
631
2.01k
                cur_digit = precision - scale;
632
2.01k
            } else if (!found_dot && max_digit < (precision - scale)) {
633
0
                *result = StringParser::PARSE_OVERFLOW;
634
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
635
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
636
0
                return value;
637
8
            } else if (found_dot && scale >= type_scale && !has_round) {
638
                // make rounding cases
639
8
                if (c > '4') {
640
0
                    value += 1;
641
0
                }
642
8
                has_round = true;
643
8
                continue;
644
8
            } else if (!found_dot) {
645
0
                ++cur_digit;
646
0
            }
647
2.01k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
648
2.01k
        } else if (c == '.' && LIKELY(!found_dot)) {
649
108
            found_dot = 1;
650
108
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
651
0
            found_exponent = true;
652
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
653
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
654
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
655
0
                    *result = StringParser::PARSE_UNDERFLOW;
656
0
                }
657
0
                return 0;
658
0
            }
659
0
            break;
660
15
        } else {
661
15
            if (value == 0) {
662
9
                *result = StringParser::PARSE_FAILURE;
663
9
                return 0;
664
9
            }
665
            // here to handle
666
6
            *result = StringParser::PARSE_SUCCESS;
667
6
            if (type_scale >= scale) {
668
6
                value *= get_scale_multiplier<T>(type_scale - scale);
669
                // here meet non-valid character, should return the value, keep going to meet
670
                // the E/e character because we make right user-given type_precision
671
                // not max number type_precision
672
6
                if (!is_numeric_ascii(c)) {
673
6
                    if (cur_digit > type_precision) {
674
0
                        *result = StringParser::PARSE_OVERFLOW;
675
0
                        value = is_negative
676
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
677
0
                                        : vectorized::max_decimal_value<DecimalType>(
678
0
                                                  type_precision);
679
0
                        return value;
680
0
                    }
681
6
                    return is_negative ? T(-value) : T(value);
682
6
                }
683
6
            }
684
685
0
            return is_negative ? T(-value) : T(value);
686
6
        }
687
2.14k
    }
688
689
    // Find the number of truncated digits before adjusting the precision for an exponent.
690
130
    if (exponent > scale) {
691
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
692
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
693
0
        precision += exponent - scale;
694
695
0
        value *= get_scale_multiplier<T>(exponent - scale);
696
0
        scale = 0;
697
130
    } else {
698
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
699
        //     the precision must also be set to 4 but that will be done below for the
700
        //     non-exponent case anyways.
701
130
        scale -= exponent;
702
130
    }
703
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
704
    //     were ignored during previous parsing.
705
130
    if (scale > precision) {
706
3
        precision = scale;
707
3
    }
708
709
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
710
    // than just letting the function run out.
711
130
    *result = StringParser::PARSE_SUCCESS;
712
130
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
713
9
        *result = StringParser::PARSE_OVERFLOW;
714
9
        if constexpr (TYPE_DECIMALV2 != P) {
715
            // decimalv3 overflow will return max min value for type precision
716
9
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
717
9
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
718
9
            return value;
719
9
        }
720
121
    } else if (UNLIKELY(scale > type_scale)) {
721
17
        *result = StringParser::PARSE_UNDERFLOW;
722
17
        int shift = scale - type_scale;
723
17
        T divisor = get_scale_multiplier<T>(shift);
724
17
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
725
0
            value = 0;
726
17
        } else {
727
17
            T remainder = value % divisor;
728
17
            value /= divisor;
729
17
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
730
17
                value += 1;
731
17
            }
732
17
        }
733
17
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
734
104
    } else if (UNLIKELY(!found_value && !found_dot)) {
735
0
        *result = StringParser::PARSE_FAILURE;
736
0
    }
737
738
130
    if (type_scale > scale) {
739
80
        value *= get_scale_multiplier<T>(type_scale - scale);
740
80
    }
741
742
130
    return is_negative ? T(-value) : T(value);
743
145
}
744
745
} // end namespace doris