Coverage Report

Created: 2025-04-24 12:05

/root/doris/be/src/util/string_parser.hpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/string-parser.hpp
19
// and modified by Doris
20
21
#pragma once
22
23
#include <fast_float/fast_float.h>
24
#include <fast_float/parse_number.h>
25
#include <glog/logging.h>
26
27
#include <cstdlib>
28
// IWYU pragma: no_include <bits/std_abs.h>
29
#include <cmath> // IWYU pragma: keep
30
#include <cstdint>
31
#include <limits>
32
#include <map>
33
#include <string>
34
#include <system_error>
35
#include <type_traits>
36
#include <utility>
37
38
#include "common/compiler_util.h" // IWYU pragma: keep
39
#include "common/status.h"
40
#include "runtime/large_int_value.h"
41
#include "runtime/primitive_type.h"
42
#include "vec/common/int_exp.h"
43
#include "vec/common/string_utils/string_utils.h"
44
#include "vec/core/extended_types.h"
45
#include "vec/core/wide_integer.h"
46
#include "vec/data_types/data_type_decimal.h"
47
#include "vec/data_types/number_traits.h"
48
49
namespace doris {
50
namespace vectorized {
51
template <DecimalNativeTypeConcept T>
52
struct Decimal;
53
} // namespace vectorized
54
55
// Utility functions for doing atoi/atof on non-null terminated strings.  On micro benchmarks,
56
// this is significantly faster than libc (atoi/strtol and atof/strtod).
57
//
58
// Strings with leading and trailing whitespaces are accepted.
59
// Branching is heavily optimized for the non-whitespace successful case.
60
// All the StringTo* functions first parse the input string assuming it has no leading whitespace.
61
// If that first attempt was unsuccessful, these functions retry the parsing after removing
62
// whitespace. Therefore, strings with whitespace take a perf hit on branch mis-prediction.
63
//
64
// For overflows, we are following the mysql behavior, to cap values at the max/min value for that
65
// data type.  This is different from hive, which returns NULL for overflow slots for int types
66
// and inf/-inf for float types.
67
//
68
// Things we tried that did not work:
69
//  - lookup table for converting character to digit
70
// Improvements (TODO):
71
//  - Validate input using _sidd_compare_ranges
72
//  - Since we know the length, we can parallelize this: i.e. result = 100*s[0] + 10*s[1] + s[2]
73
class StringParser {
74
public:
75
    enum ParseResult { PARSE_SUCCESS = 0, PARSE_FAILURE, PARSE_OVERFLOW, PARSE_UNDERFLOW };
76
77
    template <typename T>
78
568k
    static T numeric_limits(bool negative) {
79
568k
        if constexpr (std::is_same_v<T, __int128>) {
80
521k
            return negative ? MIN_INT128 : MAX_INT128;
81
521k
        } else {
82
521k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
521k
        }
84
568k
    }
_ZN5doris12StringParser14numeric_limitsIaEET_b
Line
Count
Source
78
268k
    static T numeric_limits(bool negative) {
79
268k
        if constexpr (std::is_same_v<T, __int128>) {
80
268k
            return negative ? MIN_INT128 : MAX_INT128;
81
268k
        } else {
82
268k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
268k
        }
84
268k
    }
_ZN5doris12StringParser14numeric_limitsIlEET_b
Line
Count
Source
78
88.6k
    static T numeric_limits(bool negative) {
79
88.6k
        if constexpr (std::is_same_v<T, __int128>) {
80
88.6k
            return negative ? MIN_INT128 : MAX_INT128;
81
88.6k
        } else {
82
88.6k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
88.6k
        }
84
88.6k
    }
_ZN5doris12StringParser14numeric_limitsIsEET_b
Line
Count
Source
78
77.3k
    static T numeric_limits(bool negative) {
79
77.3k
        if constexpr (std::is_same_v<T, __int128>) {
80
77.3k
            return negative ? MIN_INT128 : MAX_INT128;
81
77.3k
        } else {
82
77.3k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
77.3k
        }
84
77.3k
    }
_ZN5doris12StringParser14numeric_limitsIiEET_b
Line
Count
Source
78
65.9k
    static T numeric_limits(bool negative) {
79
65.9k
        if constexpr (std::is_same_v<T, __int128>) {
80
65.9k
            return negative ? MIN_INT128 : MAX_INT128;
81
65.9k
        } else {
82
65.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
65.9k
        }
84
65.9k
    }
_ZN5doris12StringParser14numeric_limitsInEET_b
Line
Count
Source
78
46.8k
    static T numeric_limits(bool negative) {
79
46.8k
        if constexpr (std::is_same_v<T, __int128>) {
80
46.8k
            return negative ? MIN_INT128 : MAX_INT128;
81
46.8k
        } else {
82
46.8k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
46.8k
        }
84
46.8k
    }
_ZN5doris12StringParser14numeric_limitsIhEET_b
Line
Count
Source
78
19.9k
    static T numeric_limits(bool negative) {
79
19.9k
        if constexpr (std::is_same_v<T, __int128>) {
80
19.9k
            return negative ? MIN_INT128 : MAX_INT128;
81
19.9k
        } else {
82
19.9k
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
19.9k
        }
84
19.9k
    }
_ZN5doris12StringParser14numeric_limitsItEET_b
Line
Count
Source
78
672
    static T numeric_limits(bool negative) {
79
672
        if constexpr (std::is_same_v<T, __int128>) {
80
672
            return negative ? MIN_INT128 : MAX_INT128;
81
672
        } else {
82
672
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
672
        }
84
672
    }
_ZN5doris12StringParser14numeric_limitsIjEET_b
Line
Count
Source
78
780
    static T numeric_limits(bool negative) {
79
780
        if constexpr (std::is_same_v<T, __int128>) {
80
780
            return negative ? MIN_INT128 : MAX_INT128;
81
780
        } else {
82
780
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
780
        }
84
780
    }
_ZN5doris12StringParser14numeric_limitsImEET_b
Line
Count
Source
78
729
    static T numeric_limits(bool negative) {
79
729
        if constexpr (std::is_same_v<T, __int128>) {
80
729
            return negative ? MIN_INT128 : MAX_INT128;
81
729
        } else {
82
729
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
729
        }
84
729
    }
_ZN5doris12StringParser14numeric_limitsIoEET_b
Line
Count
Source
78
4
    static T numeric_limits(bool negative) {
79
4
        if constexpr (std::is_same_v<T, __int128>) {
80
4
            return negative ? MIN_INT128 : MAX_INT128;
81
4
        } else {
82
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
4
        }
84
4
    }
_ZN5doris12StringParser14numeric_limitsIN4wide7integerILm256EiEEEET_b
Line
Count
Source
78
4
    static T numeric_limits(bool negative) {
79
4
        if constexpr (std::is_same_v<T, __int128>) {
80
4
            return negative ? MIN_INT128 : MAX_INT128;
81
4
        } else {
82
4
            return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
83
4
        }
84
4
    }
85
86
    template <typename T>
87
247k
    static T get_scale_multiplier(int scale) {
88
247k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
89
247k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
90
247k
                      "You can only instantiate as int32_t, int64_t, __int128.");
91
247k
        if constexpr (std::is_same_v<T, int32_t>) {
92
225k
            return common::exp10_i32(scale);
93
225k
        } else if constexpr (std::is_same_v<T, int64_t>) {
94
198k
            return common::exp10_i64(scale);
95
198k
        } else if constexpr (std::is_same_v<T, __int128>) {
96
154k
            return common::exp10_i128(scale);
97
154k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
98
154k
            return common::exp10_i256(scale);
99
154k
        }
100
247k
    }
_ZN5doris12StringParser20get_scale_multiplierIiEET_i
Line
Count
Source
87
21.6k
    static T get_scale_multiplier(int scale) {
88
21.6k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
89
21.6k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
90
21.6k
                      "You can only instantiate as int32_t, int64_t, __int128.");
91
21.6k
        if constexpr (std::is_same_v<T, int32_t>) {
92
21.6k
            return common::exp10_i32(scale);
93
21.6k
        } else if constexpr (std::is_same_v<T, int64_t>) {
94
21.6k
            return common::exp10_i64(scale);
95
21.6k
        } else if constexpr (std::is_same_v<T, __int128>) {
96
21.6k
            return common::exp10_i128(scale);
97
21.6k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
98
21.6k
            return common::exp10_i256(scale);
99
21.6k
        }
100
21.6k
    }
_ZN5doris12StringParser20get_scale_multiplierIlEET_i
Line
Count
Source
87
26.6k
    static T get_scale_multiplier(int scale) {
88
26.6k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
89
26.6k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
90
26.6k
                      "You can only instantiate as int32_t, int64_t, __int128.");
91
26.6k
        if constexpr (std::is_same_v<T, int32_t>) {
92
26.6k
            return common::exp10_i32(scale);
93
26.6k
        } else if constexpr (std::is_same_v<T, int64_t>) {
94
26.6k
            return common::exp10_i64(scale);
95
26.6k
        } else if constexpr (std::is_same_v<T, __int128>) {
96
26.6k
            return common::exp10_i128(scale);
97
26.6k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
98
26.6k
            return common::exp10_i256(scale);
99
26.6k
        }
100
26.6k
    }
_ZN5doris12StringParser20get_scale_multiplierInEET_i
Line
Count
Source
87
44.3k
    static T get_scale_multiplier(int scale) {
88
44.3k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
89
44.3k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
90
44.3k
                      "You can only instantiate as int32_t, int64_t, __int128.");
91
44.3k
        if constexpr (std::is_same_v<T, int32_t>) {
92
44.3k
            return common::exp10_i32(scale);
93
44.3k
        } else if constexpr (std::is_same_v<T, int64_t>) {
94
44.3k
            return common::exp10_i64(scale);
95
44.3k
        } else if constexpr (std::is_same_v<T, __int128>) {
96
44.3k
            return common::exp10_i128(scale);
97
44.3k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
98
44.3k
            return common::exp10_i256(scale);
99
44.3k
        }
100
44.3k
    }
_ZN5doris12StringParser20get_scale_multiplierIN4wide7integerILm256EiEEEET_i
Line
Count
Source
87
154k
    static T get_scale_multiplier(int scale) {
88
154k
        static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
89
154k
                              std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
90
154k
                      "You can only instantiate as int32_t, int64_t, __int128.");
91
154k
        if constexpr (std::is_same_v<T, int32_t>) {
92
154k
            return common::exp10_i32(scale);
93
154k
        } else if constexpr (std::is_same_v<T, int64_t>) {
94
154k
            return common::exp10_i64(scale);
95
154k
        } else if constexpr (std::is_same_v<T, __int128>) {
96
154k
            return common::exp10_i128(scale);
97
154k
        } else if constexpr (std::is_same_v<T, wide::Int256>) {
98
154k
            return common::exp10_i256(scale);
99
154k
        }
100
154k
    }
101
102
    // This is considerably faster than glibc's implementation (25x).
103
    // In the case of overflow, the max/min value for the data type will be returned.
104
    // Assumes s represents a decimal number.
105
    template <typename T>
106
370k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
370k
        T ans = string_to_int_internal<T>(s, len, result);
108
370k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
317k
            return ans;
110
317k
        }
111
112
53.1k
        int i = skip_leading_whitespace(s, len);
113
53.1k
        return string_to_int_internal<T>(s + i, len - i, result);
114
370k
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
83.5k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
83.5k
        T ans = string_to_int_internal<T>(s, len, result);
108
83.5k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
79.6k
            return ans;
110
79.6k
        }
111
112
3.97k
        int i = skip_leading_whitespace(s, len);
113
3.97k
        return string_to_int_internal<T>(s + i, len - i, result);
114
83.5k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
97.5k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
97.5k
        T ans = string_to_int_internal<T>(s, len, result);
108
97.5k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
68.8k
            return ans;
110
68.8k
        }
111
112
28.7k
        int i = skip_leading_whitespace(s, len);
113
28.7k
        return string_to_int_internal<T>(s + i, len - i, result);
114
97.5k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
68.3k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
68.3k
        T ans = string_to_int_internal<T>(s, len, result);
108
68.3k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
60.4k
            return ans;
110
60.4k
        }
111
112
7.84k
        int i = skip_leading_whitespace(s, len);
113
7.84k
        return string_to_int_internal<T>(s + i, len - i, result);
114
68.3k
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
60.9k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
60.9k
        T ans = string_to_int_internal<T>(s, len, result);
108
60.9k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
57.0k
            return ans;
110
57.0k
        }
111
112
3.91k
        int i = skip_leading_whitespace(s, len);
113
3.91k
        return string_to_int_internal<T>(s + i, len - i, result);
114
60.9k
    }
_ZN5doris12StringParser13string_to_intInEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
46.2k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
46.2k
        T ans = string_to_int_internal<T>(s, len, result);
108
46.2k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
45.5k
            return ans;
110
45.5k
        }
111
112
663
        int i = skip_leading_whitespace(s, len);
113
663
        return string_to_int_internal<T>(s + i, len - i, result);
114
46.2k
    }
_ZN5doris12StringParser13string_to_intIhEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
11.9k
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
11.9k
        T ans = string_to_int_internal<T>(s, len, result);
108
11.9k
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
3.89k
            return ans;
110
3.89k
        }
111
112
8.02k
        int i = skip_leading_whitespace(s, len);
113
8.02k
        return string_to_int_internal<T>(s + i, len - i, result);
114
11.9k
    }
_ZN5doris12StringParser13string_to_intItEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
669
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
669
        T ans = string_to_int_internal<T>(s, len, result);
108
669
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
666
            return ans;
110
666
        }
111
112
3
        int i = skip_leading_whitespace(s, len);
113
3
        return string_to_int_internal<T>(s + i, len - i, result);
114
669
    }
_ZN5doris12StringParser13string_to_intIjEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
777
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
777
        T ans = string_to_int_internal<T>(s, len, result);
108
777
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
774
            return ans;
110
774
        }
111
112
3
        int i = skip_leading_whitespace(s, len);
113
3
        return string_to_int_internal<T>(s + i, len - i, result);
114
777
    }
_ZN5doris12StringParser13string_to_intImEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
725
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
725
        T ans = string_to_int_internal<T>(s, len, result);
108
725
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
722
            return ans;
110
722
        }
111
112
3
        int i = skip_leading_whitespace(s, len);
113
3
        return string_to_int_internal<T>(s + i, len - i, result);
114
725
    }
_ZN5doris12StringParser13string_to_intIoEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
4
        T ans = string_to_int_internal<T>(s, len, result);
108
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
4
            return ans;
110
4
        }
111
112
0
        int i = skip_leading_whitespace(s, len);
113
0
        return string_to_int_internal<T>(s + i, len - i, result);
114
4
    }
_ZN5doris12StringParser13string_to_intIN4wide7integerILm256EiEEEET_PKcmPNS0_11ParseResultE
Line
Count
Source
106
4
    static inline T string_to_int(const char* __restrict s, size_t len, ParseResult* result) {
107
4
        T ans = string_to_int_internal<T>(s, len, result);
108
4
        if (LIKELY(*result == PARSE_SUCCESS)) {
109
4
            return ans;
110
4
        }
111
112
0
        int i = skip_leading_whitespace(s, len);
113
0
        return string_to_int_internal<T>(s + i, len - i, result);
114
4
    }
115
116
    // This is considerably faster than glibc's implementation.
117
    // In the case of overflow, the max/min value for the data type will be returned.
118
    // Assumes s represents a decimal number.
119
    template <typename T>
120
1.38k
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
121
1.38k
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
122
1.38k
        if (LIKELY(*result == PARSE_SUCCESS)) {
123
92
            return ans;
124
92
        }
125
126
1.29k
        int i = skip_leading_whitespace(s, len);
127
1.29k
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
128
1.38k
    }
_ZN5doris12StringParser22string_to_unsigned_intImEET_PKciPNS0_11ParseResultE
Line
Count
Source
120
355
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
121
355
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
122
355
        if (LIKELY(*result == PARSE_SUCCESS)) {
123
29
            return ans;
124
29
        }
125
126
326
        int i = skip_leading_whitespace(s, len);
127
326
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
128
355
    }
_ZN5doris12StringParser22string_to_unsigned_intIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
120
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
121
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
122
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
123
21
            return ans;
124
21
        }
125
126
322
        int i = skip_leading_whitespace(s, len);
127
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
128
343
    }
_ZN5doris12StringParser22string_to_unsigned_intItEET_PKciPNS0_11ParseResultE
Line
Count
Source
120
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
121
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
122
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
123
21
            return ans;
124
21
        }
125
126
322
        int i = skip_leading_whitespace(s, len);
127
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
128
343
    }
_ZN5doris12StringParser22string_to_unsigned_intIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
120
343
    static inline T string_to_unsigned_int(const char* __restrict s, int len, ParseResult* result) {
121
343
        T ans = string_to_unsigned_int_internal<T>(s, len, result);
122
343
        if (LIKELY(*result == PARSE_SUCCESS)) {
123
21
            return ans;
124
21
        }
125
126
322
        int i = skip_leading_whitespace(s, len);
127
322
        return string_to_unsigned_int_internal<T>(s + i, len - i, result);
128
343
    }
129
130
    // Convert a string s representing a number in given base into a decimal number.
131
    template <typename T>
132
    static inline T string_to_int(const char* __restrict s, int64_t len, int base,
133
27.8k
                                  ParseResult* result) {
134
27.8k
        T ans = string_to_int_internal<T>(s, len, base, result);
135
27.8k
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
2.06k
            return ans;
137
2.06k
        }
138
139
25.7k
        int i = skip_leading_whitespace(s, len);
140
25.7k
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
27.8k
    }
_ZN5doris12StringParser13string_to_intIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
133
26.4k
                                  ParseResult* result) {
134
26.4k
        T ans = string_to_int_internal<T>(s, len, base, result);
135
26.4k
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
1.91k
            return ans;
137
1.91k
        }
138
139
24.5k
        int i = skip_leading_whitespace(s, len);
140
24.5k
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
26.4k
    }
_ZN5doris12StringParser13string_to_intIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
133
490
                                  ParseResult* result) {
134
490
        T ans = string_to_int_internal<T>(s, len, base, result);
135
490
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
56
            return ans;
137
56
        }
138
139
434
        int i = skip_leading_whitespace(s, len);
140
434
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
490
    }
_ZN5doris12StringParser13string_to_intIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
133
441
                                  ParseResult* result) {
134
441
        T ans = string_to_int_internal<T>(s, len, base, result);
135
441
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
49
            return ans;
137
49
        }
138
139
392
        int i = skip_leading_whitespace(s, len);
140
392
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
441
    }
_ZN5doris12StringParser13string_to_intIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
133
441
                                  ParseResult* result) {
134
441
        T ans = string_to_int_internal<T>(s, len, base, result);
135
441
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
49
            return ans;
137
49
        }
138
139
392
        int i = skip_leading_whitespace(s, len);
140
392
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
441
    }
_ZN5doris12StringParser13string_to_intImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
133
1
                                  ParseResult* result) {
134
1
        T ans = string_to_int_internal<T>(s, len, base, result);
135
1
        if (LIKELY(*result == PARSE_SUCCESS)) {
136
1
            return ans;
137
1
        }
138
139
0
        int i = skip_leading_whitespace(s, len);
140
0
        return string_to_int_internal<T>(s + i, len - i, base, result);
141
1
    }
142
143
    template <typename T>
144
137k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
145
137k
        return string_to_float_internal<T>(s, len, result);
146
137k
    }
_ZN5doris12StringParser15string_to_floatIdEET_PKcmPNS0_11ParseResultE
Line
Count
Source
144
73.8k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
145
73.8k
        return string_to_float_internal<T>(s, len, result);
146
73.8k
    }
_ZN5doris12StringParser15string_to_floatIfEET_PKcmPNS0_11ParseResultE
Line
Count
Source
144
63.8k
    static inline T string_to_float(const char* __restrict s, size_t len, ParseResult* result) {
145
63.8k
        return string_to_float_internal<T>(s, len, result);
146
63.8k
    }
147
148
    // Parses a string for 'true' or 'false', case insensitive.
149
8.31k
    static inline bool string_to_bool(const char* __restrict s, int len, ParseResult* result) {
150
8.31k
        bool ans = string_to_bool_internal(s, len, result);
151
8.31k
        if (LIKELY(*result == PARSE_SUCCESS)) {
152
7.28k
            return ans;
153
7.28k
        }
154
155
1.03k
        int i = skip_leading_whitespace(s, len);
156
1.03k
        return string_to_bool_internal(s + i, len - i, result);
157
8.31k
    }
158
159
    template <PrimitiveType P, typename T = PrimitiveTypeTraits<P>::CppType::NativeType,
160
              typename DecimalType = PrimitiveTypeTraits<P>::ColumnType::value_type>
161
    static inline T string_to_decimal(const char* __restrict s, int len, int type_precision,
162
                                      int type_scale, ParseResult* result);
163
164
    template <typename T>
165
    static Status split_string_to_map(const std::string& base, const T element_separator,
166
                                      const T key_value_separator,
167
                                      std::map<std::string, std::string>* result) {
168
        int key_pos = 0;
169
        int key_end;
170
        int val_pos;
171
        int val_end;
172
173
        while ((key_end = base.find(key_value_separator, key_pos)) != std::string::npos) {
174
            if ((val_pos = base.find_first_not_of(key_value_separator, key_end)) ==
175
                std::string::npos) {
176
                break;
177
            }
178
            if ((val_end = base.find(element_separator, val_pos)) == std::string::npos) {
179
                val_end = base.size();
180
            }
181
            result->insert(std::make_pair(base.substr(key_pos, key_end - key_pos),
182
                                          base.substr(val_pos, val_end - val_pos)));
183
            key_pos = val_end;
184
            if (key_pos != std::string::npos) {
185
                ++key_pos;
186
            }
187
        }
188
189
        return Status::OK();
190
    }
191
192
private:
193
    // This is considerably faster than glibc's implementation.
194
    // In the case of overflow, the max/min value for the data type will be returned.
195
    // Assumes s represents a decimal number.
196
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
197
    template <typename T>
198
    static inline T string_to_int_internal(const char* __restrict s, int len, ParseResult* result);
199
200
    // This is considerably faster than glibc's implementation.
201
    // In the case of overflow, the max/min value for the data type will be returned.
202
    // Assumes s represents a decimal number.
203
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
204
    template <typename T>
205
    static inline T string_to_unsigned_int_internal(const char* __restrict s, int len,
206
                                                    ParseResult* result);
207
208
    // Convert a string s representing a number in given base into a decimal number.
209
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
210
    template <typename T>
211
    static inline T string_to_int_internal(const char* __restrict s, int64_t len, int base,
212
                                           ParseResult* result);
213
214
    // Converts an ascii string to an integer of type T assuming it cannot overflow
215
    // and the number is positive.
216
    // Leading whitespace is not allowed. Trailing whitespace will be skipped.
217
    template <typename T>
218
    static inline T string_to_int_no_overflow(const char* __restrict s, int len,
219
                                              ParseResult* result);
220
221
    // This is considerably faster than glibc's implementation (>100x why???)
222
    // No special case handling needs to be done for overflows, the floating point spec
223
    // already does it and will cap the values to -inf/inf
224
    // To avoid inaccurate conversions this function falls back to strtod for
225
    // scientific notation.
226
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
227
    // TODO: Investigate using intrinsics to speed up the slow strtod path.
228
    template <typename T>
229
    static inline T string_to_float_internal(const char* __restrict s, int len,
230
                                             ParseResult* result);
231
232
    // parses a string for 'true' or 'false', case insensitive
233
    // Return PARSE_FAILURE on leading whitespace. Trailing whitespace is allowed.
234
    static inline bool string_to_bool_internal(const char* __restrict s, int len,
235
                                               ParseResult* result);
236
237
    // Returns true if s only contains whitespace.
238
33.8k
    static inline bool is_all_whitespace(const char* __restrict s, int len) {
239
108k
        for (int i = 0; i < len; ++i) {
240
76.6k
            if (!LIKELY(is_whitespace(s[i]))) {
241
1.77k
                return false;
242
1.77k
            }
243
76.6k
        }
244
32.0k
        return true;
245
33.8k
    }
246
247
    // For strings like "3.0", "3.123", and "3.", can parse them as 3.
248
1.49k
    static inline bool is_float_suffix(const char* __restrict s, int len) {
249
1.49k
        return (s[0] == '.' && is_all_digit(s + 1, len - 1));
250
1.49k
    }
251
252
826
    static inline bool is_all_digit(const char* __restrict s, int len) {
253
1.65k
        for (int i = 0; i < len; ++i) {
254
837
            if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
255
11
                return false;
256
11
            }
257
837
        }
258
815
        return true;
259
826
    }
260
261
    // Returns the position of the first non-whitespace character in s.
262
81.2k
    static inline int skip_leading_whitespace(const char* __restrict s, int len) {
263
81.2k
        int i = 0;
264
234k
        while (i < len && is_whitespace(s[i])) {
265
153k
            ++i;
266
153k
        }
267
81.2k
        return i;
268
81.2k
    }
269
270
    // Our own definition of "isspace" that optimize on the ' ' branch.
271
1.17M
    static inline bool is_whitespace(const char& c) {
272
1.17M
        return LIKELY(c == ' ') ||
273
1.17M
               UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r');
274
1.17M
    }
275
276
}; // end of class StringParser
277
278
template <typename T>
279
501k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
501k
    if (UNLIKELY(len <= 0)) {
281
242
        *result = PARSE_FAILURE;
282
242
        return 0;
283
242
    }
284
285
500k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
500k
    UnsignedT val = 0;
287
500k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
500k
    bool negative = false;
289
500k
    int i = 0;
290
500k
    switch (*s) {
291
107k
    case '-':
292
107k
        negative = true;
293
107k
        max_val += 1;
294
107k
        [[fallthrough]];
295
178k
    case '+':
296
178k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
178k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
500k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
500k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
327k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
327k
        return static_cast<T>(negative ? -val : val);
308
327k
    }
309
310
173k
    const T max_div_10 = max_val / 10;
311
173k
    const T max_mod_10 = max_val % 10;
312
313
173k
    int first = i;
314
1.63M
    for (; i < len; ++i) {
315
1.56M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
1.51M
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
1.51M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
46.3k
                *result = PARSE_OVERFLOW;
320
46.3k
                return negative ? -max_val : max_val;
321
46.3k
            }
322
1.46M
            val = val * 10 + digit;
323
1.46M
        } else {
324
52.1k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
52.1k
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
40.7k
                *result = PARSE_FAILURE;
329
40.7k
                return 0;
330
40.7k
            }
331
            // Returning here is slightly faster than breaking the loop.
332
11.4k
            *result = PARSE_SUCCESS;
333
11.4k
            return static_cast<T>(negative ? -val : val);
334
52.1k
        }
335
1.56M
    }
336
74.6k
    *result = PARSE_SUCCESS;
337
74.6k
    return static_cast<T>(negative ? -val : val);
338
173k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
203k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
203k
    if (UNLIKELY(len <= 0)) {
281
206
        *result = PARSE_FAILURE;
282
206
        return 0;
283
206
    }
284
285
203k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
203k
    UnsignedT val = 0;
287
203k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
203k
    bool negative = false;
289
203k
    int i = 0;
290
203k
    switch (*s) {
291
28.1k
    case '-':
292
28.1k
        negative = true;
293
28.1k
        max_val += 1;
294
28.1k
        [[fallthrough]];
295
99.6k
    case '+':
296
99.6k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
99.6k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
203k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
203k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
140k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
140k
        return static_cast<T>(negative ? -val : val);
308
140k
    }
309
310
63.0k
    const T max_div_10 = max_val / 10;
311
63.0k
    const T max_mod_10 = max_val % 10;
312
313
63.0k
    int first = i;
314
150k
    for (; i < len; ++i) {
315
143k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
108k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
108k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
21.0k
                *result = PARSE_OVERFLOW;
320
21.0k
                return negative ? -max_val : max_val;
321
21.0k
            }
322
87.9k
            val = val * 10 + digit;
323
87.9k
        } else {
324
34.9k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
34.9k
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
23.9k
                *result = PARSE_FAILURE;
329
23.9k
                return 0;
330
23.9k
            }
331
            // Returning here is slightly faster than breaking the loop.
332
11.0k
            *result = PARSE_SUCCESS;
333
11.0k
            return static_cast<T>(negative ? -val : val);
334
34.9k
        }
335
143k
    }
336
7.01k
    *result = PARSE_SUCCESS;
337
7.01k
    return static_cast<T>(negative ? -val : val);
338
63.0k
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
87.5k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
87.5k
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
87.5k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
87.5k
    UnsignedT val = 0;
287
87.5k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
87.5k
    bool negative = false;
289
87.5k
    int i = 0;
290
87.5k
    switch (*s) {
291
51.9k
    case '-':
292
51.9k
        negative = true;
293
51.9k
        max_val += 1;
294
51.9k
        [[fallthrough]];
295
52.0k
    case '+':
296
52.0k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
52.0k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
87.5k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
87.5k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
33.0k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
33.0k
        return static_cast<T>(negative ? -val : val);
308
33.0k
    }
309
310
54.5k
    const T max_div_10 = max_val / 10;
311
54.5k
    const T max_mod_10 = max_val % 10;
312
313
54.5k
    int first = i;
314
1.06M
    for (; i < len; ++i) {
315
1.01M
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
1.01M
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
1.01M
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
5.02k
                *result = PARSE_OVERFLOW;
320
5.02k
                return negative ? -max_val : max_val;
321
5.02k
            }
322
1.01M
            val = val * 10 + digit;
323
1.01M
        } else {
324
1.03k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
1.03k
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
949
                *result = PARSE_FAILURE;
329
949
                return 0;
330
949
            }
331
            // Returning here is slightly faster than breaking the loop.
332
84
            *result = PARSE_SUCCESS;
333
84
            return static_cast<T>(negative ? -val : val);
334
1.03k
        }
335
1.01M
    }
336
48.4k
    *result = PARSE_SUCCESS;
337
48.4k
    return static_cast<T>(negative ? -val : val);
338
54.5k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
76.1k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
76.1k
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
76.1k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
76.1k
    UnsignedT val = 0;
287
76.1k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
76.1k
    bool negative = false;
289
76.1k
    int i = 0;
290
76.1k
    switch (*s) {
291
12.9k
    case '-':
292
12.9k
        negative = true;
293
12.9k
        max_val += 1;
294
12.9k
        [[fallthrough]];
295
12.9k
    case '+':
296
12.9k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
12.9k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
76.1k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
76.1k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
52.8k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
52.8k
        return static_cast<T>(negative ? -val : val);
308
52.8k
    }
309
310
23.3k
    const T max_div_10 = max_val / 10;
311
23.3k
    const T max_mod_10 = max_val % 10;
312
313
23.3k
    int first = i;
314
122k
    for (; i < len; ++i) {
315
113k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
112k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
112k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
13.2k
                *result = PARSE_OVERFLOW;
320
13.2k
                return negative ? -max_val : max_val;
321
13.2k
            }
322
99.5k
            val = val * 10 + digit;
323
99.5k
        } else {
324
975
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
975
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
765
                *result = PARSE_FAILURE;
329
765
                return 0;
330
765
            }
331
            // Returning here is slightly faster than breaking the loop.
332
210
            *result = PARSE_SUCCESS;
333
210
            return static_cast<T>(negative ? -val : val);
334
975
        }
335
113k
    }
336
9.14k
    *result = PARSE_SUCCESS;
337
9.14k
    return static_cast<T>(negative ? -val : val);
338
23.3k
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
64.9k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
64.9k
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
64.9k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
64.9k
    UnsignedT val = 0;
287
64.9k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
64.9k
    bool negative = false;
289
64.9k
    int i = 0;
290
64.9k
    switch (*s) {
291
10.1k
    case '-':
292
10.1k
        negative = true;
293
10.1k
        max_val += 1;
294
10.1k
        [[fallthrough]];
295
10.2k
    case '+':
296
10.2k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
10.2k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
64.9k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
64.9k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
52.7k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
52.7k
        return static_cast<T>(negative ? -val : val);
308
52.7k
    }
309
310
12.1k
    const T max_div_10 = max_val / 10;
311
12.1k
    const T max_mod_10 = max_val % 10;
312
313
12.1k
    int first = i;
314
124k
    for (; i < len; ++i) {
315
118k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
117k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
117k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
5.61k
                *result = PARSE_OVERFLOW;
320
5.61k
                return negative ? -max_val : max_val;
321
5.61k
            }
322
112k
            val = val * 10 + digit;
323
112k
        } else {
324
509
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
509
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
383
                *result = PARSE_FAILURE;
329
383
                return 0;
330
383
            }
331
            // Returning here is slightly faster than breaking the loop.
332
126
            *result = PARSE_SUCCESS;
333
126
            return static_cast<T>(negative ? -val : val);
334
509
        }
335
118k
    }
336
6.04k
    *result = PARSE_SUCCESS;
337
6.04k
    return static_cast<T>(negative ? -val : val);
338
12.1k
}
_ZN5doris12StringParser22string_to_int_internalInEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
46.8k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
46.8k
    if (UNLIKELY(len <= 0)) {
281
36
        *result = PARSE_FAILURE;
282
36
        return 0;
283
36
    }
284
285
46.8k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
46.8k
    UnsignedT val = 0;
287
46.8k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
46.8k
    bool negative = false;
289
46.8k
    int i = 0;
290
46.8k
    switch (*s) {
291
3.15k
    case '-':
292
3.15k
        negative = true;
293
3.15k
        max_val += 1;
294
3.15k
        [[fallthrough]];
295
3.15k
    case '+':
296
3.15k
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
3.15k
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
46.8k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
46.8k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
43.0k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
43.0k
        return static_cast<T>(negative ? -val : val);
308
43.0k
    }
309
310
3.83k
    const T max_div_10 = max_val / 10;
311
3.83k
    const T max_mod_10 = max_val % 10;
312
313
3.83k
    int first = i;
314
150k
    for (; i < len; ++i) {
315
146k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
146k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
146k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
6
                *result = PARSE_OVERFLOW;
320
6
                return negative ? -max_val : max_val;
321
6
            }
322
146k
            val = val * 10 + digit;
323
146k
        } else {
324
106
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
106
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
106
                *result = PARSE_FAILURE;
329
106
                return 0;
330
106
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
106
        }
335
146k
    }
336
3.72k
    *result = PARSE_SUCCESS;
337
3.72k
    return static_cast<T>(negative ? -val : val);
338
3.83k
}
_ZN5doris12StringParser22string_to_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
19.9k
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
19.9k
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
19.9k
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
19.9k
    UnsignedT val = 0;
287
19.9k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
19.9k
    bool negative = false;
289
19.9k
    int i = 0;
290
19.9k
    switch (*s) {
291
862
    case '-':
292
862
        negative = true;
293
862
        max_val += 1;
294
862
        [[fallthrough]];
295
862
    case '+':
296
862
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
862
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
19.9k
    }
303
304
    // This is the fast path where the string cannot overflow.
305
19.9k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
3.89k
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
3.89k
        return static_cast<T>(negative ? -val : val);
308
3.89k
    }
309
310
16.0k
    const T max_div_10 = max_val / 10;
311
16.0k
    const T max_mod_10 = max_val % 10;
312
313
16.0k
    int first = i;
314
18.2k
    for (; i < len; ++i) {
315
18.2k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
3.69k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
3.69k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
1.46k
                *result = PARSE_OVERFLOW;
320
1.46k
                return negative ? -max_val : max_val;
321
1.46k
            }
322
2.23k
            val = val * 10 + digit;
323
14.5k
        } else {
324
14.5k
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
14.5k
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
14.5k
                *result = PARSE_FAILURE;
329
14.5k
                return 0;
330
14.5k
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
14.5k
        }
335
18.2k
    }
336
12
    *result = PARSE_SUCCESS;
337
12
    return static_cast<T>(negative ? -val : val);
338
16.0k
}
_ZN5doris12StringParser22string_to_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
672
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
672
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
672
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
672
    UnsignedT val = 0;
287
672
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
672
    bool negative = false;
289
672
    int i = 0;
290
672
    switch (*s) {
291
0
    case '-':
292
0
        negative = true;
293
0
        max_val += 1;
294
0
        [[fallthrough]];
295
0
    case '+':
296
0
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
0
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
672
    }
303
304
    // This is the fast path where the string cannot overflow.
305
672
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
600
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
600
        return static_cast<T>(negative ? -val : val);
308
600
    }
309
310
72
    const T max_div_10 = max_val / 10;
311
72
    const T max_mod_10 = max_val % 10;
312
313
72
    int first = i;
314
432
    for (; i < len; ++i) {
315
360
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
360
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
360
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
0
                *result = PARSE_OVERFLOW;
320
0
                return negative ? -max_val : max_val;
321
0
            }
322
360
            val = val * 10 + digit;
323
360
        } else {
324
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
0
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
0
                *result = PARSE_FAILURE;
329
0
                return 0;
330
0
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
0
        }
335
360
    }
336
72
    *result = PARSE_SUCCESS;
337
72
    return static_cast<T>(negative ? -val : val);
338
72
}
_ZN5doris12StringParser22string_to_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
780
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
780
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
780
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
780
    UnsignedT val = 0;
287
780
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
780
    bool negative = false;
289
780
    int i = 0;
290
780
    switch (*s) {
291
0
    case '-':
292
0
        negative = true;
293
0
        max_val += 1;
294
0
        [[fallthrough]];
295
0
    case '+':
296
0
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
0
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
780
    }
303
304
    // This is the fast path where the string cannot overflow.
305
780
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
654
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
654
        return static_cast<T>(negative ? -val : val);
308
654
    }
309
310
126
    const T max_div_10 = max_val / 10;
311
126
    const T max_mod_10 = max_val % 10;
312
313
126
    int first = i;
314
1.38k
    for (; i < len; ++i) {
315
1.26k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
1.26k
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
1.26k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
0
                *result = PARSE_OVERFLOW;
320
0
                return negative ? -max_val : max_val;
321
0
            }
322
1.26k
            val = val * 10 + digit;
323
1.26k
        } else {
324
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
0
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
0
                *result = PARSE_FAILURE;
329
0
                return 0;
330
0
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
0
        }
335
1.26k
    }
336
126
    *result = PARSE_SUCCESS;
337
126
    return static_cast<T>(negative ? -val : val);
338
126
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
728
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
728
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
728
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
728
    UnsignedT val = 0;
287
728
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
728
    bool negative = false;
289
728
    int i = 0;
290
728
    switch (*s) {
291
0
    case '-':
292
0
        negative = true;
293
0
        max_val += 1;
294
0
        [[fallthrough]];
295
0
    case '+':
296
0
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
0
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
728
    }
303
304
    // This is the fast path where the string cannot overflow.
305
728
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
692
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
692
        return static_cast<T>(negative ? -val : val);
308
692
    }
309
310
36
    const T max_div_10 = max_val / 10;
311
36
    const T max_mod_10 = max_val % 10;
312
313
36
    int first = i;
314
756
    for (; i < len; ++i) {
315
720
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
720
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
720
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
0
                *result = PARSE_OVERFLOW;
320
0
                return negative ? -max_val : max_val;
321
0
            }
322
720
            val = val * 10 + digit;
323
720
        } else {
324
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
0
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
0
                *result = PARSE_FAILURE;
329
0
                return 0;
330
0
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
0
        }
335
720
    }
336
36
    *result = PARSE_SUCCESS;
337
36
    return static_cast<T>(negative ? -val : val);
338
36
}
_ZN5doris12StringParser22string_to_int_internalIoEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
4
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
4
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
4
    UnsignedT val = 0;
287
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
4
    bool negative = false;
289
4
    int i = 0;
290
4
    switch (*s) {
291
0
    case '-':
292
0
        negative = true;
293
0
        max_val += 1;
294
0
        [[fallthrough]];
295
0
    case '+':
296
0
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
0
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
4
    }
303
304
    // This is the fast path where the string cannot overflow.
305
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
0
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
0
        return static_cast<T>(negative ? -val : val);
308
0
    }
309
310
4
    const T max_div_10 = max_val / 10;
311
4
    const T max_mod_10 = max_val % 10;
312
313
4
    int first = i;
314
84
    for (; i < len; ++i) {
315
80
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
80
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
80
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
0
                *result = PARSE_OVERFLOW;
320
0
                return negative ? -max_val : max_val;
321
0
            }
322
80
            val = val * 10 + digit;
323
80
        } else {
324
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
0
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
0
                *result = PARSE_FAILURE;
329
0
                return 0;
330
0
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
0
        }
335
80
    }
336
4
    *result = PARSE_SUCCESS;
337
4
    return static_cast<T>(negative ? -val : val);
338
4
}
_ZN5doris12StringParser22string_to_int_internalIN4wide7integerILm256EiEEEET_PKciPNS0_11ParseResultE
Line
Count
Source
279
4
T StringParser::string_to_int_internal(const char* __restrict s, int len, ParseResult* result) {
280
4
    if (UNLIKELY(len <= 0)) {
281
0
        *result = PARSE_FAILURE;
282
0
        return 0;
283
0
    }
284
285
4
    typedef typename std::make_unsigned<T>::type UnsignedT;
286
4
    UnsignedT val = 0;
287
4
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
288
4
    bool negative = false;
289
4
    int i = 0;
290
4
    switch (*s) {
291
0
    case '-':
292
0
        negative = true;
293
0
        max_val += 1;
294
0
        [[fallthrough]];
295
0
    case '+':
296
0
        ++i;
297
        // only one '+'/'-' char, so could return failure directly
298
0
        if (UNLIKELY(len == 1)) {
299
0
            *result = PARSE_FAILURE;
300
0
            return 0;
301
0
        }
302
4
    }
303
304
    // This is the fast path where the string cannot overflow.
305
4
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<T>())) {
306
4
        val = string_to_int_no_overflow<UnsignedT>(s + i, len - i, result);
307
4
        return static_cast<T>(negative ? -val : val);
308
4
    }
309
310
0
    const T max_div_10 = max_val / 10;
311
0
    const T max_mod_10 = max_val % 10;
312
313
0
    int first = i;
314
0
    for (; i < len; ++i) {
315
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
316
0
            T digit = s[i] - '0';
317
            // This is a tricky check to see if adding this digit will cause an overflow.
318
0
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
319
0
                *result = PARSE_OVERFLOW;
320
0
                return negative ? -max_val : max_val;
321
0
            }
322
0
            val = val * 10 + digit;
323
0
        } else {
324
0
            if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
325
0
                                         !is_float_suffix(s + i, len - i))))) {
326
                // Reject the string because either the first char was not a digit,
327
                // or the remaining chars are not all whitespace
328
0
                *result = PARSE_FAILURE;
329
0
                return 0;
330
0
            }
331
            // Returning here is slightly faster than breaking the loop.
332
0
            *result = PARSE_SUCCESS;
333
0
            return static_cast<T>(negative ? -val : val);
334
0
        }
335
0
    }
336
0
    *result = PARSE_SUCCESS;
337
0
    return static_cast<T>(negative ? -val : val);
338
0
}
339
340
template <typename T>
341
T StringParser::string_to_unsigned_int_internal(const char* __restrict s, int len,
342
2.67k
                                                ParseResult* result) {
343
2.67k
    if (UNLIKELY(len <= 0)) {
344
0
        *result = PARSE_FAILURE;
345
0
        return 0;
346
0
    }
347
348
2.67k
    T val = 0;
349
2.67k
    T max_val = std::numeric_limits<T>::max();
350
2.67k
    int i = 0;
351
352
2.67k
    typedef typename std::make_signed<T>::type signedT;
353
    // This is the fast path where the string cannot overflow.
354
2.67k
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
355
895
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
356
895
        return val;
357
895
    }
358
359
1.78k
    const T max_div_10 = max_val / 10;
360
1.78k
    const T max_mod_10 = max_val % 10;
361
362
1.78k
    int first = i;
363
6.54k
    for (; i < len; ++i) {
364
6.49k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
365
4.99k
            T digit = s[i] - '0';
366
            // This is a tricky check to see if adding this digit will cause an overflow.
367
4.99k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
368
224
                *result = PARSE_OVERFLOW;
369
224
                return max_val;
370
224
            }
371
4.76k
            val = val * 10 + digit;
372
4.76k
        } else {
373
1.50k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
374
                // Reject the string because either the first char was not a digit,
375
                // or the remaining chars are not all whitespace
376
1.13k
                *result = PARSE_FAILURE;
377
1.13k
                return 0;
378
1.13k
            }
379
            // Returning here is slightly faster than breaking the loop.
380
378
            *result = PARSE_SUCCESS;
381
378
            return val;
382
1.50k
        }
383
6.49k
    }
384
49
    *result = PARSE_SUCCESS;
385
49
    return val;
386
1.78k
}
_ZN5doris12StringParser31string_to_unsigned_int_internalImEET_PKciPNS0_11ParseResultE
Line
Count
Source
342
681
                                                ParseResult* result) {
343
681
    if (UNLIKELY(len <= 0)) {
344
0
        *result = PARSE_FAILURE;
345
0
        return 0;
346
0
    }
347
348
681
    T val = 0;
349
681
    T max_val = std::numeric_limits<T>::max();
350
681
    int i = 0;
351
352
681
    typedef typename std::make_signed<T>::type signedT;
353
    // This is the fast path where the string cannot overflow.
354
681
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
355
456
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
356
456
        return val;
357
456
    }
358
359
225
    const T max_div_10 = max_val / 10;
360
225
    const T max_mod_10 = max_val % 10;
361
362
225
    int first = i;
363
2.26k
    for (; i < len; ++i) {
364
2.26k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
365
2.10k
            T digit = s[i] - '0';
366
            // This is a tricky check to see if adding this digit will cause an overflow.
367
2.10k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
368
56
                *result = PARSE_OVERFLOW;
369
56
                return max_val;
370
56
            }
371
2.04k
            val = val * 10 + digit;
372
2.04k
        } else {
373
162
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
374
                // Reject the string because either the first char was not a digit,
375
                // or the remaining chars are not all whitespace
376
120
                *result = PARSE_FAILURE;
377
120
                return 0;
378
120
            }
379
            // Returning here is slightly faster than breaking the loop.
380
42
            *result = PARSE_SUCCESS;
381
42
            return val;
382
162
        }
383
2.26k
    }
384
7
    *result = PARSE_SUCCESS;
385
7
    return val;
386
225
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
342
665
                                                ParseResult* result) {
343
665
    if (UNLIKELY(len <= 0)) {
344
0
        *result = PARSE_FAILURE;
345
0
        return 0;
346
0
    }
347
348
665
    T val = 0;
349
665
    T max_val = std::numeric_limits<T>::max();
350
665
    int i = 0;
351
352
665
    typedef typename std::make_signed<T>::type signedT;
353
    // This is the fast path where the string cannot overflow.
354
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
355
16
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
356
16
        return val;
357
16
    }
358
359
649
    const T max_div_10 = max_val / 10;
360
649
    const T max_mod_10 = max_val % 10;
361
362
649
    int first = i;
363
1.20k
    for (; i < len; ++i) {
364
1.18k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
365
609
            T digit = s[i] - '0';
366
            // This is a tricky check to see if adding this digit will cause an overflow.
367
609
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
368
56
                *result = PARSE_OVERFLOW;
369
56
                return max_val;
370
56
            }
371
553
            val = val * 10 + digit;
372
572
        } else {
373
572
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
374
                // Reject the string because either the first char was not a digit,
375
                // or the remaining chars are not all whitespace
376
446
                *result = PARSE_FAILURE;
377
446
                return 0;
378
446
            }
379
            // Returning here is slightly faster than breaking the loop.
380
126
            *result = PARSE_SUCCESS;
381
126
            return val;
382
572
        }
383
1.18k
    }
384
21
    *result = PARSE_SUCCESS;
385
21
    return val;
386
649
}
_ZN5doris12StringParser31string_to_unsigned_int_internalItEET_PKciPNS0_11ParseResultE
Line
Count
Source
342
665
                                                ParseResult* result) {
343
665
    if (UNLIKELY(len <= 0)) {
344
0
        *result = PARSE_FAILURE;
345
0
        return 0;
346
0
    }
347
348
665
    T val = 0;
349
665
    T max_val = std::numeric_limits<T>::max();
350
665
    int i = 0;
351
352
665
    typedef typename std::make_signed<T>::type signedT;
353
    // This is the fast path where the string cannot overflow.
354
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
355
31
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
356
31
        return val;
357
31
    }
358
359
634
    const T max_div_10 = max_val / 10;
360
634
    const T max_mod_10 = max_val % 10;
361
362
634
    int first = i;
363
1.47k
    for (; i < len; ++i) {
364
1.46k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
365
896
            T digit = s[i] - '0';
366
            // This is a tricky check to see if adding this digit will cause an overflow.
367
896
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
368
56
                *result = PARSE_OVERFLOW;
369
56
                return max_val;
370
56
            }
371
840
            val = val * 10 + digit;
372
840
        } else {
373
564
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
374
                // Reject the string because either the first char was not a digit,
375
                // or the remaining chars are not all whitespace
376
438
                *result = PARSE_FAILURE;
377
438
                return 0;
378
438
            }
379
            // Returning here is slightly faster than breaking the loop.
380
126
            *result = PARSE_SUCCESS;
381
126
            return val;
382
564
        }
383
1.46k
    }
384
14
    *result = PARSE_SUCCESS;
385
14
    return val;
386
634
}
_ZN5doris12StringParser31string_to_unsigned_int_internalIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
342
665
                                                ParseResult* result) {
343
665
    if (UNLIKELY(len <= 0)) {
344
0
        *result = PARSE_FAILURE;
345
0
        return 0;
346
0
    }
347
348
665
    T val = 0;
349
665
    T max_val = std::numeric_limits<T>::max();
350
665
    int i = 0;
351
352
665
    typedef typename std::make_signed<T>::type signedT;
353
    // This is the fast path where the string cannot overflow.
354
665
    if (LIKELY(len - i < vectorized::NumberTraits::max_ascii_len<signedT>())) {
355
392
        val = string_to_int_no_overflow<T>(s + i, len - i, result);
356
392
        return val;
357
392
    }
358
359
273
    const T max_div_10 = max_val / 10;
360
273
    const T max_mod_10 = max_val % 10;
361
362
273
    int first = i;
363
1.60k
    for (; i < len; ++i) {
364
1.59k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
365
1.38k
            T digit = s[i] - '0';
366
            // This is a tricky check to see if adding this digit will cause an overflow.
367
1.38k
            if (UNLIKELY(val > (max_div_10 - (digit > max_mod_10)))) {
368
56
                *result = PARSE_OVERFLOW;
369
56
                return max_val;
370
56
            }
371
1.33k
            val = val * 10 + digit;
372
1.33k
        } else {
373
210
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
374
                // Reject the string because either the first char was not a digit,
375
                // or the remaining chars are not all whitespace
376
126
                *result = PARSE_FAILURE;
377
126
                return 0;
378
126
            }
379
            // Returning here is slightly faster than breaking the loop.
380
84
            *result = PARSE_SUCCESS;
381
84
            return val;
382
210
        }
383
1.59k
    }
384
7
    *result = PARSE_SUCCESS;
385
7
    return val;
386
273
}
387
388
template <typename T>
389
T StringParser::string_to_int_internal(const char* __restrict s, int64_t len, int base,
390
53.6k
                                       ParseResult* result) {
391
53.6k
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
53.6k
    UnsignedT val = 0;
393
53.6k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
53.6k
    bool negative = false;
395
53.6k
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
53.6k
    int i = 0;
400
53.6k
    switch (*s) {
401
14.3k
    case '-':
402
14.3k
        negative = true;
403
14.3k
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
14.3k
        [[fallthrough]];
405
14.6k
    case '+':
406
14.6k
        i = 1;
407
53.6k
    }
408
409
53.6k
    const T max_div_base = max_val / base;
410
53.6k
    const T max_mod_base = max_val % base;
411
412
53.6k
    int first = i;
413
120k
    for (; i < len; ++i) {
414
118k
        T digit;
415
118k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
81.6k
            digit = s[i] - '0';
417
81.6k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
639
            digit = (s[i] - 'a' + 10);
419
36.4k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
98
            digit = (s[i] - 'A' + 10);
421
36.3k
        } else {
422
36.3k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
24.0k
                *result = PARSE_FAILURE;
426
24.0k
                return 0;
427
24.0k
            }
428
            // skip trailing whitespace.
429
12.2k
            break;
430
36.3k
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
82.4k
        if (digit >= base) {
434
392
            break;
435
392
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
82.0k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
14.8k
            *result = PARSE_OVERFLOW;
440
14.8k
            return static_cast<T>(negative ? -max_val : max_val);
441
14.8k
        }
442
67.2k
        val = val * base + digit;
443
67.2k
    }
444
14.7k
    *result = PARSE_SUCCESS;
445
14.7k
    return static_cast<T>(negative ? -val : val);
446
53.6k
}
_ZN5doris12StringParser22string_to_int_internalIaEET_PKcliPNS0_11ParseResultE
Line
Count
Source
390
51.0k
                                       ParseResult* result) {
391
51.0k
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
51.0k
    UnsignedT val = 0;
393
51.0k
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
51.0k
    bool negative = false;
395
51.0k
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
51.0k
    int i = 0;
400
51.0k
    switch (*s) {
401
13.7k
    case '-':
402
13.7k
        negative = true;
403
13.7k
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
13.7k
        [[fallthrough]];
405
13.8k
    case '+':
406
13.8k
        i = 1;
407
51.0k
    }
408
409
51.0k
    const T max_div_base = max_val / base;
410
51.0k
    const T max_mod_base = max_val % base;
411
412
51.0k
    int first = i;
413
108k
    for (; i < len; ++i) {
414
107k
        T digit;
415
107k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
72.1k
            digit = s[i] - '0';
417
72.1k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
539
            digit = (s[i] - 'a' + 10);
419
34.3k
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
98
            digit = (s[i] - 'A' + 10);
421
34.2k
        } else {
422
34.2k
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
22.8k
                *result = PARSE_FAILURE;
426
22.8k
                return 0;
427
22.8k
            }
428
            // skip trailing whitespace.
429
11.3k
            break;
430
34.2k
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
72.7k
        if (digit >= base) {
434
392
            break;
435
392
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
72.4k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
14.5k
            *result = PARSE_OVERFLOW;
440
14.5k
            return static_cast<T>(negative ? -max_val : max_val);
441
14.5k
        }
442
57.8k
        val = val * base + digit;
443
57.8k
    }
444
13.6k
    *result = PARSE_SUCCESS;
445
13.6k
    return static_cast<T>(negative ? -val : val);
446
51.0k
}
_ZN5doris12StringParser22string_to_int_internalIsEET_PKcliPNS0_11ParseResultE
Line
Count
Source
390
924
                                       ParseResult* result) {
391
924
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
924
    UnsignedT val = 0;
393
924
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
924
    bool negative = false;
395
924
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
924
    int i = 0;
400
924
    switch (*s) {
401
203
    case '-':
402
203
        negative = true;
403
203
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
203
        [[fallthrough]];
405
252
    case '+':
406
252
        i = 1;
407
924
    }
408
409
924
    const T max_div_base = max_val / base;
410
924
    const T max_mod_base = max_val % base;
411
412
924
    int first = i;
413
2.59k
    for (; i < len; ++i) {
414
2.54k
        T digit;
415
2.54k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
1.68k
            digit = s[i] - '0';
417
1.68k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
98
            digit = (s[i] - 'a' + 10);
419
756
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
0
            digit = (s[i] - 'A' + 10);
421
756
        } else {
422
756
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
420
                *result = PARSE_FAILURE;
426
420
                return 0;
427
420
            }
428
            // skip trailing whitespace.
429
336
            break;
430
756
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
1.78k
        if (digit >= base) {
434
0
            break;
435
0
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
1.78k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
112
            *result = PARSE_OVERFLOW;
440
112
            return static_cast<T>(negative ? -max_val : max_val);
441
112
        }
442
1.67k
        val = val * base + digit;
443
1.67k
    }
444
392
    *result = PARSE_SUCCESS;
445
392
    return static_cast<T>(negative ? -val : val);
446
924
}
_ZN5doris12StringParser22string_to_int_internalIiEET_PKcliPNS0_11ParseResultE
Line
Count
Source
390
833
                                       ParseResult* result) {
391
833
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
833
    UnsignedT val = 0;
393
833
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
833
    bool negative = false;
395
833
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
833
    int i = 0;
400
833
    switch (*s) {
401
154
    case '-':
402
154
        negative = true;
403
154
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
154
        [[fallthrough]];
405
252
    case '+':
406
252
        i = 1;
407
833
    }
408
409
833
    const T max_div_base = max_val / base;
410
833
    const T max_mod_base = max_val % base;
411
412
833
    int first = i;
413
3.55k
    for (; i < len; ++i) {
414
3.50k
        T digit;
415
3.50k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
2.83k
            digit = s[i] - '0';
417
2.83k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
0
            digit = (s[i] - 'a' + 10);
419
672
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
0
            digit = (s[i] - 'A' + 10);
421
672
        } else {
422
672
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
378
                *result = PARSE_FAILURE;
426
378
                return 0;
427
378
            }
428
            // skip trailing whitespace.
429
294
            break;
430
672
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
2.83k
        if (digit >= base) {
434
0
            break;
435
0
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
2.83k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
112
            *result = PARSE_OVERFLOW;
440
112
            return static_cast<T>(negative ? -max_val : max_val);
441
112
        }
442
2.72k
        val = val * base + digit;
443
2.72k
    }
444
343
    *result = PARSE_SUCCESS;
445
343
    return static_cast<T>(negative ? -val : val);
446
833
}
_ZN5doris12StringParser22string_to_int_internalIlEET_PKcliPNS0_11ParseResultE
Line
Count
Source
390
833
                                       ParseResult* result) {
391
833
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
833
    UnsignedT val = 0;
393
833
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
833
    bool negative = false;
395
833
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
833
    int i = 0;
400
833
    switch (*s) {
401
203
    case '-':
402
203
        negative = true;
403
203
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
203
        [[fallthrough]];
405
252
    case '+':
406
252
        i = 1;
407
833
    }
408
409
833
    const T max_div_base = max_val / base;
410
833
    const T max_mod_base = max_val % base;
411
412
833
    int first = i;
413
5.74k
    for (; i < len; ++i) {
414
5.69k
        T digit;
415
5.69k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
5.01k
            digit = s[i] - '0';
417
5.01k
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
0
            digit = (s[i] - 'a' + 10);
419
672
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
0
            digit = (s[i] - 'A' + 10);
421
672
        } else {
422
672
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
378
                *result = PARSE_FAILURE;
426
378
                return 0;
427
378
            }
428
            // skip trailing whitespace.
429
294
            break;
430
672
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
5.01k
        if (digit >= base) {
434
0
            break;
435
0
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
5.01k
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
112
            *result = PARSE_OVERFLOW;
440
112
            return static_cast<T>(negative ? -max_val : max_val);
441
112
        }
442
4.90k
        val = val * base + digit;
443
4.90k
    }
444
343
    *result = PARSE_SUCCESS;
445
343
    return static_cast<T>(negative ? -val : val);
446
833
}
_ZN5doris12StringParser22string_to_int_internalImEET_PKcliPNS0_11ParseResultE
Line
Count
Source
390
1
                                       ParseResult* result) {
391
1
    typedef typename std::make_unsigned<T>::type UnsignedT;
392
1
    UnsignedT val = 0;
393
1
    UnsignedT max_val = StringParser::numeric_limits<T>(false);
394
1
    bool negative = false;
395
1
    if (UNLIKELY(len <= 0)) {
396
0
        *result = PARSE_FAILURE;
397
0
        return 0;
398
0
    }
399
1
    int i = 0;
400
1
    switch (*s) {
401
0
    case '-':
402
0
        negative = true;
403
0
        max_val = StringParser::numeric_limits<T>(false) + 1;
404
0
        [[fallthrough]];
405
0
    case '+':
406
0
        i = 1;
407
1
    }
408
409
1
    const T max_div_base = max_val / base;
410
1
    const T max_mod_base = max_val % base;
411
412
1
    int first = i;
413
3
    for (; i < len; ++i) {
414
2
        T digit;
415
2
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
416
0
            digit = s[i] - '0';
417
2
        } else if (s[i] >= 'a' && s[i] <= 'z') {
418
2
            digit = (s[i] - 'a' + 10);
419
2
        } else if (s[i] >= 'A' && s[i] <= 'Z') {
420
0
            digit = (s[i] - 'A' + 10);
421
0
        } else {
422
0
            if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
423
                // Reject the string because either the first char was not an alpha/digit,
424
                // or the remaining chars are not all whitespace
425
0
                *result = PARSE_FAILURE;
426
0
                return 0;
427
0
            }
428
            // skip trailing whitespace.
429
0
            break;
430
0
        }
431
432
        // Bail, if we encounter a digit that is not available in base.
433
2
        if (digit >= base) {
434
0
            break;
435
0
        }
436
437
        // This is a tricky check to see if adding this digit will cause an overflow.
438
2
        if (UNLIKELY(val > (max_div_base - (digit > max_mod_base)))) {
439
0
            *result = PARSE_OVERFLOW;
440
0
            return static_cast<T>(negative ? -max_val : max_val);
441
0
        }
442
2
        val = val * base + digit;
443
2
    }
444
1
    *result = PARSE_SUCCESS;
445
1
    return static_cast<T>(negative ? -val : val);
446
1
}
447
448
template <typename T>
449
328k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
328k
    T val = 0;
451
328k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
328k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
324k
        val = s[0] - '0';
458
324k
    } else {
459
4.26k
        *result = PARSE_FAILURE;
460
4.26k
        return 0;
461
4.26k
    }
462
546k
    for (int i = 1; i < len; ++i) {
463
223k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
221k
            T digit = s[i] - '0';
465
221k
            val = val * 10 + digit;
466
221k
        } else {
467
1.59k
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
1.59k
                          !is_float_suffix(s + i, len - i)))) {
469
194
                *result = PARSE_FAILURE;
470
194
                return 0;
471
194
            }
472
1.40k
            *result = PARSE_SUCCESS;
473
1.40k
            return val;
474
1.59k
        }
475
223k
    }
476
322k
    *result = PARSE_SUCCESS;
477
322k
    return val;
478
324k
}
_ZN5doris12StringParser25string_to_int_no_overflowIhEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
144k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
144k
    T val = 0;
451
144k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
144k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
143k
        val = s[0] - '0';
458
143k
    } else {
459
492
        *result = PARSE_FAILURE;
460
492
        return 0;
461
492
    }
462
248k
    for (int i = 1; i < len; ++i) {
463
104k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
104k
            T digit = s[i] - '0';
465
104k
            val = val * 10 + digit;
466
104k
        } else {
467
0
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
0
                          !is_float_suffix(s + i, len - i)))) {
469
0
                *result = PARSE_FAILURE;
470
0
                return 0;
471
0
            }
472
0
            *result = PARSE_SUCCESS;
473
0
            return val;
474
0
        }
475
104k
    }
476
143k
    *result = PARSE_SUCCESS;
477
143k
    return val;
478
143k
}
_ZN5doris12StringParser25string_to_int_no_overflowImEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
34.2k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
34.2k
    T val = 0;
451
34.2k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
34.2k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
33.1k
        val = s[0] - '0';
458
33.1k
    } else {
459
1.06k
        *result = PARSE_FAILURE;
460
1.06k
        return 0;
461
1.06k
    }
462
73.6k
    for (int i = 1; i < len; ++i) {
463
40.8k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
40.4k
            T digit = s[i] - '0';
465
40.4k
            val = val * 10 + digit;
466
40.4k
        } else {
467
359
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
359
                          !is_float_suffix(s + i, len - i)))) {
469
64
                *result = PARSE_FAILURE;
470
64
                return 0;
471
64
            }
472
295
            *result = PARSE_SUCCESS;
473
295
            return val;
474
359
        }
475
40.8k
    }
476
32.7k
    *result = PARSE_SUCCESS;
477
32.7k
    return val;
478
33.1k
}
_ZN5doris12StringParser25string_to_int_no_overflowItEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
53.4k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
53.4k
    T val = 0;
451
53.4k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
53.4k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
52.7k
        val = s[0] - '0';
458
52.7k
    } else {
459
761
        *result = PARSE_FAILURE;
460
761
        return 0;
461
761
    }
462
76.7k
    for (int i = 1; i < len; ++i) {
463
24.9k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
24.0k
            T digit = s[i] - '0';
465
24.0k
            val = val * 10 + digit;
466
24.0k
        } else {
467
950
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
950
                          !is_float_suffix(s + i, len - i)))) {
469
52
                *result = PARSE_FAILURE;
470
52
                return 0;
471
52
            }
472
898
            *result = PARSE_SUCCESS;
473
898
            return val;
474
950
        }
475
24.9k
    }
476
51.7k
    *result = PARSE_SUCCESS;
477
51.7k
    return val;
478
52.7k
}
_ZN5doris12StringParser25string_to_int_no_overflowIjEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
53.7k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
53.7k
    T val = 0;
451
53.7k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
53.7k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
52.5k
        val = s[0] - '0';
458
52.5k
    } else {
459
1.26k
        *result = PARSE_FAILURE;
460
1.26k
        return 0;
461
1.26k
    }
462
88.7k
    for (int i = 1; i < len; ++i) {
463
36.5k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
36.2k
            T digit = s[i] - '0';
465
36.2k
            val = val * 10 + digit;
466
36.2k
        } else {
467
254
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
254
                          !is_float_suffix(s + i, len - i)))) {
469
44
                *result = PARSE_FAILURE;
470
44
                return 0;
471
44
            }
472
210
            *result = PARSE_SUCCESS;
473
210
            return val;
474
254
        }
475
36.5k
    }
476
52.2k
    *result = PARSE_SUCCESS;
477
52.2k
    return val;
478
52.5k
}
_ZN5doris12StringParser25string_to_int_no_overflowIoEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
43.0k
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
43.0k
    T val = 0;
451
43.0k
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
43.0k
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
42.3k
        val = s[0] - '0';
458
42.3k
    } else {
459
674
        *result = PARSE_FAILURE;
460
674
        return 0;
461
674
    }
462
58.7k
    for (int i = 1; i < len; ++i) {
463
16.4k
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
16.4k
            T digit = s[i] - '0';
465
16.4k
            val = val * 10 + digit;
466
16.4k
        } else {
467
34
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
34
                          !is_float_suffix(s + i, len - i)))) {
469
34
                *result = PARSE_FAILURE;
470
34
                return 0;
471
34
            }
472
0
            *result = PARSE_SUCCESS;
473
0
            return val;
474
34
        }
475
16.4k
    }
476
42.3k
    *result = PARSE_SUCCESS;
477
42.3k
    return val;
478
42.3k
}
_ZN5doris12StringParser25string_to_int_no_overflowIN4wide7integerILm256EjEEEET_PKciPNS0_11ParseResultE
Line
Count
Source
449
4
T StringParser::string_to_int_no_overflow(const char* __restrict s, int len, ParseResult* result) {
450
4
    T val = 0;
451
4
    if (UNLIKELY(len == 0)) {
452
0
        *result = PARSE_SUCCESS;
453
0
        return val;
454
0
    }
455
    // Factor out the first char for error handling speeds up the loop.
456
4
    if (LIKELY(s[0] >= '0' && s[0] <= '9')) {
457
4
        val = s[0] - '0';
458
4
    } else {
459
0
        *result = PARSE_FAILURE;
460
0
        return 0;
461
0
    }
462
4
    for (int i = 1; i < len; ++i) {
463
0
        if (LIKELY(s[i] >= '0' && s[i] <= '9')) {
464
0
            T digit = s[i] - '0';
465
0
            val = val * 10 + digit;
466
0
        } else {
467
0
            if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
468
0
                          !is_float_suffix(s + i, len - i)))) {
469
0
                *result = PARSE_FAILURE;
470
0
                return 0;
471
0
            }
472
0
            *result = PARSE_SUCCESS;
473
0
            return val;
474
0
        }
475
0
    }
476
4
    *result = PARSE_SUCCESS;
477
4
    return val;
478
4
}
479
480
template <typename T>
481
137k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
482
137k
    int i = 0;
483
    // skip leading spaces
484
180k
    for (; i < len; ++i) {
485
180k
        if (!is_whitespace(s[i])) {
486
137k
            break;
487
137k
        }
488
180k
    }
489
490
    // skip back spaces
491
137k
    int j = len - 1;
492
178k
    for (; j >= i; j--) {
493
178k
        if (!is_whitespace(s[j])) {
494
137k
            break;
495
137k
        }
496
178k
    }
497
498
    // skip leading '+', from_chars can handle '-'
499
137k
    if (i < len && s[i] == '+') {
500
5.29k
        i++;
501
5.29k
    }
502
137k
    if (UNLIKELY(i > j)) {
503
3
        *result = PARSE_FAILURE;
504
3
        return 0;
505
3
    }
506
507
    // Use double here to not lose precision while accumulating the result
508
137k
    double val = 0;
509
137k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
510
511
137k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
512
132k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
513
898
            auto contain_inf = false;
514
1.29k
            for (int k = i; k < j + 1; k++) {
515
1.29k
                if (s[k] == 'i' || s[k] == 'I') {
516
894
                    contain_inf = true;
517
894
                    break;
518
894
                }
519
1.29k
            }
520
521
898
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
522
132k
        } else {
523
132k
            *result = PARSE_SUCCESS;
524
132k
        }
525
132k
        return val;
526
132k
    } else {
527
4.74k
        *result = PARSE_FAILURE;
528
4.74k
    }
529
4.74k
    return 0;
530
137k
}
_ZN5doris12StringParser24string_to_float_internalIdEET_PKciPNS0_11ParseResultE
Line
Count
Source
481
73.8k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
482
73.8k
    int i = 0;
483
    // skip leading spaces
484
94.9k
    for (; i < len; ++i) {
485
94.9k
        if (!is_whitespace(s[i])) {
486
73.8k
            break;
487
73.8k
        }
488
94.9k
    }
489
490
    // skip back spaces
491
73.8k
    int j = len - 1;
492
94.3k
    for (; j >= i; j--) {
493
94.3k
        if (!is_whitespace(s[j])) {
494
73.8k
            break;
495
73.8k
        }
496
94.3k
    }
497
498
    // skip leading '+', from_chars can handle '-'
499
73.8k
    if (i < len && s[i] == '+') {
500
2.64k
        i++;
501
2.64k
    }
502
73.8k
    if (UNLIKELY(i > j)) {
503
3
        *result = PARSE_FAILURE;
504
3
        return 0;
505
3
    }
506
507
    // Use double here to not lose precision while accumulating the result
508
73.8k
    double val = 0;
509
73.8k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
510
511
73.8k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
512
71.2k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
513
449
            auto contain_inf = false;
514
656
            for (int k = i; k < j + 1; k++) {
515
654
                if (s[k] == 'i' || s[k] == 'I') {
516
447
                    contain_inf = true;
517
447
                    break;
518
447
                }
519
654
            }
520
521
449
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
522
70.7k
        } else {
523
70.7k
            *result = PARSE_SUCCESS;
524
70.7k
        }
525
71.2k
        return val;
526
71.2k
    } else {
527
2.57k
        *result = PARSE_FAILURE;
528
2.57k
    }
529
2.57k
    return 0;
530
73.8k
}
_ZN5doris12StringParser24string_to_float_internalIfEET_PKciPNS0_11ParseResultE
Line
Count
Source
481
63.8k
T StringParser::string_to_float_internal(const char* __restrict s, int len, ParseResult* result) {
482
63.8k
    int i = 0;
483
    // skip leading spaces
484
85.0k
    for (; i < len; ++i) {
485
85.0k
        if (!is_whitespace(s[i])) {
486
63.8k
            break;
487
63.8k
        }
488
85.0k
    }
489
490
    // skip back spaces
491
63.8k
    int j = len - 1;
492
84.4k
    for (; j >= i; j--) {
493
84.4k
        if (!is_whitespace(s[j])) {
494
63.8k
            break;
495
63.8k
        }
496
84.4k
    }
497
498
    // skip leading '+', from_chars can handle '-'
499
63.8k
    if (i < len && s[i] == '+') {
500
2.64k
        i++;
501
2.64k
    }
502
63.8k
    if (UNLIKELY(i > j)) {
503
0
        *result = PARSE_FAILURE;
504
0
        return 0;
505
0
    }
506
507
    // Use double here to not lose precision while accumulating the result
508
63.8k
    double val = 0;
509
63.8k
    auto res = fast_float::from_chars(s + i, s + j + 1, val);
510
511
63.8k
    if (res.ec == std::errc() && res.ptr == s + j + 1) {
512
61.7k
        if (abs(val) == std::numeric_limits<T>::infinity()) {
513
449
            auto contain_inf = false;
514
638
            for (int k = i; k < j + 1; k++) {
515
636
                if (s[k] == 'i' || s[k] == 'I') {
516
447
                    contain_inf = true;
517
447
                    break;
518
447
                }
519
636
            }
520
521
449
            *result = contain_inf ? PARSE_SUCCESS : PARSE_OVERFLOW;
522
61.2k
        } else {
523
61.2k
            *result = PARSE_SUCCESS;
524
61.2k
        }
525
61.7k
        return val;
526
61.7k
    } else {
527
2.16k
        *result = PARSE_FAILURE;
528
2.16k
    }
529
2.16k
    return 0;
530
63.8k
}
531
532
inline bool StringParser::string_to_bool_internal(const char* __restrict s, int len,
533
9.34k
                                                  ParseResult* result) {
534
9.34k
    *result = PARSE_SUCCESS;
535
536
9.34k
    if (len >= 4 && (s[0] == 't' || s[0] == 'T')) {
537
3.76k
        bool match = (s[1] == 'r' || s[1] == 'R') && (s[2] == 'u' || s[2] == 'U') &&
538
3.76k
                     (s[3] == 'e' || s[3] == 'E');
539
3.76k
        if (match && LIKELY(is_all_whitespace(s + 4, len - 4))) {
540
3.65k
            return true;
541
3.65k
        }
542
5.57k
    } else if (len >= 5 && (s[0] == 'f' || s[0] == 'F')) {
543
3.83k
        bool match = (s[1] == 'a' || s[1] == 'A') && (s[2] == 'l' || s[2] == 'L') &&
544
3.83k
                     (s[3] == 's' || s[3] == 'S') && (s[4] == 'e' || s[4] == 'E');
545
3.83k
        if (match && LIKELY(is_all_whitespace(s + 5, len - 5))) {
546
3.71k
            return false;
547
3.71k
        }
548
3.83k
    }
549
550
1.97k
    *result = PARSE_FAILURE;
551
1.97k
    return false;
552
9.34k
}
553
554
template <PrimitiveType P, typename T, typename DecimalType>
555
T StringParser::string_to_decimal(const char* __restrict s, int len, int type_precision,
556
249k
                                  int type_scale, ParseResult* result) {
557
249k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
249k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
249k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
249k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
251k
    while (len > 0 && is_whitespace(*s)) {
569
2.44k
        ++s;
570
2.44k
        --len;
571
2.44k
    }
572
249k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
249k
    bool is_negative = false;
577
249k
    if (len > 0) {
578
249k
        switch (*s) {
579
37.5k
        case '-':
580
37.5k
            is_negative = true;
581
37.5k
            [[fallthrough]];
582
37.5k
        case '+':
583
37.5k
            ++s;
584
37.5k
            --len;
585
249k
        }
586
249k
    }
587
588
    // Ignore leading zeros.
589
249k
    bool found_value = false;
590
396k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
147k
        found_value = true;
592
147k
        ++s;
593
147k
        --len;
594
147k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
249k
    int scale = 0;
600
249k
    int found_dot = 0;
601
249k
    if (len > 0 && *s == '.') {
602
29.6k
        found_dot = 1;
603
29.6k
        ++s;
604
29.6k
        --len;
605
125k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
95.6k
            found_value = true;
607
95.6k
            ++scale;
608
95.6k
            ++s;
609
95.6k
            --len;
610
95.6k
        }
611
29.6k
    }
612
613
249k
    int precision = 0;
614
249k
    int max_digit = type_precision - type_scale;
615
249k
    int cur_digit = 0;
616
249k
    bool found_exponent = false;
617
249k
    int8_t exponent = 0;
618
249k
    T value = 0;
619
249k
    bool has_round = false;
620
4.38M
    for (int i = 0; i < len; ++i) {
621
4.21M
        const char& c = s[i];
622
4.21M
        if (LIKELY('0' <= c && c <= '9')) {
623
3.94M
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
3.94M
            if (LIKELY(type_precision > precision) && !has_round) {
629
3.94M
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
3.94M
                ++precision;
631
3.94M
                scale += found_dot;
632
3.94M
                cur_digit = precision - scale;
633
3.94M
            } else if (!found_dot && max_digit < (precision - scale)) {
634
438
                *result = StringParser::PARSE_OVERFLOW;
635
438
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
438
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
438
                return value;
638
438
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
24
                if (c > '4') {
641
8
                    value += 1;
642
8
                }
643
24
                has_round = true;
644
24
                continue;
645
24
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
3.94M
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
3.94M
        } else if (c == '.' && LIKELY(!found_dot)) {
650
189k
            found_dot = 1;
651
189k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
77.1k
            found_exponent = true;
653
77.1k
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
77.1k
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
10
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
10
                return 0;
659
10
            }
660
77.1k
            break;
661
77.1k
        } else {
662
344
            if (value == 0) {
663
271
                *result = StringParser::PARSE_FAILURE;
664
271
                return 0;
665
271
            }
666
            // here to handle
667
73
            *result = StringParser::PARSE_SUCCESS;
668
73
            if (type_scale >= scale) {
669
71
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
71
                if (!is_numeric_ascii(c)) {
674
71
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
71
                    return is_negative ? T(-value) : T(value);
683
71
                }
684
71
            }
685
686
2
            return is_negative ? T(-value) : T(value);
687
73
        }
688
4.21M
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
248k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
69.1k
        precision += exponent - scale;
695
696
69.1k
        value *= get_scale_multiplier<T>(exponent - scale);
697
69.1k
        scale = 0;
698
179k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
179k
        scale -= exponent;
703
179k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
248k
    if (scale > precision) {
707
14.2k
        precision = scale;
708
14.2k
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
13.5k
    *result = StringParser::PARSE_SUCCESS;
713
234k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
11.4k
        *result = StringParser::PARSE_OVERFLOW;
715
11.4k
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
11.4k
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
11.4k
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
11.4k
            return value;
720
11.4k
        }
721
236k
    } else if (UNLIKELY(scale > type_scale)) {
722
4.15k
        *result = StringParser::PARSE_UNDERFLOW;
723
4.15k
        int shift = scale - type_scale;
724
4.15k
        T divisor = get_scale_multiplier<T>(shift);
725
4.15k
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
4.15k
        } else {
728
4.15k
            T remainder = value % divisor;
729
4.15k
            value /= divisor;
730
4.15k
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
100
                value += 1;
732
100
            }
733
4.15k
        }
734
4.15k
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
232k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
1
        *result = StringParser::PARSE_FAILURE;
737
1
    }
738
739
237k
    if (type_scale > scale) {
740
173k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
173k
    }
742
743
237k
    return is_negative ? T(-value) : T(value);
744
234k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EiNS_10vectorized7DecimalIiEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
556
25.4k
                                  int type_scale, ParseResult* result) {
557
25.4k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
25.4k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
25.4k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
25.4k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
25.6k
    while (len > 0 && is_whitespace(*s)) {
569
220
        ++s;
570
220
        --len;
571
220
    }
572
25.4k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
25.4k
    bool is_negative = false;
577
25.4k
    if (len > 0) {
578
25.4k
        switch (*s) {
579
10.5k
        case '-':
580
10.5k
            is_negative = true;
581
10.5k
            [[fallthrough]];
582
10.5k
        case '+':
583
10.5k
            ++s;
584
10.5k
            --len;
585
25.4k
        }
586
25.4k
    }
587
588
    // Ignore leading zeros.
589
25.4k
    bool found_value = false;
590
29.1k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
3.73k
        found_value = true;
592
3.73k
        ++s;
593
3.73k
        --len;
594
3.73k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
25.4k
    int scale = 0;
600
25.4k
    int found_dot = 0;
601
25.4k
    if (len > 0 && *s == '.') {
602
2.80k
        found_dot = 1;
603
2.80k
        ++s;
604
2.80k
        --len;
605
3.27k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
470
            found_value = true;
607
470
            ++scale;
608
470
            ++s;
609
470
            --len;
610
470
        }
611
2.80k
    }
612
613
25.4k
    int precision = 0;
614
25.4k
    int max_digit = type_precision - type_scale;
615
25.4k
    int cur_digit = 0;
616
25.4k
    bool found_exponent = false;
617
25.4k
    int8_t exponent = 0;
618
25.4k
    T value = 0;
619
25.4k
    bool has_round = false;
620
76.5k
    for (int i = 0; i < len; ++i) {
621
51.6k
        const char& c = s[i];
622
51.6k
        if (LIKELY('0' <= c && c <= '9')) {
623
47.8k
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
47.8k
            if (LIKELY(type_precision > precision) && !has_round) {
629
47.3k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
47.3k
                ++precision;
631
47.3k
                scale += found_dot;
632
47.3k
                cur_digit = precision - scale;
633
47.3k
            } else if (!found_dot && max_digit < (precision - scale)) {
634
438
                *result = StringParser::PARSE_OVERFLOW;
635
438
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
438
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
438
                return value;
638
438
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
4
                if (c > '4') {
641
0
                    value += 1;
642
0
                }
643
4
                has_round = true;
644
4
                continue;
645
10
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
47.3k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
47.3k
        } else if (c == '.' && LIKELY(!found_dot)) {
650
3.76k
            found_dot = 1;
651
3.76k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
0
            found_exponent = true;
653
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
0
                return 0;
659
0
            }
660
0
            break;
661
92
        } else {
662
92
            if (value == 0) {
663
66
                *result = StringParser::PARSE_FAILURE;
664
66
                return 0;
665
66
            }
666
            // here to handle
667
26
            *result = StringParser::PARSE_SUCCESS;
668
26
            if (type_scale >= scale) {
669
26
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
26
                if (!is_numeric_ascii(c)) {
674
26
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
26
                    return is_negative ? T(-value) : T(value);
683
26
                }
684
26
            }
685
686
0
            return is_negative ? T(-value) : T(value);
687
26
        }
688
51.6k
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
24.8k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
0
        precision += exponent - scale;
695
696
0
        value *= get_scale_multiplier<T>(exponent - scale);
697
0
        scale = 0;
698
24.8k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
24.8k
        scale -= exponent;
703
24.8k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
24.8k
    if (scale > precision) {
707
354
        precision = scale;
708
354
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
24.8k
    *result = StringParser::PARSE_SUCCESS;
713
24.8k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
736
        *result = StringParser::PARSE_OVERFLOW;
715
736
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
736
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
736
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
736
            return value;
720
736
        }
721
24.1k
    } else if (UNLIKELY(scale > type_scale)) {
722
4
        *result = StringParser::PARSE_UNDERFLOW;
723
4
        int shift = scale - type_scale;
724
4
        T divisor = get_scale_multiplier<T>(shift);
725
4
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
4
        } else {
728
4
            T remainder = value % divisor;
729
4
            value /= divisor;
730
4
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
0
                value += 1;
732
0
            }
733
4
        }
734
4
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
24.1k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
0
        *result = StringParser::PARSE_FAILURE;
737
0
    }
738
739
24.1k
    if (type_scale > scale) {
740
21.6k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
21.6k
    }
742
743
24.1k
    return is_negative ? T(-value) : T(value);
744
24.8k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29ElNS_10vectorized7DecimalIlEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
556
60.8k
                                  int type_scale, ParseResult* result) {
557
60.8k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
60.8k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
60.8k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
60.8k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
61.4k
    while (len > 0 && is_whitespace(*s)) {
569
593
        ++s;
570
593
        --len;
571
593
    }
572
60.8k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
60.8k
    bool is_negative = false;
577
60.8k
    if (len > 0) {
578
60.8k
        switch (*s) {
579
8.26k
        case '-':
580
8.26k
            is_negative = true;
581
8.26k
            [[fallthrough]];
582
8.26k
        case '+':
583
8.26k
            ++s;
584
8.26k
            --len;
585
60.8k
        }
586
60.8k
    }
587
588
    // Ignore leading zeros.
589
60.8k
    bool found_value = false;
590
90.4k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
29.5k
        found_value = true;
592
29.5k
        ++s;
593
29.5k
        --len;
594
29.5k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
60.8k
    int scale = 0;
600
60.8k
    int found_dot = 0;
601
60.8k
    if (len > 0 && *s == '.') {
602
10.6k
        found_dot = 1;
603
10.6k
        ++s;
604
10.6k
        --len;
605
23.4k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
12.7k
            found_value = true;
607
12.7k
            ++scale;
608
12.7k
            ++s;
609
12.7k
            --len;
610
12.7k
        }
611
10.6k
    }
612
613
60.8k
    int precision = 0;
614
60.8k
    int max_digit = type_precision - type_scale;
615
60.8k
    int cur_digit = 0;
616
60.8k
    bool found_exponent = false;
617
60.8k
    int8_t exponent = 0;
618
60.8k
    T value = 0;
619
60.8k
    bool has_round = false;
620
779k
    for (int i = 0; i < len; ++i) {
621
721k
        const char& c = s[i];
622
721k
        if (LIKELY('0' <= c && c <= '9')) {
623
673k
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
673k
            if (LIKELY(type_precision > precision) && !has_round) {
629
673k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
673k
                ++precision;
631
673k
                scale += found_dot;
632
673k
                cur_digit = precision - scale;
633
673k
            } else if (!found_dot && max_digit < (precision - scale)) {
634
0
                *result = StringParser::PARSE_OVERFLOW;
635
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
0
                return value;
638
10
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
4
                if (c > '4') {
641
4
                    value += 1;
642
4
                }
643
4
                has_round = true;
644
4
                continue;
645
6
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
673k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
673k
        } else if (c == '.' && LIKELY(!found_dot)) {
650
45.4k
            found_dot = 1;
651
45.4k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
2.47k
            found_exponent = true;
653
2.47k
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
2.47k
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
0
                return 0;
659
0
            }
660
2.47k
            break;
661
2.47k
        } else {
662
91
            if (value == 0) {
663
68
                *result = StringParser::PARSE_FAILURE;
664
68
                return 0;
665
68
            }
666
            // here to handle
667
23
            *result = StringParser::PARSE_SUCCESS;
668
23
            if (type_scale >= scale) {
669
22
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
22
                if (!is_numeric_ascii(c)) {
674
22
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
22
                    return is_negative ? T(-value) : T(value);
683
22
                }
684
22
            }
685
686
1
            return is_negative ? T(-value) : T(value);
687
23
        }
688
721k
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
60.7k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
1
        precision += exponent - scale;
695
696
1
        value *= get_scale_multiplier<T>(exponent - scale);
697
1
        scale = 0;
698
60.7k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
60.7k
        scale -= exponent;
703
60.7k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
60.7k
    if (scale > precision) {
707
5.31k
        precision = scale;
708
5.31k
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
60.7k
    *result = StringParser::PARSE_SUCCESS;
713
60.7k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
10.7k
        *result = StringParser::PARSE_OVERFLOW;
715
10.7k
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
10.7k
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
10.7k
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
10.7k
            return value;
720
10.7k
        }
721
50.0k
    } else if (UNLIKELY(scale > type_scale)) {
722
1.15k
        *result = StringParser::PARSE_UNDERFLOW;
723
1.15k
        int shift = scale - type_scale;
724
1.15k
        T divisor = get_scale_multiplier<T>(shift);
725
1.15k
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
1.15k
        } else {
728
1.15k
            T remainder = value % divisor;
729
1.15k
            value /= divisor;
730
1.15k
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
0
                value += 1;
732
0
            }
733
1.15k
        }
734
1.15k
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
48.9k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
1
        *result = StringParser::PARSE_FAILURE;
737
1
    }
738
739
50.0k
    if (type_scale > scale) {
740
25.5k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
25.5k
    }
742
743
50.0k
    return is_negative ? T(-value) : T(value);
744
60.7k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EnNS_10vectorized12Decimal128V3EEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
556
57.1k
                                  int type_scale, ParseResult* result) {
557
57.1k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
57.1k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
57.1k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
57.1k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
57.6k
    while (len > 0 && is_whitespace(*s)) {
569
503
        ++s;
570
503
        --len;
571
503
    }
572
57.1k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
57.1k
    bool is_negative = false;
577
57.1k
    if (len > 0) {
578
57.1k
        switch (*s) {
579
8.26k
        case '-':
580
8.26k
            is_negative = true;
581
8.26k
            [[fallthrough]];
582
8.26k
        case '+':
583
8.26k
            ++s;
584
8.26k
            --len;
585
57.1k
        }
586
57.1k
    }
587
588
    // Ignore leading zeros.
589
57.1k
    bool found_value = false;
590
85.3k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
28.2k
        found_value = true;
592
28.2k
        ++s;
593
28.2k
        --len;
594
28.2k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
57.1k
    int scale = 0;
600
57.1k
    int found_dot = 0;
601
57.1k
    if (len > 0 && *s == '.') {
602
11.7k
        found_dot = 1;
603
11.7k
        ++s;
604
11.7k
        --len;
605
45.0k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
33.2k
            found_value = true;
607
33.2k
            ++scale;
608
33.2k
            ++s;
609
33.2k
            --len;
610
33.2k
        }
611
11.7k
    }
612
613
57.1k
    int precision = 0;
614
57.1k
    int max_digit = type_precision - type_scale;
615
57.1k
    int cur_digit = 0;
616
57.1k
    bool found_exponent = false;
617
57.1k
    int8_t exponent = 0;
618
57.1k
    T value = 0;
619
57.1k
    bool has_round = false;
620
1.04M
    for (int i = 0; i < len; ++i) {
621
995k
        const char& c = s[i];
622
995k
        if (LIKELY('0' <= c && c <= '9')) {
623
950k
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
950k
            if (LIKELY(type_precision > precision) && !has_round) {
629
950k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
950k
                ++precision;
631
950k
                scale += found_dot;
632
950k
                cur_digit = precision - scale;
633
950k
            } else if (!found_dot && max_digit < (precision - scale)) {
634
0
                *result = StringParser::PARSE_OVERFLOW;
635
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
0
                return value;
638
16
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
8
                if (c > '4') {
641
4
                    value += 1;
642
4
                }
643
8
                has_round = true;
644
8
                continue;
645
8
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
950k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
950k
        } else if (c == '.' && LIKELY(!found_dot)) {
650
41.4k
            found_dot = 1;
651
41.4k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
3.30k
            found_exponent = true;
653
3.30k
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
3.30k
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
0
                return 0;
659
0
            }
660
3.30k
            break;
661
3.30k
        } else {
662
65
            if (value == 0) {
663
53
                *result = StringParser::PARSE_FAILURE;
664
53
                return 0;
665
53
            }
666
            // here to handle
667
12
            *result = StringParser::PARSE_SUCCESS;
668
12
            if (type_scale >= scale) {
669
11
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
11
                if (!is_numeric_ascii(c)) {
674
11
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
11
                    return is_negative ? T(-value) : T(value);
683
11
                }
684
11
            }
685
686
1
            return is_negative ? T(-value) : T(value);
687
12
        }
688
995k
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
57.0k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
0
        precision += exponent - scale;
695
696
0
        value *= get_scale_multiplier<T>(exponent - scale);
697
0
        scale = 0;
698
57.0k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
57.0k
        scale -= exponent;
703
57.0k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
57.0k
    if (scale > precision) {
707
6.66k
        precision = scale;
708
6.66k
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
57.0k
    *result = StringParser::PARSE_SUCCESS;
713
57.0k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
2
        *result = StringParser::PARSE_OVERFLOW;
715
2
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
2
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
2
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
2
            return value;
720
2
        }
721
57.0k
    } else if (UNLIKELY(scale > type_scale)) {
722
2.88k
        *result = StringParser::PARSE_UNDERFLOW;
723
2.88k
        int shift = scale - type_scale;
724
2.88k
        T divisor = get_scale_multiplier<T>(shift);
725
2.88k
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
2.88k
        } else {
728
2.88k
            T remainder = value % divisor;
729
2.88k
            value /= divisor;
730
2.88k
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
83
                value += 1;
732
83
            }
733
2.88k
        }
734
2.88k
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
54.1k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
0
        *result = StringParser::PARSE_FAILURE;
737
0
    }
738
739
57.0k
    if (type_scale > scale) {
740
39.4k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
39.4k
    }
742
743
57.0k
    return is_negative ? T(-value) : T(value);
744
57.0k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EN4wide7integerILm256EiEENS_10vectorized7DecimalIS5_EEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
556
92.2k
                                  int type_scale, ParseResult* result) {
557
92.2k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
92.2k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
92.2k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
92.2k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
93.4k
    while (len > 0 && is_whitespace(*s)) {
569
1.12k
        ++s;
570
1.12k
        --len;
571
1.12k
    }
572
92.2k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
92.2k
    bool is_negative = false;
577
92.2k
    if (len > 0) {
578
92.2k
        switch (*s) {
579
3.87k
        case '-':
580
3.87k
            is_negative = true;
581
3.87k
            [[fallthrough]];
582
3.87k
        case '+':
583
3.87k
            ++s;
584
3.87k
            --len;
585
92.2k
        }
586
92.2k
    }
587
588
    // Ignore leading zeros.
589
92.2k
    bool found_value = false;
590
139k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
47.3k
        found_value = true;
592
47.3k
        ++s;
593
47.3k
        --len;
594
47.3k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
92.2k
    int scale = 0;
600
92.2k
    int found_dot = 0;
601
92.2k
    if (len > 0 && *s == '.') {
602
2.43k
        found_dot = 1;
603
2.43k
        ++s;
604
2.43k
        --len;
605
47.0k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
44.6k
            found_value = true;
607
44.6k
            ++scale;
608
44.6k
            ++s;
609
44.6k
            --len;
610
44.6k
        }
611
2.43k
    }
612
613
92.2k
    int precision = 0;
614
92.2k
    int max_digit = type_precision - type_scale;
615
92.2k
    int cur_digit = 0;
616
92.2k
    bool found_exponent = false;
617
92.2k
    int8_t exponent = 0;
618
92.2k
    T value = 0;
619
92.2k
    bool has_round = false;
620
2.20M
    for (int i = 0; i < len; ++i) {
621
2.18M
        const char& c = s[i];
622
2.18M
        if (LIKELY('0' <= c && c <= '9')) {
623
2.02M
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
2.02M
            if (LIKELY(type_precision > precision) && !has_round) {
629
2.02M
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
2.02M
                ++precision;
631
2.02M
                scale += found_dot;
632
2.02M
                cur_digit = precision - scale;
633
2.02M
            } else if (!found_dot && max_digit < (precision - scale)) {
634
0
                *result = StringParser::PARSE_OVERFLOW;
635
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
0
                return value;
638
0
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
0
                if (c > '4') {
641
0
                    value += 1;
642
0
                }
643
0
                has_round = true;
644
0
                continue;
645
0
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
2.02M
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
2.02M
        } else if (c == '.' && LIKELY(!found_dot)) {
650
87.5k
            found_dot = 1;
651
87.5k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
71.4k
            found_exponent = true;
653
71.4k
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
71.4k
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
10
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
10
                return 0;
659
10
            }
660
71.3k
            break;
661
71.4k
        } else {
662
80
            if (value == 0) {
663
74
                *result = StringParser::PARSE_FAILURE;
664
74
                return 0;
665
74
            }
666
            // here to handle
667
6
            *result = StringParser::PARSE_SUCCESS;
668
6
            if (type_scale >= scale) {
669
6
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
6
                if (!is_numeric_ascii(c)) {
674
6
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
6
                    return is_negative ? T(-value) : T(value);
683
6
                }
684
6
            }
685
686
0
            return is_negative ? T(-value) : T(value);
687
6
        }
688
2.18M
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
92.2k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
69.1k
        precision += exponent - scale;
695
696
69.1k
        value *= get_scale_multiplier<T>(exponent - scale);
697
69.1k
        scale = 0;
698
69.1k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
23.0k
        scale -= exponent;
703
23.0k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
92.2k
    if (scale > precision) {
707
1.26k
        precision = scale;
708
1.26k
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
92.2k
    *result = StringParser::PARSE_SUCCESS;
713
92.2k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
0
        *result = StringParser::PARSE_OVERFLOW;
715
0
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
0
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
0
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
0
            return value;
720
0
        }
721
92.2k
    } else if (UNLIKELY(scale > type_scale)) {
722
96
        *result = StringParser::PARSE_UNDERFLOW;
723
96
        int shift = scale - type_scale;
724
96
        T divisor = get_scale_multiplier<T>(shift);
725
96
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
96
        } else {
728
96
            T remainder = value % divisor;
729
96
            value /= divisor;
730
96
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
0
                value += 1;
732
0
            }
733
96
        }
734
96
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
92.1k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
0
        *result = StringParser::PARSE_FAILURE;
737
0
    }
738
739
92.2k
    if (type_scale > scale) {
740
85.1k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
85.1k
    }
742
743
92.2k
    return is_negative ? T(-value) : T(value);
744
92.2k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EnNS_10vectorized7DecimalInEEEET0_PKciiiPNS0_11ParseResultE
Line
Count
Source
556
13.5k
                                  int type_scale, ParseResult* result) {
557
13.5k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
558
13.5k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
559
13.5k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
560
13.5k
                  "wide::Int256.");
561
    // Special cases:
562
    //   1) '' == Fail, an empty string fails to parse.
563
    //   2) '   #   ' == #, leading and trailing white space is ignored.
564
    //   3) '.' == 0, a single dot parses as zero (for consistency with other types).
565
    //   4) '#.' == '#', a trailing dot is ignored.
566
567
    // Ignore leading and trailing spaces.
568
13.5k
    while (len > 0 && is_whitespace(*s)) {
569
0
        ++s;
570
0
        --len;
571
0
    }
572
13.5k
    while (len > 0 && is_whitespace(s[len - 1])) {
573
0
        --len;
574
0
    }
575
576
13.5k
    bool is_negative = false;
577
13.5k
    if (len > 0) {
578
13.5k
        switch (*s) {
579
6.68k
        case '-':
580
6.68k
            is_negative = true;
581
6.68k
            [[fallthrough]];
582
6.68k
        case '+':
583
6.68k
            ++s;
584
6.68k
            --len;
585
13.5k
        }
586
13.5k
    }
587
588
    // Ignore leading zeros.
589
13.5k
    bool found_value = false;
590
52.3k
    while (len > 0 && UNLIKELY(*s == '0')) {
591
38.8k
        found_value = true;
592
38.8k
        ++s;
593
38.8k
        --len;
594
38.8k
    }
595
596
    // Ignore leading zeros even after a dot. This allows for differentiating between
597
    // cases like 0.01e2, which would fit in a DECIMAL(1, 0), and 0.10e2, which would
598
    // overflow.
599
13.5k
    int scale = 0;
600
13.5k
    int found_dot = 0;
601
13.5k
    if (len > 0 && *s == '.') {
602
2.00k
        found_dot = 1;
603
2.00k
        ++s;
604
2.00k
        --len;
605
6.43k
        while (len > 0 && UNLIKELY(*s == '0')) {
606
4.42k
            found_value = true;
607
4.42k
            ++scale;
608
4.42k
            ++s;
609
4.42k
            --len;
610
4.42k
        }
611
2.00k
    }
612
613
13.5k
    int precision = 0;
614
13.5k
    int max_digit = type_precision - type_scale;
615
13.5k
    int cur_digit = 0;
616
13.5k
    bool found_exponent = false;
617
13.5k
    int8_t exponent = 0;
618
13.5k
    T value = 0;
619
13.5k
    bool has_round = false;
620
275k
    for (int i = 0; i < len; ++i) {
621
261k
        const char& c = s[i];
622
261k
        if (LIKELY('0' <= c && c <= '9')) {
623
250k
            found_value = true;
624
            // Ignore digits once the type's precision limit is reached. This avoids
625
            // overflowing the underlying storage while handling a string like
626
            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored digits and
627
            // an exponent will be made later.
628
250k
            if (LIKELY(type_precision > precision) && !has_round) {
629
250k
                value = (value * 10) + (c - '0'); // Benchmarks are faster with parenthesis...
630
250k
                ++precision;
631
250k
                scale += found_dot;
632
250k
                cur_digit = precision - scale;
633
250k
            } else if (!found_dot && max_digit < (precision - scale)) {
634
0
                *result = StringParser::PARSE_OVERFLOW;
635
0
                value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
636
0
                                    : vectorized::max_decimal_value<DecimalType>(type_precision);
637
0
                return value;
638
8
            } else if (found_dot && scale >= type_scale && !has_round) {
639
                // make rounding cases
640
8
                if (c > '4') {
641
0
                    value += 1;
642
0
                }
643
8
                has_round = true;
644
8
                continue;
645
8
            } else if (!found_dot) {
646
0
                ++cur_digit;
647
0
            }
648
250k
            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work with __int128.
649
250k
        } else if (c == '.' && LIKELY(!found_dot)) {
650
11.4k
            found_dot = 1;
651
11.4k
        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
652
0
            found_exponent = true;
653
0
            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, result);
654
0
            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
655
0
                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
656
0
                    *result = StringParser::PARSE_UNDERFLOW;
657
0
                }
658
0
                return 0;
659
0
            }
660
0
            break;
661
16
        } else {
662
16
            if (value == 0) {
663
10
                *result = StringParser::PARSE_FAILURE;
664
10
                return 0;
665
10
            }
666
            // here to handle
667
6
            *result = StringParser::PARSE_SUCCESS;
668
6
            if (type_scale >= scale) {
669
6
                value *= get_scale_multiplier<T>(type_scale - scale);
670
                // here meet non-valid character, should return the value, keep going to meet
671
                // the E/e character because we make right user-given type_precision
672
                // not max number type_precision
673
6
                if (!is_numeric_ascii(c)) {
674
6
                    if (cur_digit > type_precision) {
675
0
                        *result = StringParser::PARSE_OVERFLOW;
676
0
                        value = is_negative
677
0
                                        ? vectorized::min_decimal_value<DecimalType>(type_precision)
678
0
                                        : vectorized::max_decimal_value<DecimalType>(
679
0
                                                  type_precision);
680
0
                        return value;
681
0
                    }
682
6
                    return is_negative ? T(-value) : T(value);
683
6
                }
684
6
            }
685
686
0
            return is_negative ? T(-value) : T(value);
687
6
        }
688
261k
    }
689
690
    // Find the number of truncated digits before adjusting the precision for an exponent.
691
13.5k
    if (exponent > scale) {
692
        // Ex: 0.1e3 (which at this point would have precision == 1 and scale == 1), the
693
        //     scale must be set to 0 and the value set to 100 which means a precision of 3.
694
0
        precision += exponent - scale;
695
696
0
        value *= get_scale_multiplier<T>(exponent - scale);
697
0
        scale = 0;
698
13.5k
    } else {
699
        // Ex: 100e-4, the scale must be set to 4 but no adjustment to the value is needed,
700
        //     the precision must also be set to 4 but that will be done below for the
701
        //     non-exponent case anyways.
702
13.5k
        scale -= exponent;
703
13.5k
    }
704
    // Ex: 0.001, at this point would have precision 1 and scale 3 since leading zeros
705
    //     were ignored during previous parsing.
706
13.5k
    if (scale > precision) {
707
675
        precision = scale;
708
675
    }
709
710
    // Microbenchmarks show that beyond this point, returning on parse failure is slower
711
    // than just letting the function run out.
712
13.5k
    *result = StringParser::PARSE_SUCCESS;
713
13.5k
    if (UNLIKELY(precision - scale > type_precision - type_scale)) {
714
9
        *result = StringParser::PARSE_OVERFLOW;
715
9
        if constexpr (TYPE_DECIMALV2 != P) {
716
            // decimalv3 overflow will return max min value for type precision
717
9
            value = is_negative ? vectorized::min_decimal_value<DecimalType>(type_precision)
718
9
                                : vectorized::max_decimal_value<DecimalType>(type_precision);
719
9
            return value;
720
9
        }
721
13.5k
    } else if (UNLIKELY(scale > type_scale)) {
722
17
        *result = StringParser::PARSE_UNDERFLOW;
723
17
        int shift = scale - type_scale;
724
17
        T divisor = get_scale_multiplier<T>(shift);
725
17
        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
726
0
            value = 0;
727
17
        } else {
728
17
            T remainder = value % divisor;
729
17
            value /= divisor;
730
17
            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 1)) {
731
17
                value += 1;
732
17
            }
733
17
        }
734
17
        DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
735
13.5k
    } else if (UNLIKELY(!found_value && !found_dot)) {
736
0
        *result = StringParser::PARSE_FAILURE;
737
0
    }
738
739
13.5k
    if (type_scale > scale) {
740
1.95k
        value *= get_scale_multiplier<T>(type_scale - scale);
741
1.95k
    }
742
743
13.5k
    return is_negative ? T(-value) : T(value);
744
13.5k
}
745
746
} // end namespace doris