Coverage Report

Created: 2026-03-12 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/string_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/string_parser.hpp"
19
20
#include <limits>
21
22
#include "core/extended_types.h"
23
#include "core/types.h"
24
namespace doris {
25
#include "common/compile_check_avoid_begin.h"
26
// Supported decimal number format:
27
// <decimal> ::= <whitespace>* <value> <whitespace>*
28
//
29
// <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v"
30
//
31
// <value> ::= <sign>? <significand> <exponent>?
32
//
33
// <sign> ::= "+" | "-"
34
//
35
// <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits>
36
//
37
// <digits> ::= <digit>+
38
//
39
// <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
40
//
41
// <exponent> ::= <e_marker> <sign>? <digits>
42
//
43
// <e_marker> ::= "e" | "E"
44
template <PrimitiveType P>
45
typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal(
46
        const char* __restrict s, size_t len, int type_precision, int type_scale,
47
3.81M
        ParseResult* result) {
48
3.81M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
3.81M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
3.81M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
3.81M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
3.81M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
3.81M
    s = skip_ascii_whitespaces(s, len);
55
56
3.81M
    bool is_negative = false;
57
3.81M
    if (len > 0) {
58
3.81M
        switch (*s) {
59
422k
        case '-':
60
422k
            is_negative = true;
61
422k
            [[fallthrough]];
62
449k
        case '+':
63
449k
            ++s;
64
449k
            --len;
65
3.81M
        }
66
3.81M
    }
67
    // Ignore leading zeros.
68
3.81M
    bool found_value = false;
69
4.15M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
339k
        found_value = true;
71
339k
        ++s;
72
339k
        --len;
73
339k
    }
74
75
3.81M
    int found_dot = 0;
76
3.81M
    if (len > 0 && *s == '.') {
77
84.7k
        found_dot = 1;
78
84.7k
        ++s;
79
84.7k
        --len;
80
84.7k
    }
81
3.81M
    int int_part_count = 0;
82
3.81M
    int i = 0;
83
43.4M
    for (; i != len; ++i) {
84
39.6M
        const char& c = s[i];
85
39.6M
        if (LIKELY('0' <= c && c <= '9')) {
86
36.0M
            found_value = true;
87
36.0M
            if (!found_dot) {
88
8.86M
                ++int_part_count;
89
8.86M
            }
90
36.0M
        } else if (c == '.') {
91
3.54M
            if (found_dot) {
92
2
                *result = StringParser::PARSE_FAILURE;
93
2
                return 0;
94
2
            }
95
3.54M
            found_dot = 1;
96
3.54M
        } else {
97
104k
            break;
98
104k
        }
99
39.6M
    }
100
3.81M
    if (!found_value) {
101
        // '', '.'
102
438
        *result = StringParser::PARSE_FAILURE;
103
438
        return 0;
104
438
    }
105
    // parse exponent if any
106
3.81M
    int64_t exponent = 0;
107
3.81M
    auto end_digit_index = i;
108
3.81M
    if (i != len) {
109
109k
        bool negative_exponent = false;
110
109k
        if (s[i] == 'e' || s[i] == 'E') {
111
109k
            ++i;
112
109k
            if (i != len) {
113
109k
                switch (s[i]) {
114
11.5k
                case '-':
115
11.5k
                    negative_exponent = true;
116
11.5k
                    [[fallthrough]];
117
78.1k
                case '+':
118
78.1k
                    ++i;
119
109k
                }
120
109k
            }
121
109k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
6
                *result = StringParser::PARSE_FAILURE;
124
6
                return 0;
125
6
            }
126
319k
            for (; i != len; ++i) {
127
209k
                const char& c = s[i];
128
209k
                if (LIKELY('0' <= c && c <= '9')) {
129
209k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
209k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
209k
                } else {
139
                    // '123e12abc', '123e1.2'
140
22
                    *result = StringParser::PARSE_FAILURE;
141
22
                    return 0;
142
22
                }
143
209k
            }
144
109k
            if (negative_exponent) {
145
11.5k
                exponent = -exponent;
146
11.5k
            }
147
109k
        } else {
148
120
            *result = StringParser::PARSE_FAILURE;
149
120
            return 0;
150
120
        }
151
109k
    }
152
3.81M
    T int_part_number = 0;
153
3.81M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
3.81M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
3.81M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
3.81M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
3.81M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
3.81M
    int actual_frac_part_count = 0;
167
3.81M
    int digit_index = 0;
168
3.81M
    if (result_int_part_digit_count >= 0) {
169
3.80M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
3.63M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
3.80M
                                           : result_int_part_digit_count,
172
3.80M
                                 end_digit_index);
173
3.80M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
4.65M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
855k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
3.80M
        if (digit_index != max_index &&
180
3.80M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
11.7k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
11.7k
            return 0;
183
11.7k
        }
184
        // get int part number
185
13.4M
        for (; digit_index != max_index; ++digit_index) {
186
9.63M
            if (UNLIKELY(s[digit_index] == '.')) {
187
71.1k
                continue;
188
71.1k
            }
189
9.56M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
9.56M
        }
191
3.79M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
3.79M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
2.42k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
2.42k
                                                       total_significant_digit_count);
195
2.42k
        }
196
3.79M
    } else {
197
        // leading zeros of fraction part
198
8.04k
        actual_frac_part_count = -result_int_part_digit_count;
199
8.04k
    }
200
    // get fraction part number
201
32.3M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
28.5M
        if (UNLIKELY(s[digit_index] == '.')) {
203
3.46M
            continue;
204
3.46M
        }
205
25.0M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
25.0M
        ++actual_frac_part_count;
207
25.0M
    }
208
3.80M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
3.80M
    if (digit_index != end_digit_index) {
211
83.4k
        if (UNLIKELY(s[digit_index] == '.')) {
212
3.46k
            ++digit_index;
213
3.46k
        }
214
83.4k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
82.4k
            if (s[digit_index] >= '5') {
217
33.3k
                ++frac_part_number;
218
33.3k
                if (frac_part_number == type_scale_multiplier) {
219
3.43k
                    frac_part_number = 0;
220
3.43k
                    ++int_part_number;
221
3.43k
                }
222
33.3k
            }
223
82.4k
        }
224
3.71M
    } else {
225
3.71M
        if (actual_frac_part_count < type_scale) {
226
201k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
201k
        }
228
3.71M
    }
229
3.80M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
72
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
72
        return 0;
232
72
    }
233
234
3.80M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
3.80M
    *result = StringParser::PARSE_SUCCESS;
236
3.80M
    return is_negative ? T(-value) : T(value);
237
3.80M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
1.77M
        ParseResult* result) {
48
1.77M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
1.77M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
1.77M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
1.77M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
1.77M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
1.77M
    s = skip_ascii_whitespaces(s, len);
55
56
1.77M
    bool is_negative = false;
57
1.77M
    if (len > 0) {
58
1.77M
        switch (*s) {
59
189k
        case '-':
60
189k
            is_negative = true;
61
189k
            [[fallthrough]];
62
197k
        case '+':
63
197k
            ++s;
64
197k
            --len;
65
1.77M
        }
66
1.77M
    }
67
    // Ignore leading zeros.
68
1.77M
    bool found_value = false;
69
1.83M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
53.7k
        found_value = true;
71
53.7k
        ++s;
72
53.7k
        --len;
73
53.7k
    }
74
75
1.77M
    int found_dot = 0;
76
1.77M
    if (len > 0 && *s == '.') {
77
17.9k
        found_dot = 1;
78
17.9k
        ++s;
79
17.9k
        --len;
80
17.9k
    }
81
1.77M
    int int_part_count = 0;
82
1.77M
    int i = 0;
83
10.9M
    for (; i != len; ++i) {
84
9.13M
        const char& c = s[i];
85
9.13M
        if (LIKELY('0' <= c && c <= '9')) {
86
7.51M
            found_value = true;
87
7.51M
            if (!found_dot) {
88
3.23M
                ++int_part_count;
89
3.23M
            }
90
7.51M
        } else if (c == '.') {
91
1.61M
            if (found_dot) {
92
2
                *result = StringParser::PARSE_FAILURE;
93
2
                return 0;
94
2
            }
95
1.61M
            found_dot = 1;
96
1.61M
        } else {
97
5.89k
            break;
98
5.89k
        }
99
9.13M
    }
100
1.77M
    if (!found_value) {
101
        // '', '.'
102
158
        *result = StringParser::PARSE_FAILURE;
103
158
        return 0;
104
158
    }
105
    // parse exponent if any
106
1.77M
    int64_t exponent = 0;
107
1.77M
    auto end_digit_index = i;
108
1.77M
    if (i != len) {
109
9.40k
        bool negative_exponent = false;
110
9.40k
        if (s[i] == 'e' || s[i] == 'E') {
111
9.33k
            ++i;
112
9.33k
            if (i != len) {
113
9.33k
                switch (s[i]) {
114
1.54k
                case '-':
115
1.54k
                    negative_exponent = true;
116
1.54k
                    [[fallthrough]];
117
1.54k
                case '+':
118
1.54k
                    ++i;
119
9.33k
                }
120
9.33k
            }
121
9.33k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
6
                *result = StringParser::PARSE_FAILURE;
124
6
                return 0;
125
6
            }
126
24.6k
            for (; i != len; ++i) {
127
15.3k
                const char& c = s[i];
128
15.3k
                if (LIKELY('0' <= c && c <= '9')) {
129
15.3k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
15.3k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
15.3k
                } else {
139
                    // '123e12abc', '123e1.2'
140
12
                    *result = StringParser::PARSE_FAILURE;
141
12
                    return 0;
142
12
                }
143
15.3k
            }
144
9.31k
            if (negative_exponent) {
145
1.53k
                exponent = -exponent;
146
1.53k
            }
147
9.31k
        } else {
148
66
            *result = StringParser::PARSE_FAILURE;
149
66
            return 0;
150
66
        }
151
9.40k
    }
152
1.77M
    T int_part_number = 0;
153
1.77M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
1.77M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
1.77M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
1.77M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
1.77M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
1.77M
    int actual_frac_part_count = 0;
167
1.77M
    int digit_index = 0;
168
1.77M
    if (result_int_part_digit_count >= 0) {
169
1.77M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
1.62M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
1.77M
                                           : result_int_part_digit_count,
172
1.77M
                                 end_digit_index);
173
1.77M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
1.98M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
212k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
1.77M
        if (digit_index != max_index &&
180
1.77M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
1.30k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
1.30k
            return 0;
183
1.30k
        }
184
        // get int part number
185
4.94M
        for (; digit_index != max_index; ++digit_index) {
186
3.17M
            if (UNLIKELY(s[digit_index] == '.')) {
187
1.60k
                continue;
188
1.60k
            }
189
3.17M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
3.17M
        }
191
1.77M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
1.77M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
100
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
100
                                                       total_significant_digit_count);
195
100
        }
196
1.77M
    } else {
197
        // leading zeros of fraction part
198
4.45k
        actual_frac_part_count = -result_int_part_digit_count;
199
4.45k
    }
200
    // get fraction part number
201
7.42M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
5.64M
        if (UNLIKELY(s[digit_index] == '.')) {
203
1.60M
            continue;
204
1.60M
        }
205
4.04M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
4.04M
        ++actual_frac_part_count;
207
4.04M
    }
208
1.77M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
1.77M
    if (digit_index != end_digit_index) {
211
21.4k
        if (UNLIKELY(s[digit_index] == '.')) {
212
904
            ++digit_index;
213
904
        }
214
21.4k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
21.1k
            if (s[digit_index] >= '5') {
217
8.96k
                ++frac_part_number;
218
8.96k
                if (frac_part_number == type_scale_multiplier) {
219
856
                    frac_part_number = 0;
220
856
                    ++int_part_number;
221
856
                }
222
8.96k
            }
223
21.1k
        }
224
1.75M
    } else {
225
1.75M
        if (actual_frac_part_count < type_scale) {
226
27.7k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
27.7k
        }
228
1.75M
    }
229
1.77M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
24
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
24
        return 0;
232
24
    }
233
234
1.77M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
1.77M
    *result = StringParser::PARSE_SUCCESS;
236
1.77M
    return is_negative ? T(-value) : T(value);
237
1.77M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
1.19M
        ParseResult* result) {
48
1.19M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
1.19M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
1.19M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
1.19M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
1.19M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
1.19M
    s = skip_ascii_whitespaces(s, len);
55
56
1.19M
    bool is_negative = false;
57
1.19M
    if (len > 0) {
58
1.19M
        switch (*s) {
59
104k
        case '-':
60
104k
            is_negative = true;
61
104k
            [[fallthrough]];
62
111k
        case '+':
63
111k
            ++s;
64
111k
            --len;
65
1.19M
        }
66
1.19M
    }
67
    // Ignore leading zeros.
68
1.19M
    bool found_value = false;
69
1.27M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
76.0k
        found_value = true;
71
76.0k
        ++s;
72
76.0k
        --len;
73
76.0k
    }
74
75
1.19M
    int found_dot = 0;
76
1.19M
    if (len > 0 && *s == '.') {
77
23.9k
        found_dot = 1;
78
23.9k
        ++s;
79
23.9k
        --len;
80
23.9k
    }
81
1.19M
    int int_part_count = 0;
82
1.19M
    int i = 0;
83
13.9M
    for (; i != len; ++i) {
84
12.8M
        const char& c = s[i];
85
12.8M
        if (LIKELY('0' <= c && c <= '9')) {
86
11.6M
            found_value = true;
87
11.6M
            if (!found_dot) {
88
2.26M
                ++int_part_count;
89
2.26M
            }
90
11.6M
        } else if (c == '.') {
91
1.16M
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
1.16M
            found_dot = 1;
96
1.16M
        } else {
97
10.6k
            break;
98
10.6k
        }
99
12.8M
    }
100
1.19M
    if (!found_value) {
101
        // '', '.'
102
73
        *result = StringParser::PARSE_FAILURE;
103
73
        return 0;
104
73
    }
105
    // parse exponent if any
106
1.19M
    int64_t exponent = 0;
107
1.19M
    auto end_digit_index = i;
108
1.19M
    if (i != len) {
109
11.6k
        bool negative_exponent = false;
110
11.6k
        if (s[i] == 'e' || s[i] == 'E') {
111
11.6k
            ++i;
112
11.6k
            if (i != len) {
113
11.6k
                switch (s[i]) {
114
3.84k
                case '-':
115
3.84k
                    negative_exponent = true;
116
3.84k
                    [[fallthrough]];
117
3.84k
                case '+':
118
3.84k
                    ++i;
119
11.6k
                }
120
11.6k
            }
121
11.6k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
32.3k
            for (; i != len; ++i) {
127
20.7k
                const char& c = s[i];
128
20.7k
                if (LIKELY('0' <= c && c <= '9')) {
129
20.7k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
20.7k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
20.7k
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
20.7k
            }
144
11.6k
            if (negative_exponent) {
145
3.84k
                exponent = -exponent;
146
3.84k
            }
147
11.6k
        } else {
148
25
            *result = StringParser::PARSE_FAILURE;
149
25
            return 0;
150
25
        }
151
11.6k
    }
152
1.19M
    T int_part_number = 0;
153
1.19M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
1.19M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
1.19M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
1.20M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
1.19M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
1.19M
    int actual_frac_part_count = 0;
167
1.19M
    int digit_index = 0;
168
1.19M
    if (result_int_part_digit_count >= 0) {
169
1.19M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
1.19M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
1.19M
                                           : result_int_part_digit_count,
172
1.19M
                                 end_digit_index);
173
1.19M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
1.41M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
212k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
1.19M
        if (digit_index != max_index &&
180
1.19M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
10.1k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
10.1k
            return 0;
183
10.1k
        }
184
        // get int part number
185
3.26M
        for (; digit_index != max_index; ++digit_index) {
186
2.07M
            if (UNLIKELY(s[digit_index] == '.')) {
187
960
                continue;
188
960
            }
189
2.07M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
2.07M
        }
191
1.18M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
1.18M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
76
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
76
                                                       total_significant_digit_count);
195
76
        }
196
18.4E
    } else {
197
        // leading zeros of fraction part
198
18.4E
        actual_frac_part_count = -result_int_part_digit_count;
199
18.4E
    }
200
    // get fraction part number
201
11.4M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
10.2M
        if (UNLIKELY(s[digit_index] == '.')) {
203
1.15M
            continue;
204
1.15M
        }
205
9.11M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
9.11M
        ++actual_frac_part_count;
207
9.11M
    }
208
1.18M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
1.18M
    if (digit_index != end_digit_index) {
211
21.2k
        if (UNLIKELY(s[digit_index] == '.')) {
212
852
            ++digit_index;
213
852
        }
214
21.2k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
20.9k
            if (s[digit_index] >= '5') {
217
8.42k
                ++frac_part_number;
218
8.42k
                if (frac_part_number == type_scale_multiplier) {
219
836
                    frac_part_number = 0;
220
836
                    ++int_part_number;
221
836
                }
222
8.42k
            }
223
20.9k
        }
224
1.16M
    } else {
225
1.16M
        if (actual_frac_part_count < type_scale) {
226
32.8k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
32.8k
        }
228
1.16M
    }
229
1.18M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
16
        return 0;
232
16
    }
233
234
1.18M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
1.18M
    *result = StringParser::PARSE_SUCCESS;
236
1.18M
    return is_negative ? T(-value) : T(value);
237
1.18M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
707k
        ParseResult* result) {
48
707k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
707k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
707k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
707k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
707k
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
707k
    s = skip_ascii_whitespaces(s, len);
55
56
707k
    bool is_negative = false;
57
707k
    if (len > 0) {
58
707k
        switch (*s) {
59
104k
        case '-':
60
104k
            is_negative = true;
61
104k
            [[fallthrough]];
62
110k
        case '+':
63
110k
            ++s;
64
110k
            --len;
65
707k
        }
66
707k
    }
67
    // Ignore leading zeros.
68
707k
    bool found_value = false;
69
784k
    while (len > 0 && UNLIKELY(*s == '0')) {
70
76.6k
        found_value = true;
71
76.6k
        ++s;
72
76.6k
        --len;
73
76.6k
    }
74
75
707k
    int found_dot = 0;
76
707k
    if (len > 0 && *s == '.') {
77
25.0k
        found_dot = 1;
78
25.0k
        ++s;
79
25.0k
        --len;
80
25.0k
    }
81
707k
    int int_part_count = 0;
82
707k
    int i = 0;
83
14.3M
    for (; i != len; ++i) {
84
13.6M
        const char& c = s[i];
85
13.6M
        if (LIKELY('0' <= c && c <= '9')) {
86
12.9M
            found_value = true;
87
12.9M
            if (!found_dot) {
88
2.24M
                ++int_part_count;
89
2.24M
            }
90
12.9M
        } else if (c == '.') {
91
666k
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
666k
            found_dot = 1;
96
666k
        } else {
97
11.6k
            break;
98
11.6k
        }
99
13.6M
    }
100
707k
    if (!found_value) {
101
        // '', '.'
102
131
        *result = StringParser::PARSE_FAILURE;
103
131
        return 0;
104
131
    }
105
    // parse exponent if any
106
707k
    int64_t exponent = 0;
107
707k
    auto end_digit_index = i;
108
707k
    if (i != len) {
109
12.3k
        bool negative_exponent = false;
110
12.3k
        if (s[i] == 'e' || s[i] == 'E') {
111
12.3k
            ++i;
112
12.3k
            if (i != len) {
113
12.3k
                switch (s[i]) {
114
4.61k
                case '-':
115
4.61k
                    negative_exponent = true;
116
4.61k
                    [[fallthrough]];
117
4.61k
                case '+':
118
4.61k
                    ++i;
119
12.3k
                }
120
12.3k
            }
121
12.3k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
35.6k
            for (; i != len; ++i) {
127
23.3k
                const char& c = s[i];
128
23.3k
                if (LIKELY('0' <= c && c <= '9')) {
129
23.3k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
23.3k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
23.3k
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
23.3k
            }
144
12.3k
            if (negative_exponent) {
145
4.61k
                exponent = -exponent;
146
4.61k
            }
147
12.3k
        } else {
148
14
            *result = StringParser::PARSE_FAILURE;
149
14
            return 0;
150
14
        }
151
12.3k
    }
152
707k
    T int_part_number = 0;
153
707k
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
707k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
707k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
707k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
707k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
707k
    int actual_frac_part_count = 0;
167
707k
    int digit_index = 0;
168
707k
    if (result_int_part_digit_count >= 0) {
169
703k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
689k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
703k
                                           : result_int_part_digit_count,
172
703k
                                 end_digit_index);
173
703k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
917k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
213k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
703k
        if (digit_index != max_index &&
180
703k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
142
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
142
            return 0;
183
142
        }
184
        // get int part number
185
2.90M
        for (; digit_index != max_index; ++digit_index) {
186
2.20M
            if (UNLIKELY(s[digit_index] == '.')) {
187
960
                continue;
188
960
            }
189
2.20M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
2.20M
        }
191
703k
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
703k
        if (result_int_part_digit_count > total_significant_digit_count) {
193
76
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
76
                                                       total_significant_digit_count);
195
76
        }
196
703k
    } else {
197
        // leading zeros of fraction part
198
3.65k
        actual_frac_part_count = -result_int_part_digit_count;
199
3.65k
    }
200
    // get fraction part number
201
11.8M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
11.1M
        if (UNLIKELY(s[digit_index] == '.')) {
203
663k
            continue;
204
663k
        }
205
10.4M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
10.4M
        ++actual_frac_part_count;
207
10.4M
    }
208
707k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
707k
    if (digit_index != end_digit_index) {
211
21.6k
        if (UNLIKELY(s[digit_index] == '.')) {
212
852
            ++digit_index;
213
852
        }
214
21.6k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
21.4k
            if (s[digit_index] >= '5') {
217
8.02k
                ++frac_part_number;
218
8.02k
                if (frac_part_number == type_scale_multiplier) {
219
906
                    frac_part_number = 0;
220
906
                    ++int_part_number;
221
906
                }
222
8.02k
            }
223
21.4k
        }
224
685k
    } else {
225
685k
        if (actual_frac_part_count < type_scale) {
226
51.9k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
51.9k
        }
228
685k
    }
229
707k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
16
        return 0;
232
16
    }
233
234
707k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
707k
    *result = StringParser::PARSE_SUCCESS;
236
707k
    return is_negative ? T(-value) : T(value);
237
707k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
13.5k
        ParseResult* result) {
48
13.5k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
13.5k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
13.5k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
13.5k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
13.5k
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
13.5k
    s = skip_ascii_whitespaces(s, len);
55
56
13.5k
    bool is_negative = false;
57
13.5k
    if (len > 0) {
58
13.5k
        switch (*s) {
59
6.68k
        case '-':
60
6.68k
            is_negative = true;
61
6.68k
            [[fallthrough]];
62
6.68k
        case '+':
63
6.68k
            ++s;
64
6.68k
            --len;
65
13.5k
        }
66
13.5k
    }
67
    // Ignore leading zeros.
68
13.5k
    bool found_value = false;
69
52.3k
    while (len > 0 && UNLIKELY(*s == '0')) {
70
38.8k
        found_value = true;
71
38.8k
        ++s;
72
38.8k
        --len;
73
38.8k
    }
74
75
13.5k
    int found_dot = 0;
76
13.5k
    if (len > 0 && *s == '.') {
77
2.00k
        found_dot = 1;
78
2.00k
        ++s;
79
2.00k
        --len;
80
2.00k
    }
81
13.5k
    int int_part_count = 0;
82
13.5k
    int i = 0;
83
279k
    for (; i != len; ++i) {
84
266k
        const char& c = s[i];
85
266k
        if (LIKELY('0' <= c && c <= '9')) {
86
254k
            found_value = true;
87
254k
            if (!found_dot) {
88
136k
                ++int_part_count;
89
136k
            }
90
254k
        } else if (c == '.') {
91
11.4k
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
11.4k
            found_dot = 1;
96
11.4k
        } else {
97
11
            break;
98
11
        }
99
266k
    }
100
13.5k
    if (!found_value) {
101
        // '', '.'
102
10
        *result = StringParser::PARSE_FAILURE;
103
10
        return 0;
104
10
    }
105
    // parse exponent if any
106
13.5k
    int64_t exponent = 0;
107
13.5k
    auto end_digit_index = i;
108
13.5k
    if (i != len) {
109
1
        bool negative_exponent = false;
110
1
        if (s[i] == 'e' || s[i] == 'E') {
111
0
            ++i;
112
0
            if (i != len) {
113
0
                switch (s[i]) {
114
0
                case '-':
115
0
                    negative_exponent = true;
116
0
                    [[fallthrough]];
117
0
                case '+':
118
0
                    ++i;
119
0
                }
120
0
            }
121
0
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
0
            for (; i != len; ++i) {
127
0
                const char& c = s[i];
128
0
                if (LIKELY('0' <= c && c <= '9')) {
129
0
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
0
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
0
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
0
            }
144
0
            if (negative_exponent) {
145
0
                exponent = -exponent;
146
0
            }
147
1
        } else {
148
1
            *result = StringParser::PARSE_FAILURE;
149
1
            return 0;
150
1
        }
151
1
    }
152
13.5k
    T int_part_number = 0;
153
13.5k
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
13.5k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
13.5k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
13.5k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
13.5k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
13.5k
    int actual_frac_part_count = 0;
167
13.5k
    int digit_index = 0;
168
13.5k
    if (result_int_part_digit_count >= 0) {
169
13.5k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
13.4k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
13.5k
                                           : result_int_part_digit_count,
172
13.5k
                                 end_digit_index);
173
13.5k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
13.5k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
0
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
13.5k
        if (digit_index != max_index &&
180
13.5k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
8
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
8
            return 0;
183
8
        }
184
        // get int part number
185
149k
        for (; digit_index != max_index; ++digit_index) {
186
136k
            if (UNLIKELY(s[digit_index] == '.')) {
187
0
                continue;
188
0
            }
189
136k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
136k
        }
191
13.5k
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
13.5k
        if (result_int_part_digit_count > total_significant_digit_count) {
193
0
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
0
                                                       total_significant_digit_count);
195
0
        }
196
13.5k
    } else {
197
        // leading zeros of fraction part
198
0
        actual_frac_part_count = -result_int_part_digit_count;
199
0
    }
200
    // get fraction part number
201
143k
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
129k
        if (UNLIKELY(s[digit_index] == '.')) {
203
11.4k
            continue;
204
11.4k
        }
205
118k
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
118k
        ++actual_frac_part_count;
207
118k
    }
208
13.5k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
13.5k
    if (digit_index != end_digit_index) {
211
17
        if (UNLIKELY(s[digit_index] == '.')) {
212
0
            ++digit_index;
213
0
        }
214
17
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
17
            if (s[digit_index] >= '5') {
217
17
                ++frac_part_number;
218
17
                if (frac_part_number == type_scale_multiplier) {
219
0
                    frac_part_number = 0;
220
0
                    ++int_part_number;
221
0
                }
222
17
            }
223
17
        }
224
13.5k
    } else {
225
13.5k
        if (actual_frac_part_count < type_scale) {
226
1.94k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
1.94k
        }
228
13.5k
    }
229
13.5k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
0
        return 0;
232
0
    }
233
234
13.5k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
13.5k
    *result = StringParser::PARSE_SUCCESS;
236
13.5k
    return is_negative ? T(-value) : T(value);
237
13.5k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
115k
        ParseResult* result) {
48
115k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
115k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
115k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
115k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
115k
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
115k
    s = skip_ascii_whitespaces(s, len);
55
56
115k
    bool is_negative = false;
57
115k
    if (len > 0) {
58
115k
        switch (*s) {
59
17.3k
        case '-':
60
17.3k
            is_negative = true;
61
17.3k
            [[fallthrough]];
62
23.9k
        case '+':
63
23.9k
            ++s;
64
23.9k
            --len;
65
115k
        }
66
115k
    }
67
    // Ignore leading zeros.
68
115k
    bool found_value = false;
69
209k
    while (len > 0 && UNLIKELY(*s == '0')) {
70
93.9k
        found_value = true;
71
93.9k
        ++s;
72
93.9k
        --len;
73
93.9k
    }
74
75
115k
    int found_dot = 0;
76
115k
    if (len > 0 && *s == '.') {
77
15.8k
        found_dot = 1;
78
15.8k
        ++s;
79
15.8k
        --len;
80
15.8k
    }
81
115k
    int int_part_count = 0;
82
115k
    int i = 0;
83
3.88M
    for (; i != len; ++i) {
84
3.84M
        const char& c = s[i];
85
3.84M
        if (LIKELY('0' <= c && c <= '9')) {
86
3.67M
            found_value = true;
87
3.67M
            if (!found_dot) {
88
982k
                ++int_part_count;
89
982k
            }
90
3.67M
        } else if (c == '.') {
91
93.4k
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
93.4k
            found_dot = 1;
96
93.4k
        } else {
97
76.0k
            break;
98
76.0k
        }
99
3.84M
    }
100
115k
    if (!found_value) {
101
        // '', '.'
102
66
        *result = StringParser::PARSE_FAILURE;
103
66
        return 0;
104
66
    }
105
    // parse exponent if any
106
115k
    int64_t exponent = 0;
107
115k
    auto end_digit_index = i;
108
115k
    if (i != len) {
109
75.9k
        bool negative_exponent = false;
110
75.9k
        if (s[i] == 'e' || s[i] == 'E') {
111
75.9k
            ++i;
112
75.9k
            if (i != len) {
113
75.9k
                switch (s[i]) {
114
1.53k
                case '-':
115
1.53k
                    negative_exponent = true;
116
1.53k
                    [[fallthrough]];
117
68.1k
                case '+':
118
68.1k
                    ++i;
119
75.9k
                }
120
75.9k
            }
121
75.9k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
226k
            for (; i != len; ++i) {
127
150k
                const char& c = s[i];
128
150k
                if (LIKELY('0' <= c && c <= '9')) {
129
150k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
150k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
150k
                } else {
139
                    // '123e12abc', '123e1.2'
140
10
                    *result = StringParser::PARSE_FAILURE;
141
10
                    return 0;
142
10
                }
143
150k
            }
144
75.9k
            if (negative_exponent) {
145
1.53k
                exponent = -exponent;
146
1.53k
            }
147
75.9k
        } else {
148
14
            *result = StringParser::PARSE_FAILURE;
149
14
            return 0;
150
14
        }
151
75.9k
    }
152
115k
    T int_part_number = 0;
153
115k
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
115k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
115k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
115k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
115k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
115k
    int actual_frac_part_count = 0;
167
115k
    int digit_index = 0;
168
115k
    if (result_int_part_digit_count >= 0) {
169
115k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
109k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
115k
                                           : result_int_part_digit_count,
172
115k
                                 end_digit_index);
173
115k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
331k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
216k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
115k
        if (digit_index != max_index &&
180
115k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
112
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
112
            return 0;
183
112
        }
184
        // get int part number
185
2.15M
        for (; digit_index != max_index; ++digit_index) {
186
2.03M
            if (UNLIKELY(s[digit_index] == '.')) {
187
67.5k
                continue;
188
67.5k
            }
189
1.97M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
1.97M
        }
191
115k
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
115k
        if (result_int_part_digit_count > total_significant_digit_count) {
193
2.17k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
2.17k
                                                       total_significant_digit_count);
195
2.17k
        }
196
115k
    } else {
197
        // leading zeros of fraction part
198
48
        actual_frac_part_count = -result_int_part_digit_count;
199
48
    }
200
    // get fraction part number
201
1.50M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
1.39M
        if (UNLIKELY(s[digit_index] == '.')) {
203
23.9k
            continue;
204
23.9k
        }
205
1.36M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
1.36M
        ++actual_frac_part_count;
207
1.36M
    }
208
115k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
115k
    if (digit_index != end_digit_index) {
211
19.0k
        if (UNLIKELY(s[digit_index] == '.')) {
212
852
            ++digit_index;
213
852
        }
214
19.0k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
18.8k
            if (s[digit_index] >= '5') {
217
7.94k
                ++frac_part_number;
218
7.94k
                if (frac_part_number == type_scale_multiplier) {
219
836
                    frac_part_number = 0;
220
836
                    ++int_part_number;
221
836
                }
222
7.94k
            }
223
18.8k
        }
224
96.3k
    } else {
225
96.3k
        if (actual_frac_part_count < type_scale) {
226
87.0k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
87.0k
        }
228
96.3k
    }
229
115k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
16
        return 0;
232
16
    }
233
234
115k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
115k
    *result = StringParser::PARSE_SUCCESS;
236
115k
    return is_negative ? T(-value) : T(value);
237
115k
}
238
239
template Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>(
240
        const char* __restrict s, size_t len, int type_precision, int type_scale,
241
        ParseResult* result);
242
template Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>(
243
        const char* __restrict s, size_t len, int type_precision, int type_scale,
244
        ParseResult* result);
245
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>(
246
        const char* __restrict s, size_t len, int type_precision, int type_scale,
247
        ParseResult* result);
248
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>(
249
        const char* __restrict s, size_t len, int type_precision, int type_scale,
250
        ParseResult* result);
251
template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>(
252
        const char* __restrict s, size_t len, int type_precision, int type_scale,
253
        ParseResult* result);
254
} // end namespace doris
255
#include "common/compile_check_avoid_end.h"