Coverage Report

Created: 2026-03-18 20:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/string_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/string_parser.hpp"
19
20
#include <limits>
21
22
#include "core/extended_types.h"
23
#include "core/types.h"
24
namespace doris {
25
#include "common/compile_check_avoid_begin.h"
26
// Supported decimal number format:
27
// <decimal> ::= <whitespace>* <value> <whitespace>*
28
//
29
// <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v"
30
//
31
// <value> ::= <sign>? <significand> <exponent>?
32
//
33
// <sign> ::= "+" | "-"
34
//
35
// <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits>
36
//
37
// <digits> ::= <digit>+
38
//
39
// <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
40
//
41
// <exponent> ::= <e_marker> <sign>? <digits>
42
//
43
// <e_marker> ::= "e" | "E"
44
template <PrimitiveType P>
45
typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal(
46
        const char* __restrict s, size_t len, int type_precision, int type_scale,
47
21.1M
        ParseResult* result) {
48
21.1M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
21.1M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
21.1M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
21.1M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
21.1M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
21.1M
    s = skip_ascii_whitespaces(s, len);
55
56
21.1M
    bool is_negative = false;
57
21.1M
    if (len > 0) {
58
21.0M
        switch (*s) {
59
515k
        case '-':
60
515k
            is_negative = true;
61
515k
            [[fallthrough]];
62
542k
        case '+':
63
542k
            ++s;
64
542k
            --len;
65
21.0M
        }
66
21.0M
    }
67
    // Ignore leading zeros.
68
21.1M
    bool found_value = false;
69
28.7M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
7.55M
        found_value = true;
71
7.55M
        ++s;
72
7.55M
        --len;
73
7.55M
    }
74
75
21.1M
    int found_dot = 0;
76
21.1M
    if (len > 0 && *s == '.') {
77
7.28M
        found_dot = 1;
78
7.28M
        ++s;
79
7.28M
        --len;
80
7.28M
    }
81
21.1M
    int int_part_count = 0;
82
21.1M
    int i = 0;
83
149M
    for (; i != len; ++i) {
84
128M
        const char& c = s[i];
85
128M
        if (LIKELY('0' <= c && c <= '9')) {
86
118M
            found_value = true;
87
118M
            if (!found_dot) {
88
60.9M
                ++int_part_count;
89
60.9M
            }
90
118M
        } else if (c == '.') {
91
9.74M
            if (found_dot) {
92
2
                *result = StringParser::PARSE_FAILURE;
93
2
                return 0;
94
2
            }
95
9.74M
            found_dot = 1;
96
9.74M
        } else {
97
102k
            break;
98
102k
        }
99
128M
    }
100
21.1M
    if (!found_value) {
101
        // '', '.'
102
98.0k
        *result = StringParser::PARSE_FAILURE;
103
98.0k
        return 0;
104
98.0k
    }
105
    // parse exponent if any
106
21.0M
    int64_t exponent = 0;
107
21.0M
    auto end_digit_index = i;
108
21.0M
    if (i != len) {
109
113k
        bool negative_exponent = false;
110
113k
        if (s[i] == 'e' || s[i] == 'E') {
111
113k
            ++i;
112
113k
            if (i != len) {
113
113k
                switch (s[i]) {
114
15.6k
                case '-':
115
15.6k
                    negative_exponent = true;
116
15.6k
                    [[fallthrough]];
117
82.2k
                case '+':
118
82.2k
                    ++i;
119
113k
                }
120
113k
            }
121
113k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
6
                *result = StringParser::PARSE_FAILURE;
124
6
                return 0;
125
6
            }
126
335k
            for (; i != len; ++i) {
127
222k
                const char& c = s[i];
128
222k
                if (LIKELY('0' <= c && c <= '9')) {
129
222k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
222k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
222k
                } else {
139
                    // '123e12abc', '123e1.2'
140
22
                    *result = StringParser::PARSE_FAILURE;
141
22
                    return 0;
142
22
                }
143
222k
            }
144
113k
            if (negative_exponent) {
145
15.6k
                exponent = -exponent;
146
15.6k
            }
147
113k
        } else {
148
203
            *result = StringParser::PARSE_FAILURE;
149
203
            return 0;
150
203
        }
151
113k
    }
152
21.0M
    T int_part_number = 0;
153
21.0M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
21.0M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
21.0M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
21.0M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
21.0M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
21.0M
    int actual_frac_part_count = 0;
167
21.0M
    int digit_index = 0;
168
21.0M
    if (result_int_part_digit_count >= 0) {
169
21.0M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
17.0M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
21.0M
                                           : result_int_part_digit_count,
172
21.0M
                                 end_digit_index);
173
21.0M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
21.9M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
858k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
21.0M
        if (digit_index != max_index &&
180
21.0M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
17.0k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
17.0k
            return 0;
183
17.0k
        }
184
        // get int part number
185
82.2M
        for (; digit_index != max_index; ++digit_index) {
186
61.2M
            if (UNLIKELY(s[digit_index] == '.')) {
187
71.1k
                continue;
188
71.1k
            }
189
61.1M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
61.1M
        }
191
21.0M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
21.0M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
2.46k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
2.46k
                                                       total_significant_digit_count);
195
2.46k
        }
196
18.4E
    } else {
197
        // leading zeros of fraction part
198
18.4E
        actual_frac_part_count = -result_int_part_digit_count;
199
18.4E
    }
200
    // get fraction part number
201
86.2M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
65.1M
        if (UNLIKELY(s[digit_index] == '.')) {
203
9.44M
            continue;
204
9.44M
        }
205
55.7M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
55.7M
        ++actual_frac_part_count;
207
55.7M
    }
208
21.0M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
21.0M
    if (digit_index != end_digit_index) {
211
290k
        if (UNLIKELY(s[digit_index] == '.')) {
212
205k
            ++digit_index;
213
205k
        }
214
290k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
289k
            if (s[digit_index] >= '5') {
217
203k
                ++frac_part_number;
218
203k
                if (frac_part_number == type_scale_multiplier) {
219
171k
                    frac_part_number = 0;
220
171k
                    ++int_part_number;
221
171k
                }
222
203k
            }
223
289k
        }
224
20.7M
    } else {
225
20.7M
        if (actual_frac_part_count < type_scale) {
226
4.12M
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
4.12M
        }
228
20.7M
    }
229
21.0M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
152
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
152
        return 0;
232
152
    }
233
234
21.0M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
21.0M
    *result = StringParser::PARSE_SUCCESS;
236
21.0M
    return is_negative ? T(-value) : T(value);
237
21.0M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
2.05M
        ParseResult* result) {
48
2.05M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
2.05M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
2.05M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
2.05M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
2.05M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
2.05M
    s = skip_ascii_whitespaces(s, len);
55
56
2.05M
    bool is_negative = false;
57
2.05M
    if (len > 0) {
58
2.05M
        switch (*s) {
59
223k
        case '-':
60
223k
            is_negative = true;
61
223k
            [[fallthrough]];
62
230k
        case '+':
63
230k
            ++s;
64
230k
            --len;
65
2.05M
        }
66
2.05M
    }
67
    // Ignore leading zeros.
68
2.05M
    bool found_value = false;
69
2.11M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
56.2k
        found_value = true;
71
56.2k
        ++s;
72
56.2k
        --len;
73
56.2k
    }
74
75
2.05M
    int found_dot = 0;
76
2.05M
    if (len > 0 && *s == '.') {
77
19.7k
        found_dot = 1;
78
19.7k
        ++s;
79
19.7k
        --len;
80
19.7k
    }
81
2.05M
    int int_part_count = 0;
82
2.05M
    int i = 0;
83
12.5M
    for (; i != len; ++i) {
84
10.4M
        const char& c = s[i];
85
10.4M
        if (LIKELY('0' <= c && c <= '9')) {
86
8.62M
            found_value = true;
87
8.62M
            if (!found_dot) {
88
3.98M
                ++int_part_count;
89
3.98M
            }
90
8.62M
        } else if (c == '.') {
91
1.86M
            if (found_dot) {
92
2
                *result = StringParser::PARSE_FAILURE;
93
2
                return 0;
94
2
            }
95
1.86M
            found_dot = 1;
96
1.86M
        } else {
97
8.79k
            break;
98
8.79k
        }
99
10.4M
    }
100
2.05M
    if (!found_value) {
101
        // '', '.'
102
6.97k
        *result = StringParser::PARSE_FAILURE;
103
6.97k
        return 0;
104
6.97k
    }
105
    // parse exponent if any
106
2.05M
    int64_t exponent = 0;
107
2.05M
    auto end_digit_index = i;
108
2.05M
    if (i != len) {
109
9.42k
        bool negative_exponent = false;
110
9.42k
        if (s[i] == 'e' || s[i] == 'E') {
111
9.33k
            ++i;
112
9.33k
            if (i != len) {
113
9.33k
                switch (s[i]) {
114
1.54k
                case '-':
115
1.54k
                    negative_exponent = true;
116
1.54k
                    [[fallthrough]];
117
1.54k
                case '+':
118
1.54k
                    ++i;
119
9.33k
                }
120
9.33k
            }
121
9.33k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
6
                *result = StringParser::PARSE_FAILURE;
124
6
                return 0;
125
6
            }
126
24.6k
            for (; i != len; ++i) {
127
15.3k
                const char& c = s[i];
128
15.3k
                if (LIKELY('0' <= c && c <= '9')) {
129
15.3k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
15.3k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
15.3k
                } else {
139
                    // '123e12abc', '123e1.2'
140
12
                    *result = StringParser::PARSE_FAILURE;
141
12
                    return 0;
142
12
                }
143
15.3k
            }
144
9.31k
            if (negative_exponent) {
145
1.53k
                exponent = -exponent;
146
1.53k
            }
147
9.31k
        } else {
148
90
            *result = StringParser::PARSE_FAILURE;
149
90
            return 0;
150
90
        }
151
9.42k
    }
152
2.05M
    T int_part_number = 0;
153
2.05M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
2.05M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
2.05M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
2.05M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
2.05M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
2.05M
    int actual_frac_part_count = 0;
167
2.05M
    int digit_index = 0;
168
2.05M
    if (result_int_part_digit_count >= 0) {
169
2.04M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
1.88M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
2.04M
                                           : result_int_part_digit_count,
172
2.04M
                                 end_digit_index);
173
2.04M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
2.25M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
212k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
2.04M
        if (digit_index != max_index &&
180
2.04M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
6.02k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
6.02k
            return 0;
183
6.02k
        }
184
        // get int part number
185
5.92M
        for (; digit_index != max_index; ++digit_index) {
186
3.88M
            if (UNLIKELY(s[digit_index] == '.')) {
187
1.60k
                continue;
188
1.60k
            }
189
3.87M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
3.87M
        }
191
2.04M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
2.04M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
100
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
100
                                                       total_significant_digit_count);
195
100
        }
196
2.04M
    } else {
197
        // leading zeros of fraction part
198
3.97k
        actual_frac_part_count = -result_int_part_digit_count;
199
3.97k
    }
200
    // get fraction part number
201
7.89M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
5.85M
        if (UNLIKELY(s[digit_index] == '.')) {
203
1.65M
            continue;
204
1.65M
        }
205
4.19M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
4.19M
        ++actual_frac_part_count;
207
4.19M
    }
208
2.04M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
2.04M
    if (digit_index != end_digit_index) {
211
223k
        if (UNLIKELY(s[digit_index] == '.')) {
212
203k
            ++digit_index;
213
203k
        }
214
223k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
223k
            if (s[digit_index] >= '5') {
217
176k
                ++frac_part_number;
218
176k
                if (frac_part_number == type_scale_multiplier) {
219
168k
                    frac_part_number = 0;
220
168k
                    ++int_part_number;
221
168k
                }
222
176k
            }
223
223k
        }
224
1.82M
    } else {
225
1.82M
        if (actual_frac_part_count < type_scale) {
226
30.1k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
30.1k
        }
228
1.82M
    }
229
2.04M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
24
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
24
        return 0;
232
24
    }
233
234
2.04M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
2.04M
    *result = StringParser::PARSE_SUCCESS;
236
2.04M
    return is_negative ? T(-value) : T(value);
237
2.04M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
16.2M
        ParseResult* result) {
48
16.2M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
16.2M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
16.2M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
16.2M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
16.2M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
16.2M
    s = skip_ascii_whitespaces(s, len);
55
56
16.2M
    bool is_negative = false;
57
16.2M
    if (len > 0) {
58
16.2M
        switch (*s) {
59
112k
        case '-':
60
112k
            is_negative = true;
61
112k
            [[fallthrough]];
62
119k
        case '+':
63
119k
            ++s;
64
119k
            --len;
65
16.2M
        }
66
16.2M
    }
67
    // Ignore leading zeros.
68
16.2M
    bool found_value = false;
69
23.4M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
7.27M
        found_value = true;
71
7.27M
        ++s;
72
7.27M
        --len;
73
7.27M
    }
74
75
16.2M
    int found_dot = 0;
76
16.2M
    if (len > 0 && *s == '.') {
77
7.22M
        found_dot = 1;
78
7.22M
        ++s;
79
7.22M
        --len;
80
7.22M
    }
81
16.2M
    int int_part_count = 0;
82
16.2M
    int i = 0;
83
83.2M
    for (; i != len; ++i) {
84
67.0M
        const char& c = s[i];
85
67.0M
        if (LIKELY('0' <= c && c <= '9')) {
86
61.6M
            found_value = true;
87
61.6M
            if (!found_dot) {
88
29.3M
                ++int_part_count;
89
29.3M
            }
90
61.6M
        } else if (c == '.') {
91
5.39M
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
5.39M
            found_dot = 1;
96
5.39M
        } else {
97
3.27k
            break;
98
3.27k
        }
99
67.0M
    }
100
16.2M
    if (!found_value) {
101
        // '', '.'
102
378
        *result = StringParser::PARSE_FAILURE;
103
378
        return 0;
104
378
    }
105
    // parse exponent if any
106
16.2M
    int64_t exponent = 0;
107
16.2M
    auto end_digit_index = i;
108
16.2M
    if (i != len) {
109
13.7k
        bool negative_exponent = false;
110
13.7k
        if (s[i] == 'e' || s[i] == 'E') {
111
13.6k
            ++i;
112
13.6k
            if (i != len) {
113
13.6k
                switch (s[i]) {
114
5.89k
                case '-':
115
5.89k
                    negative_exponent = true;
116
5.89k
                    [[fallthrough]];
117
5.89k
                case '+':
118
5.89k
                    ++i;
119
13.6k
                }
120
13.6k
            }
121
13.6k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
40.7k
            for (; i != len; ++i) {
127
27.0k
                const char& c = s[i];
128
27.0k
                if (LIKELY('0' <= c && c <= '9')) {
129
27.0k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
27.0k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
27.0k
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
27.0k
            }
144
13.6k
            if (negative_exponent) {
145
5.89k
                exponent = -exponent;
146
5.89k
            }
147
13.6k
        } else {
148
78
            *result = StringParser::PARSE_FAILURE;
149
78
            return 0;
150
78
        }
151
13.7k
    }
152
16.2M
    T int_part_number = 0;
153
16.2M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
16.2M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
16.2M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
16.2M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
16.2M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
16.2M
    int actual_frac_part_count = 0;
167
16.2M
    int digit_index = 0;
168
16.2M
    if (result_int_part_digit_count >= 0) {
169
16.2M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
12.6M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
16.2M
                                           : result_int_part_digit_count,
172
16.2M
                                 end_digit_index);
173
16.2M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
16.4M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
214k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
16.2M
        if (digit_index != max_index &&
180
16.2M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
10.5k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
10.5k
            return 0;
183
10.5k
        }
184
        // get int part number
185
45.2M
        for (; digit_index != max_index; ++digit_index) {
186
29.0M
            if (UNLIKELY(s[digit_index] == '.')) {
187
960
                continue;
188
960
            }
189
29.0M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
29.0M
        }
191
16.2M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
16.2M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
92
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
92
                                                       total_significant_digit_count);
195
92
        }
196
18.4E
    } else {
197
        // leading zeros of fraction part
198
18.4E
        actual_frac_part_count = -result_int_part_digit_count;
199
18.4E
    }
200
    // get fraction part number
201
53.6M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
37.4M
        if (UNLIKELY(s[digit_index] == '.')) {
203
5.38M
            continue;
204
5.38M
        }
205
32.0M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
32.0M
        ++actual_frac_part_count;
207
32.0M
    }
208
16.2M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
16.2M
    if (digit_index != end_digit_index) {
211
23.5k
        if (UNLIKELY(s[digit_index] == '.')) {
212
869
            ++digit_index;
213
869
        }
214
23.5k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
23.3k
            if (s[digit_index] >= '5') {
217
9.47k
                ++frac_part_number;
218
9.47k
                if (frac_part_number == type_scale_multiplier) {
219
988
                    frac_part_number = 0;
220
988
                    ++int_part_number;
221
988
                }
222
9.47k
            }
223
23.3k
        }
224
16.1M
    } else {
225
16.1M
        if (actual_frac_part_count < type_scale) {
226
3.63M
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
3.63M
        }
228
16.1M
    }
229
16.2M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
56
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
56
        return 0;
232
56
    }
233
234
16.2M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
16.2M
    *result = StringParser::PARSE_SUCCESS;
236
16.2M
    return is_negative ? T(-value) : T(value);
237
16.2M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
2.72M
        ParseResult* result) {
48
2.72M
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
2.72M
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
2.72M
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
2.72M
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
2.72M
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
2.72M
    s = skip_ascii_whitespaces(s, len);
55
56
2.72M
    bool is_negative = false;
57
2.72M
    if (len > 0) {
58
2.63M
        switch (*s) {
59
151k
        case '-':
60
151k
            is_negative = true;
61
151k
            [[fallthrough]];
62
158k
        case '+':
63
158k
            ++s;
64
158k
            --len;
65
2.63M
        }
66
2.63M
    }
67
    // Ignore leading zeros.
68
2.72M
    bool found_value = false;
69
2.80M
    while (len > 0 && UNLIKELY(*s == '0')) {
70
81.0k
        found_value = true;
71
81.0k
        ++s;
72
81.0k
        --len;
73
81.0k
    }
74
75
2.72M
    int found_dot = 0;
76
2.72M
    if (len > 0 && *s == '.') {
77
29.1k
        found_dot = 1;
78
29.1k
        ++s;
79
29.1k
        --len;
80
29.1k
    }
81
2.72M
    int int_part_count = 0;
82
2.72M
    int i = 0;
83
48.5M
    for (; i != len; ++i) {
84
45.8M
        const char& c = s[i];
85
45.8M
        if (LIKELY('0' <= c && c <= '9')) {
86
43.4M
            found_value = true;
87
43.4M
            if (!found_dot) {
88
25.6M
                ++int_part_count;
89
25.6M
            }
90
43.4M
        } else if (c == '.') {
91
2.36M
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
2.36M
            found_dot = 1;
96
2.36M
        } else {
97
12.1k
            break;
98
12.1k
        }
99
45.8M
    }
100
2.72M
    if (!found_value) {
101
        // '', '.'
102
90.6k
        *result = StringParser::PARSE_FAILURE;
103
90.6k
        return 0;
104
90.6k
    }
105
    // parse exponent if any
106
2.63M
    int64_t exponent = 0;
107
2.63M
    auto end_digit_index = i;
108
2.63M
    if (i != len) {
109
12.3k
        bool negative_exponent = false;
110
12.3k
        if (s[i] == 'e' || s[i] == 'E') {
111
12.3k
            ++i;
112
12.3k
            if (i != len) {
113
12.3k
                switch (s[i]) {
114
4.61k
                case '-':
115
4.61k
                    negative_exponent = true;
116
4.61k
                    [[fallthrough]];
117
4.61k
                case '+':
118
4.61k
                    ++i;
119
12.3k
                }
120
12.3k
            }
121
12.3k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
35.6k
            for (; i != len; ++i) {
127
23.3k
                const char& c = s[i];
128
23.3k
                if (LIKELY('0' <= c && c <= '9')) {
129
23.3k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
23.3k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
23.3k
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
23.3k
            }
144
12.3k
            if (negative_exponent) {
145
4.61k
                exponent = -exponent;
146
4.61k
            }
147
12.3k
        } else {
148
20
            *result = StringParser::PARSE_FAILURE;
149
20
            return 0;
150
20
        }
151
12.3k
    }
152
2.63M
    T int_part_number = 0;
153
2.63M
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
2.63M
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
2.63M
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
2.63M
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
2.63M
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
2.63M
    int actual_frac_part_count = 0;
167
2.63M
    int digit_index = 0;
168
2.63M
    if (result_int_part_digit_count >= 0) {
169
2.63M
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
2.39M
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
2.63M
                                           : result_int_part_digit_count,
172
2.63M
                                 end_digit_index);
173
2.63M
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
2.84M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
213k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
2.63M
        if (digit_index != max_index &&
180
2.63M
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
143
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
143
            return 0;
183
143
        }
184
        // get int part number
185
28.2M
        for (; digit_index != max_index; ++digit_index) {
186
25.5M
            if (UNLIKELY(s[digit_index] == '.')) {
187
960
                continue;
188
960
            }
189
25.5M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
25.5M
        }
191
2.63M
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
2.63M
        if (result_int_part_digit_count > total_significant_digit_count) {
193
76
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
76
                                                       total_significant_digit_count);
195
76
        }
196
2.63M
    } else {
197
        // leading zeros of fraction part
198
2.43k
        actual_frac_part_count = -result_int_part_digit_count;
199
2.43k
    }
200
    // get fraction part number
201
22.5M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
19.9M
        if (UNLIKELY(s[digit_index] == '.')) {
203
2.36M
            continue;
204
2.36M
        }
205
17.5M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
17.5M
        ++actual_frac_part_count;
207
17.5M
    }
208
2.63M
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
2.63M
    if (digit_index != end_digit_index) {
211
21.7k
        if (UNLIKELY(s[digit_index] == '.')) {
212
852
            ++digit_index;
213
852
        }
214
21.7k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
21.4k
            if (s[digit_index] >= '5') {
217
8.02k
                ++frac_part_number;
218
8.02k
                if (frac_part_number == type_scale_multiplier) {
219
906
                    frac_part_number = 0;
220
906
                    ++int_part_number;
221
906
                }
222
8.02k
            }
223
21.4k
        }
224
2.61M
    } else {
225
2.61M
        if (actual_frac_part_count < type_scale) {
226
362k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
362k
        }
228
2.61M
    }
229
2.63M
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
16
        return 0;
232
16
    }
233
234
2.63M
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
2.63M
    *result = StringParser::PARSE_SUCCESS;
236
2.63M
    return is_negative ? T(-value) : T(value);
237
2.63M
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
13.8k
        ParseResult* result) {
48
13.8k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
13.8k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
13.8k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
13.8k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
13.8k
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
13.8k
    s = skip_ascii_whitespaces(s, len);
55
56
13.8k
    bool is_negative = false;
57
13.8k
    if (len > 0) {
58
13.8k
        switch (*s) {
59
6.73k
        case '-':
60
6.73k
            is_negative = true;
61
6.73k
            [[fallthrough]];
62
6.73k
        case '+':
63
6.73k
            ++s;
64
6.73k
            --len;
65
13.8k
        }
66
13.8k
    }
67
    // Ignore leading zeros.
68
13.8k
    bool found_value = false;
69
52.7k
    while (len > 0 && UNLIKELY(*s == '0')) {
70
38.9k
        found_value = true;
71
38.9k
        ++s;
72
38.9k
        --len;
73
38.9k
    }
74
75
13.8k
    int found_dot = 0;
76
13.8k
    if (len > 0 && *s == '.') {
77
2.04k
        found_dot = 1;
78
2.04k
        ++s;
79
2.04k
        --len;
80
2.04k
    }
81
13.8k
    int int_part_count = 0;
82
13.8k
    int i = 0;
83
283k
    for (; i != len; ++i) {
84
270k
        const char& c = s[i];
85
270k
        if (LIKELY('0' <= c && c <= '9')) {
86
258k
            found_value = true;
87
258k
            if (!found_dot) {
88
138k
                ++int_part_count;
89
138k
            }
90
258k
        } else if (c == '.') {
91
11.6k
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
11.6k
            found_dot = 1;
96
11.6k
        } else {
97
12
            break;
98
12
        }
99
270k
    }
100
13.8k
    if (!found_value) {
101
        // '', '.'
102
11
        *result = StringParser::PARSE_FAILURE;
103
11
        return 0;
104
11
    }
105
    // parse exponent if any
106
13.8k
    int64_t exponent = 0;
107
13.8k
    auto end_digit_index = i;
108
13.8k
    if (i != len) {
109
1
        bool negative_exponent = false;
110
1
        if (s[i] == 'e' || s[i] == 'E') {
111
0
            ++i;
112
0
            if (i != len) {
113
0
                switch (s[i]) {
114
0
                case '-':
115
0
                    negative_exponent = true;
116
0
                    [[fallthrough]];
117
0
                case '+':
118
0
                    ++i;
119
0
                }
120
0
            }
121
0
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
0
            for (; i != len; ++i) {
127
0
                const char& c = s[i];
128
0
                if (LIKELY('0' <= c && c <= '9')) {
129
0
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
0
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
0
                } else {
139
                    // '123e12abc', '123e1.2'
140
0
                    *result = StringParser::PARSE_FAILURE;
141
0
                    return 0;
142
0
                }
143
0
            }
144
0
            if (negative_exponent) {
145
0
                exponent = -exponent;
146
0
            }
147
1
        } else {
148
1
            *result = StringParser::PARSE_FAILURE;
149
1
            return 0;
150
1
        }
151
1
    }
152
13.8k
    T int_part_number = 0;
153
13.8k
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
13.8k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
13.8k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
13.8k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
13.8k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
13.8k
    int actual_frac_part_count = 0;
167
13.8k
    int digit_index = 0;
168
13.8k
    if (result_int_part_digit_count >= 0) {
169
13.8k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
13.6k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
13.8k
                                           : result_int_part_digit_count,
172
13.8k
                                 end_digit_index);
173
13.8k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
13.8k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
0
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
13.8k
        if (digit_index != max_index &&
180
13.8k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
8
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
8
            return 0;
183
8
        }
184
        // get int part number
185
152k
        for (; digit_index != max_index; ++digit_index) {
186
138k
            if (UNLIKELY(s[digit_index] == '.')) {
187
0
                continue;
188
0
            }
189
138k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
138k
        }
191
13.8k
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
13.8k
        if (result_int_part_digit_count > total_significant_digit_count) {
193
0
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
0
                                                       total_significant_digit_count);
195
0
        }
196
13.8k
    } else {
197
        // leading zeros of fraction part
198
0
        actual_frac_part_count = -result_int_part_digit_count;
199
0
    }
200
    // get fraction part number
201
145k
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
131k
        if (UNLIKELY(s[digit_index] == '.')) {
203
11.6k
            continue;
204
11.6k
        }
205
119k
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
119k
        ++actual_frac_part_count;
207
119k
    }
208
13.8k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
13.8k
    if (digit_index != end_digit_index) {
211
17
        if (UNLIKELY(s[digit_index] == '.')) {
212
0
            ++digit_index;
213
0
        }
214
17
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
17
            if (s[digit_index] >= '5') {
217
17
                ++frac_part_number;
218
17
                if (frac_part_number == type_scale_multiplier) {
219
0
                    frac_part_number = 0;
220
0
                    ++int_part_number;
221
0
                }
222
17
            }
223
17
        }
224
13.8k
    } else {
225
13.8k
        if (actual_frac_part_count < type_scale) {
226
2.16k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
2.16k
        }
228
13.8k
    }
229
13.8k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
0
        return 0;
232
0
    }
233
234
13.8k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
13.8k
    *result = StringParser::PARSE_SUCCESS;
236
13.8k
    return is_negative ? T(-value) : T(value);
237
13.8k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
47
136k
        ParseResult* result) {
48
136k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
49
136k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
50
136k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
51
136k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
52
136k
                  "wide::Int256.");
53
    // Ignore leading and trailing spaces.
54
136k
    s = skip_ascii_whitespaces(s, len);
55
56
136k
    bool is_negative = false;
57
136k
    if (len > 0) {
58
136k
        switch (*s) {
59
20.6k
        case '-':
60
20.6k
            is_negative = true;
61
20.6k
            [[fallthrough]];
62
27.2k
        case '+':
63
27.2k
            ++s;
64
27.2k
            --len;
65
136k
        }
66
136k
    }
67
    // Ignore leading zeros.
68
136k
    bool found_value = false;
69
234k
    while (len > 0 && UNLIKELY(*s == '0')) {
70
98.9k
        found_value = true;
71
98.9k
        ++s;
72
98.9k
        --len;
73
98.9k
    }
74
75
136k
    int found_dot = 0;
76
136k
    if (len > 0 && *s == '.') {
77
17.0k
        found_dot = 1;
78
17.0k
        ++s;
79
17.0k
        --len;
80
17.0k
    }
81
136k
    int int_part_count = 0;
82
136k
    int i = 0;
83
5.06M
    for (; i != len; ++i) {
84
5.00M
        const char& c = s[i];
85
5.00M
        if (LIKELY('0' <= c && c <= '9')) {
86
4.81M
            found_value = true;
87
4.81M
            if (!found_dot) {
88
1.81M
                ++int_part_count;
89
1.81M
            }
90
4.81M
        } else if (c == '.') {
91
106k
            if (found_dot) {
92
0
                *result = StringParser::PARSE_FAILURE;
93
0
                return 0;
94
0
            }
95
106k
            found_dot = 1;
96
106k
        } else {
97
78.0k
            break;
98
78.0k
        }
99
5.00M
    }
100
136k
    if (!found_value) {
101
        // '', '.'
102
78
        *result = StringParser::PARSE_FAILURE;
103
78
        return 0;
104
78
    }
105
    // parse exponent if any
106
135k
    int64_t exponent = 0;
107
135k
    auto end_digit_index = i;
108
135k
    if (i != len) {
109
78.0k
        bool negative_exponent = false;
110
78.0k
        if (s[i] == 'e' || s[i] == 'E') {
111
77.9k
            ++i;
112
77.9k
            if (i != len) {
113
77.9k
                switch (s[i]) {
114
3.58k
                case '-':
115
3.58k
                    negative_exponent = true;
116
3.58k
                    [[fallthrough]];
117
70.2k
                case '+':
118
70.2k
                    ++i;
119
77.9k
                }
120
77.9k
            }
121
77.9k
            if (i == len) {
122
                // '123e', '123e+', '123e-'
123
0
                *result = StringParser::PARSE_FAILURE;
124
0
                return 0;
125
0
            }
126
234k
            for (; i != len; ++i) {
127
156k
                const char& c = s[i];
128
156k
                if (LIKELY('0' <= c && c <= '9')) {
129
156k
                    exponent = exponent * 10 + (c - '0');
130
                    // max string len is config::string_type_length_soft_limit_bytes,
131
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
132
                    // just check overflow of int32_t to simplify the logic
133
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
134
156k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
135
0
                        *result = StringParser::PARSE_OVERFLOW;
136
0
                        return 0;
137
0
                    }
138
156k
                } else {
139
                    // '123e12abc', '123e1.2'
140
10
                    *result = StringParser::PARSE_FAILURE;
141
10
                    return 0;
142
10
                }
143
156k
            }
144
77.9k
            if (negative_exponent) {
145
3.58k
                exponent = -exponent;
146
3.58k
            }
147
77.9k
        } else {
148
14
            *result = StringParser::PARSE_FAILURE;
149
14
            return 0;
150
14
        }
151
78.0k
    }
152
135k
    T int_part_number = 0;
153
135k
    T frac_part_number = 0;
154
    // TODO: check limit values of exponent and add UT
155
    // max string len is config::string_type_length_soft_limit_bytes,
156
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
157
    // so int_part_count will be in range of int32_t,
158
    // and int_part_count + exponent will be in range of int64_t
159
135k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
160
135k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
161
135k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
162
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
163
0
        return 0;
164
0
    }
165
135k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
166
135k
    int actual_frac_part_count = 0;
167
135k
    int digit_index = 0;
168
135k
    if (result_int_part_digit_count >= 0) {
169
135k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
170
123k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
171
135k
                                           : result_int_part_digit_count,
172
135k
                                 end_digit_index);
173
135k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
174
        // skip zero number
175
353k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
176
217k
        }
177
        // test 0.00, .00, 0.{00...}e2147483647
178
        // 0.00000e2147483647
179
135k
        if (digit_index != max_index &&
180
135k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
181
392
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
182
392
            return 0;
183
392
        }
184
        // get int part number
185
2.78M
        for (; digit_index != max_index; ++digit_index) {
186
2.64M
            if (UNLIKELY(s[digit_index] == '.')) {
187
67.5k
                continue;
188
67.5k
            }
189
2.58M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
190
2.58M
        }
191
135k
        auto total_significant_digit_count = i - ((found_dot && int_part_count > 0) ? 1 : 0);
192
135k
        if (result_int_part_digit_count > total_significant_digit_count) {
193
2.19k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
194
2.19k
                                                       total_significant_digit_count);
195
2.19k
        }
196
135k
    } else {
197
        // leading zeros of fraction part
198
173
        actual_frac_part_count = -result_int_part_digit_count;
199
173
    }
200
    // get fraction part number
201
1.93M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
202
1.79M
        if (UNLIKELY(s[digit_index] == '.')) {
203
35.2k
            continue;
204
35.2k
        }
205
1.76M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
206
1.76M
        ++actual_frac_part_count;
207
1.76M
    }
208
135k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
209
    // there are still extra fraction digits left, check rounding
210
135k
    if (digit_index != end_digit_index) {
211
21.4k
        if (UNLIKELY(s[digit_index] == '.')) {
212
862
            ++digit_index;
213
862
        }
214
21.4k
        if (digit_index != end_digit_index) {
215
            // example: test 1.5 -> decimal(1, 0)
216
21.2k
            if (s[digit_index] >= '5') {
217
8.99k
                ++frac_part_number;
218
8.99k
                if (frac_part_number == type_scale_multiplier) {
219
988
                    frac_part_number = 0;
220
988
                    ++int_part_number;
221
988
                }
222
8.99k
            }
223
21.2k
        }
224
114k
    } else {
225
114k
        if (actual_frac_part_count < type_scale) {
226
91.6k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
227
91.6k
        }
228
114k
    }
229
135k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
230
56
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
231
56
        return 0;
232
56
    }
233
234
135k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
235
135k
    *result = StringParser::PARSE_SUCCESS;
236
135k
    return is_negative ? T(-value) : T(value);
237
135k
}
238
239
template Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>(
240
        const char* __restrict s, size_t len, int type_precision, int type_scale,
241
        ParseResult* result);
242
template Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>(
243
        const char* __restrict s, size_t len, int type_precision, int type_scale,
244
        ParseResult* result);
245
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>(
246
        const char* __restrict s, size_t len, int type_precision, int type_scale,
247
        ParseResult* result);
248
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>(
249
        const char* __restrict s, size_t len, int type_precision, int type_scale,
250
        ParseResult* result);
251
template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>(
252
        const char* __restrict s, size_t len, int type_precision, int type_scale,
253
        ParseResult* result);
254
} // end namespace doris
255
#include "common/compile_check_avoid_end.h"