Coverage Report

Created: 2026-06-22 21:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/string_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/string_parser.hpp"
19
20
#include <limits>
21
22
#include "core/extended_types.h"
23
#include "core/types.h"
24
namespace doris {
25
#include "common/compile_check_avoid_begin.h"
26
// Supported decimal number format:
27
// <decimal> ::= <whitespace>* <value> <whitespace>*
28
//
29
// <whitespace> ::= " " | "\t" | "\n" | "\r" | "\f" | "\v"
30
//
31
// <value> ::= <sign>? <significand> <exponent>?
32
//
33
// <sign> ::= "+" | "-"
34
//
35
// <significand> ::= <digits> "." <digits> | <digits> | <digits> "." | "." <digits>
36
//
37
// <digits> ::= <digit>+
38
//
39
// <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
40
//
41
// <exponent> ::= <e_marker> <sign>? <digits>
42
//
43
// <e_marker> ::= "e" | "E"
44
//
45
// Parsing algorithm:
46
// 1. Trim spaces and the sign, then normalize the significand by skipping leading zeros and an
47
//    optional leading dot. During this scan, count digits that belong to the original integral
48
//    part (`int_part_count`) and remember where the significand ends (`end_digit_index`).
49
// 2. Parse the optional exponent. Scientific notation is handled by moving the decimal point:
50
//    `result_int_part_digit_count = int_part_count + exponent`. For example, "12.34e-1" has
51
//    int_part_count=2 and exponent=-1, so the result has one integral digit: "1.234".
52
// 3. Build the result in scaled-integer form: first collect the integral digits up to the shifted
53
//    decimal point, then collect up to `type_scale` fractional digits, padding with zeros when the
54
//    input has fewer fractional digits than the target scale.
55
// 4. If there are extra fractional digits, round half up using the first discarded digit. Finally,
56
//    check the integral digit count against `type_precision - type_scale` and return the signed
57
//    scaled integer value.
58
template <PrimitiveType P>
59
typename PrimitiveTypeTraits<P>::CppType::NativeType StringParser::string_to_decimal(
60
        const char* __restrict s, size_t len, int type_precision, int type_scale,
61
348k
        ParseResult* result) {
62
348k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
348k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
348k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
348k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
348k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
348k
    s = skip_ascii_whitespaces(s, len);
79
80
348k
    bool is_negative = false;
81
348k
    if (len > 0) {
82
348k
        switch (*s) {
83
92.2k
        case '-':
84
92.2k
            is_negative = true;
85
92.2k
            [[fallthrough]];
86
119k
        case '+':
87
119k
            ++s;
88
119k
            --len;
89
348k
        }
90
348k
    }
91
    // Ignore leading zeros.
92
348k
    bool found_value = false;
93
685k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
336k
        found_value = true;
95
336k
        ++s;
96
336k
        --len;
97
336k
    }
98
99
348k
    int found_dot = 0;
100
348k
    if (len > 0 && *s == '.') {
101
84.3k
        found_dot = 1;
102
84.3k
        ++s;
103
84.3k
        --len;
104
84.3k
    }
105
348k
    int int_part_count = 0;
106
348k
    int i = 0;
107
8.16M
    for (; i != len; ++i) {
108
7.92M
        const char& c = s[i];
109
7.92M
        if (LIKELY('0' <= c && c <= '9')) {
110
7.59M
            found_value = true;
111
7.59M
            if (!found_dot) {
112
2.34M
                ++int_part_count;
113
2.34M
            }
114
7.59M
        } else if (c == '.') {
115
220k
            if (found_dot) {
116
2
                *result = StringParser::PARSE_FAILURE;
117
2
                return 0;
118
2
            }
119
220k
            found_dot = 1;
120
220k
        } else {
121
108k
            break;
122
108k
        }
123
7.92M
    }
124
348k
    if (!found_value) {
125
        // No digit was found at all, e.g. '' or '.'
126
346
        *result = StringParser::PARSE_FAILURE;
127
346
        return 0;
128
346
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
348k
    int64_t exponent = 0;
133
348k
    auto end_digit_index = i;
134
348k
    if (i != len) {
135
108k
        bool negative_exponent = false;
136
108k
        if (s[i] == 'e' || s[i] == 'E') {
137
108k
            ++i;
138
108k
            if (i != len) {
139
108k
                switch (s[i]) {
140
11.4k
                case '-':
141
11.4k
                    negative_exponent = true;
142
11.4k
                    [[fallthrough]];
143
77.1k
                case '+':
144
77.1k
                    ++i;
145
108k
                }
146
108k
            }
147
108k
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
6
                *result = StringParser::PARSE_FAILURE;
150
6
                return 0;
151
6
            }
152
315k
            for (; i != len; ++i) {
153
207k
                const char& c = s[i];
154
207k
                if (LIKELY('0' <= c && c <= '9')) {
155
207k
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
207k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
207k
                } else {
165
                    // '123e12abc', '123e1.2'
166
22
                    *result = StringParser::PARSE_FAILURE;
167
22
                    return 0;
168
22
                }
169
207k
            }
170
108k
            if (negative_exponent) {
171
11.4k
                exponent = -exponent;
172
11.4k
            }
173
108k
        } else {
174
110
            *result = StringParser::PARSE_FAILURE;
175
110
            return 0;
176
110
        }
177
108k
    }
178
    // TODO: check the limit values of the exponent and add unit tests.
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
348k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
348k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
348k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
348k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
348k
    T int_part_number = 0;
191
348k
    T frac_part_number = 0;
192
348k
    int actual_frac_part_count = 0;
193
348k
    int digit_index = 0;
194
348k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
342k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
302k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
342k
                                           : result_int_part_digit_count,
202
342k
                                 end_digit_index);
203
342k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // Skip leading zero digits of the integer part.
205
1.19M
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
855k
        }
207
        // An all-zero integer part skips the overflow check below, e.g. 0.00, .00, 0.{00...}e2147483647,
208
        // 0.00000e2147483647
209
342k
        if (digit_index != max_index &&
210
342k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
11.5k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
11.5k
            return 0;
213
11.5k
        }
214
        // Accumulate the integer-part digits, skipping an embedded dot.
215
3.44M
        for (; digit_index != max_index; ++digit_index) {
216
3.11M
            if (UNLIKELY(s[digit_index] == '.')) {
217
70.1k
                continue;
218
70.1k
            }
219
3.04M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
3.04M
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
331k
        auto total_significant_digit_count =
225
331k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
331k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
63.9k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
63.9k
                                                       total_significant_digit_count);
229
63.9k
        }
230
331k
    } else {
231
        // The shifted decimal point lies left of the significand: count the implicit leading
        // zeros of the fraction part.
232
5.51k
        actual_frac_part_count = -result_int_part_digit_count;
233
5.51k
    }
234
    // Accumulate at most type_scale fraction digits, skipping an embedded dot.
235
3.61M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
3.27M
        if (UNLIKELY(s[digit_index] == '.')) {
237
130k
            continue;
238
130k
        }
239
3.14M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
3.14M
        ++actual_frac_part_count;
241
3.14M
    }
242
336k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
336k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
80.7k
        if (UNLIKELY(s[digit_index] == '.')) {
249
3.46k
            ++digit_index;
250
3.46k
        }
251
80.7k
        if (digit_index != end_digit_index) {
252
            // example: test 1.5 -> decimal(1, 0)
253
79.7k
            if (s[digit_index] >= '5') {
254
32.9k
                ++frac_part_number;
255
32.9k
                if (frac_part_number == type_scale_multiplier) {
256
3.43k
                    frac_part_number = 0;
257
3.43k
                    ++int_part_number;
258
3.43k
                }
259
32.9k
            }
260
79.7k
        }
261
256k
    } else {
262
256k
        if (actual_frac_part_count < type_scale) {
263
190k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
190k
        }
265
256k
    }
266
336k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
72
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
72
        return 0;
269
72
    }
270
271
336k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
336k
    *result = StringParser::PARSE_SUCCESS;
273
336k
    return is_negative ? T(-value) : T(value);
274
336k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE28EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
61
53.8k
        ParseResult* result) {
62
53.8k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
53.8k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
53.8k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
53.8k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
53.8k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
53.8k
    s = skip_ascii_whitespaces(s, len);
79
80
53.8k
    bool is_negative = false;
81
53.8k
    if (len > 0) {
82
53.8k
        switch (*s) {
83
24.7k
        case '-':
84
24.7k
            is_negative = true;
85
24.7k
            [[fallthrough]];
86
32.1k
        case '+':
87
32.1k
            ++s;
88
32.1k
            --len;
89
53.8k
        }
90
53.8k
    }
91
    // Ignore leading zeros.
92
53.8k
    bool found_value = false;
93
107k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
53.5k
        found_value = true;
95
53.5k
        ++s;
96
53.5k
        --len;
97
53.5k
    }
98
99
53.8k
    int found_dot = 0;
100
53.8k
    if (len > 0 && *s == '.') {
101
17.8k
        found_dot = 1;
102
17.8k
        ++s;
103
17.8k
        --len;
104
17.8k
    }
105
53.8k
    int int_part_count = 0;
106
53.8k
    int i = 0;
107
576k
    for (; i != len; ++i) {
108
532k
        const char& c = s[i];
109
532k
        if (LIKELY('0' <= c && c <= '9')) {
110
508k
            found_value = true;
111
508k
            if (!found_dot) {
112
158k
                ++int_part_count;
113
158k
            }
114
508k
        } else if (c == '.') {
115
14.5k
            if (found_dot) {
116
2
                *result = StringParser::PARSE_FAILURE;
117
2
                return 0;
118
2
            }
119
14.5k
            found_dot = 1;
120
14.5k
        } else {
121
9.52k
            break;
122
9.52k
        }
123
532k
    }
124
53.8k
    if (!found_value) {
125
        // No digit was found at all, e.g. '' or '.'
126
146
        *result = StringParser::PARSE_FAILURE;
127
146
        return 0;
128
146
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
53.7k
    int64_t exponent = 0;
133
53.7k
    auto end_digit_index = i;
134
53.7k
    if (i != len) {
135
9.39k
        bool negative_exponent = false;
136
9.39k
        if (s[i] == 'e' || s[i] == 'E') {
137
9.33k
            ++i;
138
9.33k
            if (i != len) {
139
9.33k
                switch (s[i]) {
140
1.54k
                case '-':
141
1.54k
                    negative_exponent = true;
142
1.54k
                    [[fallthrough]];
143
1.54k
                case '+':
144
1.54k
                    ++i;
145
9.33k
                }
146
9.33k
            }
147
9.33k
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
6
                *result = StringParser::PARSE_FAILURE;
150
6
                return 0;
151
6
            }
152
24.6k
            for (; i != len; ++i) {
153
15.3k
                const char& c = s[i];
154
15.3k
                if (LIKELY('0' <= c && c <= '9')) {
155
15.3k
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
15.3k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
15.3k
                } else {
165
                    // '123e12abc', '123e1.2'
166
12
                    *result = StringParser::PARSE_FAILURE;
167
12
                    return 0;
168
12
                }
169
15.3k
            }
170
9.31k
            if (negative_exponent) {
171
1.53k
                exponent = -exponent;
172
1.53k
            }
173
9.31k
        } else {
174
60
            *result = StringParser::PARSE_FAILURE;
175
60
            return 0;
176
60
        }
177
9.39k
    }
178
    // TODO: check the limit values of the exponent and add unit tests.
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
53.6k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
53.6k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
53.6k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
53.6k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
53.6k
    T int_part_number = 0;
191
53.6k
    T frac_part_number = 0;
192
53.6k
    int actual_frac_part_count = 0;
193
53.6k
    int digit_index = 0;
194
53.6k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
53.6k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
32.3k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
53.6k
                                           : result_int_part_digit_count,
202
53.6k
                                 end_digit_index);
203
53.6k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // Skip leading zero digits of the integer part.
205
266k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
212k
        }
207
        // An all-zero integer part skips the overflow check below, e.g. 0.00, .00, 0.{00...}e2147483647,
208
        // 0.00000e2147483647
209
53.6k
        if (digit_index != max_index &&
210
53.6k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
1.29k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
1.29k
            return 0;
213
1.29k
        }
214
        // Accumulate the integer-part digits, skipping an embedded dot.
215
154k
        for (; digit_index != max_index; ++digit_index) {
216
101k
            if (UNLIKELY(s[digit_index] == '.')) {
217
1.60k
                continue;
218
1.60k
            }
219
100k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
100k
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
52.3k
        auto total_significant_digit_count =
225
52.3k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
52.3k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
100
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
100
                                                       total_significant_digit_count);
229
100
        }
230
52.3k
    } else {
231
        // The shifted decimal point lies left of the significand: count the implicit leading
        // zeros of the fraction part.
232
48
        actual_frac_part_count = -result_int_part_digit_count;
233
48
    }
234
    // Accumulate at most type_scale fraction digits, skipping an embedded dot.
235
168k
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
115k
        if (UNLIKELY(s[digit_index] == '.')) {
237
9.90k
            continue;
238
9.90k
        }
239
105k
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
105k
        ++actual_frac_part_count;
241
105k
    }
242
52.3k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
52.3k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
21.4k
        if (UNLIKELY(s[digit_index] == '.')) {
249
904
            ++digit_index;
250
904
        }
251
21.4k
        if (digit_index != end_digit_index) {
252
            // Round half up on the first discarded digit, e.g. 1.5 cast to decimal(1, 0).
253
21.1k
            if (s[digit_index] >= '5') {
254
8.96k
                ++frac_part_number;
255
8.96k
                if (frac_part_number == type_scale_multiplier) {
256
856
                    frac_part_number = 0;
257
856
                    ++int_part_number;
258
856
                }
259
8.96k
            }
260
21.1k
        }
261
30.9k
    } else {
262
30.9k
        if (actual_frac_part_count < type_scale) {
263
27.4k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
27.4k
        }
265
30.9k
    }
266
52.3k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
24
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
24
        return 0;
269
24
    }
270
271
52.3k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
52.3k
    *result = StringParser::PARSE_SUCCESS;
273
52.3k
    return is_negative ? T(-value) : T(value);
274
52.3k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE29EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
61
85.1k
        ParseResult* result) {
62
85.1k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
85.1k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
85.1k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
85.1k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
85.1k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
85.1k
    s = skip_ascii_whitespaces(s, len);
79
80
85.1k
    bool is_negative = false;
81
85.1k
    if (len > 0) {
82
85.1k
        switch (*s) {
83
21.7k
        case '-':
84
21.7k
            is_negative = true;
85
21.7k
            [[fallthrough]];
86
28.3k
        case '+':
87
28.3k
            ++s;
88
28.3k
            --len;
89
85.1k
        }
90
85.1k
    }
91
    // Ignore leading zeros.
92
85.1k
    bool found_value = false;
93
161k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
75.9k
        found_value = true;
95
75.9k
        ++s;
96
75.9k
        --len;
97
75.9k
    }
98
99
85.1k
    int found_dot = 0;
100
85.1k
    if (len > 0 && *s == '.') {
101
23.7k
        found_dot = 1;
102
23.7k
        ++s;
103
23.7k
        --len;
104
23.7k
    }
105
85.1k
    int int_part_count = 0;
106
85.1k
    int i = 0;
107
1.39M
    for (; i != len; ++i) {
108
1.32M
        const char& c = s[i];
109
1.32M
        if (LIKELY('0' <= c && c <= '9')) {
110
1.25M
            found_value = true;
111
1.25M
            if (!found_dot) {
112
514k
                ++int_part_count;
113
514k
            }
114
1.25M
        } else if (c == '.') {
115
52.7k
            if (found_dot) {
116
0
                *result = StringParser::PARSE_FAILURE;
117
0
                return 0;
118
0
            }
119
52.7k
            found_dot = 1;
120
52.7k
        } else {
121
11.6k
            break;
122
11.6k
        }
123
1.32M
    }
124
85.1k
    if (!found_value) {
125
        // No digit was found at all, e.g. '' or '.'
126
69
        *result = StringParser::PARSE_FAILURE;
127
69
        return 0;
128
69
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
85.0k
    int64_t exponent = 0;
133
85.0k
    auto end_digit_index = i;
134
85.0k
    if (i != len) {
135
11.5k
        bool negative_exponent = false;
136
11.5k
        if (s[i] == 'e' || s[i] == 'E') {
137
11.5k
            ++i;
138
11.5k
            if (i != len) {
139
11.5k
                switch (s[i]) {
140
3.81k
                case '-':
141
3.81k
                    negative_exponent = true;
142
3.81k
                    [[fallthrough]];
143
3.81k
                case '+':
144
3.81k
                    ++i;
145
11.5k
                }
146
11.5k
            }
147
11.5k
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
0
                *result = StringParser::PARSE_FAILURE;
150
0
                return 0;
151
0
            }
152
32.2k
            for (; i != len; ++i) {
153
20.6k
                const char& c = s[i];
154
20.6k
                if (LIKELY('0' <= c && c <= '9')) {
155
20.6k
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
20.6k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
20.6k
                } else {
165
                    // '123e12abc', '123e1.2'
166
0
                    *result = StringParser::PARSE_FAILURE;
167
0
                    return 0;
168
0
                }
169
20.6k
            }
170
11.5k
            if (negative_exponent) {
171
3.81k
                exponent = -exponent;
172
3.81k
            }
173
11.5k
        } else {
174
23
            *result = StringParser::PARSE_FAILURE;
175
23
            return 0;
176
23
        }
177
11.5k
    }
178
    // TODO: check the limit values of the exponent and add unit tests.
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
85.0k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
85.0k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
85.0k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
85.0k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
85.0k
    T int_part_number = 0;
191
85.0k
    T frac_part_number = 0;
192
85.0k
    int actual_frac_part_count = 0;
193
85.0k
    int digit_index = 0;
194
85.0k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
82.7k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
75.7k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
82.7k
                                           : result_int_part_digit_count,
202
82.7k
                                 end_digit_index);
203
82.7k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // skip zero number
205
295k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
212k
        }
207
        // test 0.00, .00, 0.{00...}e2147483647
208
        // 0.00000e2147483647
209
82.7k
        if (digit_index != max_index &&
210
82.7k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
10.0k
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
10.0k
            return 0;
213
10.0k
        }
214
        // get int part number
215
406k
        for (; digit_index != max_index; ++digit_index) {
216
333k
            if (UNLIKELY(s[digit_index] == '.')) {
217
960
                continue;
218
960
            }
219
332k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
332k
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
72.7k
        auto total_significant_digit_count =
225
72.7k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
72.7k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
76
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
76
                                                       total_significant_digit_count);
229
76
        }
230
72.7k
    } else {
231
        // leading zeros of fraction part
232
2.32k
        actual_frac_part_count = -result_int_part_digit_count;
233
2.32k
    }
234
    // get fraction part number
235
586k
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
511k
        if (UNLIKELY(s[digit_index] == '.')) {
237
40.0k
            continue;
238
40.0k
        }
239
471k
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
471k
        ++actual_frac_part_count;
241
471k
    }
242
75.0k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
75.0k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
20.0k
        if (UNLIKELY(s[digit_index] == '.')) {
249
852
            ++digit_index;
250
852
        }
251
20.0k
        if (digit_index != end_digit_index) {
252
            // example: test 1.5 -> decimal(1, 0)
253
19.8k
            if (s[digit_index] >= '5') {
254
7.95k
                ++frac_part_number;
255
7.95k
                if (frac_part_number == type_scale_multiplier) {
256
836
                    frac_part_number = 0;
257
836
                    ++int_part_number;
258
836
                }
259
7.95k
            }
260
19.8k
        }
261
55.0k
    } else {
262
55.0k
        if (actual_frac_part_count < type_scale) {
263
31.1k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
31.1k
        }
265
55.0k
    }
266
75.0k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
16
        return 0;
269
16
    }
270
271
75.0k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
75.0k
    *result = StringParser::PARSE_SUCCESS;
273
75.0k
    return is_negative ? T(-value) : T(value);
274
75.0k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE30EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
61
81.8k
        ParseResult* result) {
62
81.8k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
81.8k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
81.8k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
81.8k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
81.8k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
81.8k
    s = skip_ascii_whitespaces(s, len);
79
80
81.8k
    bool is_negative = false;
81
81.8k
    if (len > 0) {
82
81.8k
        switch (*s) {
83
21.7k
        case '-':
84
21.7k
            is_negative = true;
85
21.7k
            [[fallthrough]];
86
28.3k
        case '+':
87
28.3k
            ++s;
88
28.3k
            --len;
89
81.8k
        }
90
81.8k
    }
91
    // Ignore leading zeros.
92
81.8k
    bool found_value = false;
93
156k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
74.5k
        found_value = true;
95
74.5k
        ++s;
96
74.5k
        --len;
97
74.5k
    }
98
99
81.8k
    int found_dot = 0;
100
81.8k
    if (len > 0 && *s == '.') {
101
24.8k
        found_dot = 1;
102
24.8k
        ++s;
103
24.8k
        --len;
104
24.8k
    }
105
81.8k
    int int_part_count = 0;
106
81.8k
    int i = 0;
107
2.04M
    for (; i != len; ++i) {
108
1.97M
        const char& c = s[i];
109
1.97M
        if (LIKELY('0' <= c && c <= '9')) {
110
1.91M
            found_value = true;
111
1.91M
            if (!found_dot) {
112
555k
                ++int_part_count;
113
555k
            }
114
1.91M
        } else if (c == '.') {
115
49.1k
            if (found_dot) {
116
0
                *result = StringParser::PARSE_FAILURE;
117
0
                return 0;
118
0
            }
119
49.1k
            found_dot = 1;
120
49.1k
        } else {
121
12.4k
            break;
122
12.4k
        }
123
1.97M
    }
124
81.8k
    if (!found_value) {
125
        // '', '.'
126
55
        *result = StringParser::PARSE_FAILURE;
127
55
        return 0;
128
55
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
81.7k
    int64_t exponent = 0;
133
81.7k
    auto end_digit_index = i;
134
81.7k
    if (i != len) {
135
12.3k
        bool negative_exponent = false;
136
12.3k
        if (s[i] == 'e' || s[i] == 'E') {
137
12.3k
            ++i;
138
12.3k
            if (i != len) {
139
12.3k
                switch (s[i]) {
140
4.57k
                case '-':
141
4.57k
                    negative_exponent = true;
142
4.57k
                    [[fallthrough]];
143
4.58k
                case '+':
144
4.58k
                    ++i;
145
12.3k
                }
146
12.3k
            }
147
12.3k
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
0
                *result = StringParser::PARSE_FAILURE;
150
0
                return 0;
151
0
            }
152
35.5k
            for (; i != len; ++i) {
153
23.2k
                const char& c = s[i];
154
23.2k
                if (LIKELY('0' <= c && c <= '9')) {
155
23.2k
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
23.2k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
23.2k
                } else {
165
                    // '123e12abc', '123e1.2'
166
0
                    *result = StringParser::PARSE_FAILURE;
167
0
                    return 0;
168
0
                }
169
23.2k
            }
170
12.3k
            if (negative_exponent) {
171
4.57k
                exponent = -exponent;
172
4.57k
            }
173
12.3k
        } else {
174
12
            *result = StringParser::PARSE_FAILURE;
175
12
            return 0;
176
12
        }
177
12.3k
    }
178
    // TODO: check limit values of exponent and add UT
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
81.7k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
81.7k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
81.7k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
81.7k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
81.7k
    T int_part_number = 0;
191
81.7k
    T frac_part_number = 0;
192
81.7k
    int actual_frac_part_count = 0;
193
81.7k
    int digit_index = 0;
194
81.7k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
78.6k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
72.4k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
78.6k
                                           : result_int_part_digit_count,
202
78.6k
                                 end_digit_index);
203
78.6k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // skip zero number
205
292k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
213k
        }
207
        // test 0.00, .00, 0.{00...}e2147483647
208
        // 0.00000e2147483647
209
78.6k
        if (digit_index != max_index &&
210
78.6k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
140
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
140
            return 0;
213
140
        }
214
        // get int part number
215
594k
        for (; digit_index != max_index; ++digit_index) {
216
515k
            if (UNLIKELY(s[digit_index] == '.')) {
217
962
                continue;
218
962
            }
219
514k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
514k
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
78.5k
        auto total_significant_digit_count =
225
78.5k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
78.5k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
80
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
80
                                                       total_significant_digit_count);
229
80
        }
230
78.5k
    } else {
231
        // leading zeros of fraction part
232
3.08k
        actual_frac_part_count = -result_int_part_digit_count;
233
3.08k
    }
234
    // get fraction part number
235
1.20M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
1.12M
        if (UNLIKELY(s[digit_index] == '.')) {
237
45.5k
            continue;
238
45.5k
        }
239
1.08M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
1.08M
        ++actual_frac_part_count;
241
1.08M
    }
242
81.6k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
81.6k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
20.1k
        if (UNLIKELY(s[digit_index] == '.')) {
249
852
            ++digit_index;
250
852
        }
251
20.1k
        if (digit_index != end_digit_index) {
252
            // example: test 1.5 -> decimal(1, 0)
253
19.9k
            if (s[digit_index] >= '5') {
254
8.02k
                ++frac_part_number;
255
8.02k
                if (frac_part_number == type_scale_multiplier) {
256
905
                    frac_part_number = 0;
257
905
                    ++int_part_number;
258
905
                }
259
8.02k
            }
260
19.9k
        }
261
61.4k
    } else {
262
61.4k
        if (actual_frac_part_count < type_scale) {
263
44.0k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
44.0k
        }
265
61.4k
    }
266
81.6k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
16
        return 0;
269
16
    }
270
271
81.5k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
81.5k
    *result = StringParser::PARSE_SUCCESS;
273
81.5k
    return is_negative ? T(-value) : T(value);
274
81.6k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE20EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
61
13.5k
        ParseResult* result) {
62
13.5k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
13.5k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
13.5k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
13.5k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
13.5k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
13.5k
    s = skip_ascii_whitespaces(s, len);
79
80
13.5k
    bool is_negative = false;
81
13.5k
    if (len > 0) {
82
13.5k
        switch (*s) {
83
6.68k
        case '-':
84
6.68k
            is_negative = true;
85
6.68k
            [[fallthrough]];
86
6.68k
        case '+':
87
6.68k
            ++s;
88
6.68k
            --len;
89
13.5k
        }
90
13.5k
    }
91
    // Ignore leading zeros.
92
13.5k
    bool found_value = false;
93
52.3k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
38.8k
        found_value = true;
95
38.8k
        ++s;
96
38.8k
        --len;
97
38.8k
    }
98
99
13.5k
    int found_dot = 0;
100
13.5k
    if (len > 0 && *s == '.') {
101
2.00k
        found_dot = 1;
102
2.00k
        ++s;
103
2.00k
        --len;
104
2.00k
    }
105
13.5k
    int int_part_count = 0;
106
13.5k
    int i = 0;
107
279k
    for (; i != len; ++i) {
108
266k
        const char& c = s[i];
109
266k
        if (LIKELY('0' <= c && c <= '9')) {
110
254k
            found_value = true;
111
254k
            if (!found_dot) {
112
136k
                ++int_part_count;
113
136k
            }
114
254k
        } else if (c == '.') {
115
11.4k
            if (found_dot) {
116
0
                *result = StringParser::PARSE_FAILURE;
117
0
                return 0;
118
0
            }
119
11.4k
            found_dot = 1;
120
11.4k
        } else {
121
11
            break;
122
11
        }
123
266k
    }
124
13.5k
    if (!found_value) {
125
        // '', '.'
126
10
        *result = StringParser::PARSE_FAILURE;
127
10
        return 0;
128
10
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
13.5k
    int64_t exponent = 0;
133
13.5k
    auto end_digit_index = i;
134
13.5k
    if (i != len) {
135
1
        bool negative_exponent = false;
136
1
        if (s[i] == 'e' || s[i] == 'E') {
137
0
            ++i;
138
0
            if (i != len) {
139
0
                switch (s[i]) {
140
0
                case '-':
141
0
                    negative_exponent = true;
142
0
                    [[fallthrough]];
143
0
                case '+':
144
0
                    ++i;
145
0
                }
146
0
            }
147
0
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
0
                *result = StringParser::PARSE_FAILURE;
150
0
                return 0;
151
0
            }
152
0
            for (; i != len; ++i) {
153
0
                const char& c = s[i];
154
0
                if (LIKELY('0' <= c && c <= '9')) {
155
0
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
0
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
0
                } else {
165
                    // '123e12abc', '123e1.2'
166
0
                    *result = StringParser::PARSE_FAILURE;
167
0
                    return 0;
168
0
                }
169
0
            }
170
0
            if (negative_exponent) {
171
0
                exponent = -exponent;
172
0
            }
173
1
        } else {
174
1
            *result = StringParser::PARSE_FAILURE;
175
1
            return 0;
176
1
        }
177
1
    }
178
    // TODO: check limit values of exponent and add UT
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
13.5k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
13.5k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
13.5k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
13.5k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
13.5k
    T int_part_number = 0;
191
13.5k
    T frac_part_number = 0;
192
13.5k
    int actual_frac_part_count = 0;
193
13.5k
    int digit_index = 0;
194
13.5k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
13.5k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
13.4k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
13.5k
                                           : result_int_part_digit_count,
202
13.5k
                                 end_digit_index);
203
13.5k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // skip zero number
205
13.5k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
0
        }
207
        // test 0.00, .00, 0.{00...}e2147483647
208
        // 0.00000e2147483647
209
13.5k
        if (digit_index != max_index &&
210
13.5k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
8
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
8
            return 0;
213
8
        }
214
        // get int part number
215
150k
        for (; digit_index != max_index; ++digit_index) {
216
136k
            if (UNLIKELY(s[digit_index] == '.')) {
217
0
                continue;
218
0
            }
219
136k
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
136k
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
13.5k
        auto total_significant_digit_count =
225
13.5k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
13.5k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
0
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
0
                                                       total_significant_digit_count);
229
0
        }
230
13.5k
    } else {
231
        // leading zeros of fraction part
232
0
        actual_frac_part_count = -result_int_part_digit_count;
233
0
    }
234
    // get fraction part number
235
143k
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
129k
        if (UNLIKELY(s[digit_index] == '.')) {
237
11.4k
            continue;
238
11.4k
        }
239
118k
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
118k
        ++actual_frac_part_count;
241
118k
    }
242
13.5k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
13.5k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
17
        if (UNLIKELY(s[digit_index] == '.')) {
249
0
            ++digit_index;
250
0
        }
251
17
        if (digit_index != end_digit_index) {
252
            // example: test 1.5 -> decimal(1, 0)
253
17
            if (s[digit_index] >= '5') {
254
17
                ++frac_part_number;
255
17
                if (frac_part_number == type_scale_multiplier) {
256
0
                    frac_part_number = 0;
257
0
                    ++int_part_number;
258
0
                }
259
17
            }
260
17
        }
261
13.5k
    } else {
262
13.5k
        if (actual_frac_part_count < type_scale) {
263
1.94k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
1.94k
        }
265
13.5k
    }
266
13.5k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
0
        return 0;
269
0
    }
270
271
13.5k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
13.5k
    *result = StringParser::PARSE_SUCCESS;
273
13.5k
    return is_negative ? T(-value) : T(value);
274
13.5k
}
_ZN5doris12StringParser17string_to_decimalILNS_13PrimitiveTypeE35EEENS_19PrimitiveTypeTraitsIXT_EE7CppType10NativeTypeEPKcmiiPNS0_11ParseResultE
Line
Count
Source
61
114k
        ParseResult* result) {
62
114k
    using T = typename PrimitiveTypeTraits<P>::CppType::NativeType;
63
114k
    static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
64
114k
                          std::is_same_v<T, __int128> || std::is_same_v<T, wide::Int256>,
65
114k
                  "Cast string to decimal only support target type int32_t, int64_t, __int128 or "
66
114k
                  "wide::Int256.");
67
68
    // Parse in two logical coordinate systems:
69
    // 1. `s[0, end_digit_index)` is the normalized significand after trimming spaces, sign and
70
    //    leading zeros. If the original value starts with '.', the dot is also skipped so
71
    //    ".14E+3" is parsed as significand "14" with exponent 3.
72
    // 2. `result_int_part_digit_count = int_part_count + exponent` is the decimal point position
73
    //    after applying scientific notation. For example, "1.4E+2" has int_part_count=1,
74
    //    exponent=2, result_int_part_digit_count=3, so "14" becomes integer 140.
75
    // `digit_index` always indexes the normalized significand string, which may still contain a
76
    // dot for inputs like "1.4E+2"; loops that build numbers skip that dot explicitly.
77
    // Ignore leading and trailing spaces.
78
114k
    s = skip_ascii_whitespaces(s, len);
79
80
114k
    bool is_negative = false;
81
114k
    if (len > 0) {
82
114k
        switch (*s) {
83
17.3k
        case '-':
84
17.3k
            is_negative = true;
85
17.3k
            [[fallthrough]];
86
23.9k
        case '+':
87
23.9k
            ++s;
88
23.9k
            --len;
89
114k
        }
90
114k
    }
91
    // Ignore leading zeros.
92
114k
    bool found_value = false;
93
208k
    while (len > 0 && UNLIKELY(*s == '0')) {
94
93.9k
        found_value = true;
95
93.9k
        ++s;
96
93.9k
        --len;
97
93.9k
    }
98
99
114k
    int found_dot = 0;
100
114k
    if (len > 0 && *s == '.') {
101
15.8k
        found_dot = 1;
102
15.8k
        ++s;
103
15.8k
        --len;
104
15.8k
    }
105
114k
    int int_part_count = 0;
106
114k
    int i = 0;
107
3.86M
    for (; i != len; ++i) {
108
3.82M
        const char& c = s[i];
109
3.82M
        if (LIKELY('0' <= c && c <= '9')) {
110
3.65M
            found_value = true;
111
3.65M
            if (!found_dot) {
112
984k
                ++int_part_count;
113
984k
            }
114
3.65M
        } else if (c == '.') {
115
92.3k
            if (found_dot) {
116
0
                *result = StringParser::PARSE_FAILURE;
117
0
                return 0;
118
0
            }
119
92.3k
            found_dot = 1;
120
92.3k
        } else {
121
75.0k
            break;
122
75.0k
        }
123
3.82M
    }
124
114k
    if (!found_value) {
125
        // '', '.'
126
66
        *result = StringParser::PARSE_FAILURE;
127
66
        return 0;
128
66
    }
129
    // Parse exponent if any. Keep `end_digit_index` before consuming 'e/E' so later digit counts
130
    // ignore exponent syntax. For "1.4E+2", end_digit_index points just after "1.4", not after
131
    // "E+2".
132
114k
    int64_t exponent = 0;
133
114k
    auto end_digit_index = i;
134
114k
    if (i != len) {
135
74.9k
        bool negative_exponent = false;
136
74.9k
        if (s[i] == 'e' || s[i] == 'E') {
137
74.9k
            ++i;
138
74.9k
            if (i != len) {
139
74.9k
                switch (s[i]) {
140
1.53k
                case '-':
141
1.53k
                    negative_exponent = true;
142
1.53k
                    [[fallthrough]];
143
67.2k
                case '+':
144
67.2k
                    ++i;
145
74.9k
                }
146
74.9k
            }
147
74.9k
            if (i == len) {
148
                // '123e', '123e+', '123e-'
149
0
                *result = StringParser::PARSE_FAILURE;
150
0
                return 0;
151
0
            }
152
223k
            for (; i != len; ++i) {
153
148k
                const char& c = s[i];
154
148k
                if (LIKELY('0' <= c && c <= '9')) {
155
148k
                    exponent = exponent * 10 + (c - '0');
156
                    // max string len is config::string_type_length_soft_limit_bytes,
157
                    // whose max value is std::numeric_limits<int32_t>::max() - 4,
158
                    // just check overflow of int32_t to simplify the logic
159
                    // For edge cases like 0.{2147483647 zeros}e+2147483647
160
148k
                    if (exponent > std::numeric_limits<int32_t>::max()) {
161
0
                        *result = StringParser::PARSE_OVERFLOW;
162
0
                        return 0;
163
0
                    }
164
148k
                } else {
165
                    // '123e12abc', '123e1.2'
166
10
                    *result = StringParser::PARSE_FAILURE;
167
10
                    return 0;
168
10
                }
169
148k
            }
170
74.9k
            if (negative_exponent) {
171
1.53k
                exponent = -exponent;
172
1.53k
            }
173
74.9k
        } else {
174
14
            *result = StringParser::PARSE_FAILURE;
175
14
            return 0;
176
14
        }
177
74.9k
    }
178
    // TODO: check limit values of exponent and add UT
179
    // max string len is config::string_type_length_soft_limit_bytes,
180
    // whose max value is std::numeric_limits<int32_t>::max() - 4,
181
    // so int_part_count will be in range of int32_t,
182
    // and int_part_count + exponent will be in range of int64_t
183
114k
    int64_t tmp_result_int_part_digit_count = int_part_count + exponent;
184
114k
    if (tmp_result_int_part_digit_count > std::numeric_limits<int>::max() ||
185
114k
        tmp_result_int_part_digit_count < std::numeric_limits<int>::min()) {
186
0
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
187
0
        return 0;
188
0
    }
189
114k
    int result_int_part_digit_count = tmp_result_int_part_digit_count;
190
114k
    T int_part_number = 0;
191
114k
    T frac_part_number = 0;
192
114k
    int actual_frac_part_count = 0;
193
114k
    int digit_index = 0;
194
114k
    if (result_int_part_digit_count >= 0) {
195
        // `max_index` is the raw significand index where integer-part digits stop. Add one extra
196
        // raw character only when crossing an in-buffer dot, e.g. "1.4E+2" must scan "1.4" to
197
        // collect three integer digits after the exponent shift. It is capped by end_digit_index
198
        // because missing digits are appended later by multiplying with powers of 10.
199
114k
        int max_index = std::min(found_dot ? (result_int_part_digit_count +
200
108k
                                              ((int_part_count > 0 && exponent > 0) ? 1 : 0))
201
114k
                                           : result_int_part_digit_count,
202
114k
                                 end_digit_index);
203
114k
        max_index = (max_index == std::numeric_limits<int>::min() ? end_digit_index : max_index);
204
        // skip leading zeros
205
330k
        for (; digit_index != max_index && s[digit_index] == '0'; ++digit_index) {
206
216k
        }
207
        // test 0.00, .00, 0.{00...}e2147483647
208
        // 0.00000e2147483647
209
114k
        if (digit_index != max_index &&
210
114k
            (result_int_part_digit_count - digit_index > type_precision - type_scale)) {
211
112
            *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
212
112
            return 0;
213
112
        }
214
        // get int part number
215
2.14M
        for (; digit_index != max_index; ++digit_index) {
216
2.02M
            if (UNLIKELY(s[digit_index] == '.')) {
217
66.6k
                continue;
218
66.6k
            }
219
1.95M
            int_part_number = int_part_number * 10 + (s[digit_index] - '0');
220
1.95M
        }
221
        // Count only significand digits, not exponent syntax. If the exponent moves the decimal
222
        // point past all available significant digits, append zeros by scaling the integer part:
223
        // "1.4E+2" scans integer 14, total_significant_digit_count=2, then multiplies by 10.
224
114k
        auto total_significant_digit_count =
225
114k
                end_digit_index - ((found_dot && int_part_count > 0) ? 1 : 0);
226
114k
        if (result_int_part_digit_count > total_significant_digit_count) {
227
63.6k
            int_part_number *= get_scale_multiplier<T>(result_int_part_digit_count -
228
63.6k
                                                       total_significant_digit_count);
229
63.6k
        }
230
114k
    } else {
231
        // leading zeros of fraction part
232
48
        actual_frac_part_count = -result_int_part_digit_count;
233
48
    }
234
    // get fraction part number
235
1.50M
    for (; digit_index != end_digit_index && actual_frac_part_count < type_scale; ++digit_index) {
236
1.38M
        if (UNLIKELY(s[digit_index] == '.')) {
237
23.7k
            continue;
238
23.7k
        }
239
1.36M
        frac_part_number = frac_part_number * 10 + (s[digit_index] - '0');
240
1.36M
        ++actual_frac_part_count;
241
1.36M
    }
242
114k
    auto type_scale_multiplier = get_scale_multiplier<T>(type_scale);
243
    // Round only when the next parsed significand digit is exactly the first discarded fractional
244
    // digit. If `actual_frac_part_count` is already greater than type_scale, the missing positions
245
    // are implicit zeros from a negative exponent, so "5e-17" to scale 15 must stay 0 instead of
246
    // rounding up.
247
114k
    if (actual_frac_part_count == type_scale && digit_index != end_digit_index) {
248
19.0k
        if (UNLIKELY(s[digit_index] == '.')) {
249
852
            ++digit_index;
250
852
        }
251
19.0k
        if (digit_index != end_digit_index) {
252
            // example: test 1.5 -> decimal(1, 0)
253
18.8k
            if (s[digit_index] >= '5') {
254
7.94k
                ++frac_part_number;
255
7.94k
                if (frac_part_number == type_scale_multiplier) {
256
836
                    frac_part_number = 0;
257
836
                    ++int_part_number;
258
836
                }
259
7.94k
            }
260
18.8k
        }
261
95.2k
    } else {
262
95.2k
        if (actual_frac_part_count < type_scale) {
263
85.9k
            frac_part_number *= get_scale_multiplier<T>(type_scale - actual_frac_part_count);
264
85.9k
        }
265
95.2k
    }
266
114k
    if (int_part_number >= get_scale_multiplier<T>(type_precision - type_scale)) {
267
16
        *result = is_negative ? StringParser::PARSE_UNDERFLOW : StringParser::PARSE_OVERFLOW;
268
16
        return 0;
269
16
    }
270
271
114k
    T value = int_part_number * type_scale_multiplier + frac_part_number;
272
114k
    *result = StringParser::PARSE_SUCCESS;
273
114k
    return is_negative ? T(-value) : T(value);
274
114k
}
275
276
// Explicit instantiations of StringParser::string_to_decimal, one per decimal primitive type:
// TYPE_DECIMAL32 (returns Int32), TYPE_DECIMAL64 (Int64), TYPE_DECIMAL128I (Int128),
// TYPE_DECIMALV2 (Int128) and TYPE_DECIMAL256 (wide::Int256). All share the same parameter list:
// input buffer `s` of `len` bytes, target `type_precision` / `type_scale`, and the out-param
// `result` that receives the parse status.
// NOTE(review): presumably these exist so that other translation units can link against the
// template body defined in this file -- confirm against the declaration in string_parser.hpp.
template Int32 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL32>(
277
        const char* __restrict s, size_t len, int type_precision, int type_scale,
278
        ParseResult* result);
279
template Int64 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL64>(
280
        const char* __restrict s, size_t len, int type_precision, int type_scale,
281
        ParseResult* result);
282
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>(
283
        const char* __restrict s, size_t len, int type_precision, int type_scale,
284
        ParseResult* result);
285
template Int128 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMALV2>(
286
        const char* __restrict s, size_t len, int type_precision, int type_scale,
287
        ParseResult* result);
288
template wide::Int256 StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL256>(
289
        const char* __restrict s, size_t len, int type_precision, int type_scale,
290
        ParseResult* result);
291
} // end namespace doris
292
#include "common/compile_check_avoid_end.h"