Coverage Report

Created: 2025-05-21 13:31

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <assert.h>
10
#include <ctype.h>
11
#include <errno.h>
12
#include <float.h> // for DBL_DIG and FLT_DIG
13
#include <math.h>  // for HUGE_VAL
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <string.h>
17
#include <inttypes.h>
18
#include <sys/types.h>
19
#include <limits>
20
#include <ostream>
21
22
#include "common/exception.h"
23
24
using std::numeric_limits;
25
#include <string>
26
27
using std::string;
28
29
#include <fmt/compile.h>
30
#include <fmt/format.h>
31
32
#include "common/logging.h"
33
34
#include "gutil/integral_types.h"
35
#include "gutil/strings/ascii_ctype.h"
36
#include "gutil/strtoint.h"
37
38
namespace {
39
40
// Represents integer values of digits.
41
// Uses 36 to indicate an invalid character since we support
42
// bases up to 36.
43
// Lookup table indexed by an unsigned character code, giving that character's
// digit value: '0'-'9' map to 0-9, 'A'-'Z' and 'a'-'z' both map to 10-35.
// Every other code maps to 36, which is never a valid digit because the
// largest supported base is 36.
static const int8 kAsciiToInt[256] = {
        // 0x00 - 0x0F
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        // 0x10 - 0x1F
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        // 0x20 - 0x2F
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        // 0x30 - 0x3F: '0'..'9' followed by six non-digits
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 36, 36, 36, 36, 36, 36,
        // 0x40 - 0x4F: '@' then 'A'..'O'
        36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        // 0x50 - 0x5F: 'P'..'Z' followed by five non-digits
        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36,
        // 0x60 - 0x6F: '`' then 'a'..'o'
        36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
        // 0x70 - 0x7F: 'p'..'z' followed by five non-digits
        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36,
        // 0x80 - 0xFF: no digits
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
56
57
// Input format based on POSIX.1-2008 strtol
58
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
59
template <typename IntType>
60
33
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
61
    // Consume whitespace.
62
34
    while (start < end && ascii_isspace(start[0])) {
63
1
        ++start;
64
1
    }
65
33
    while (start < end && ascii_isspace(end[-1])) {
66
0
        --end;
67
0
    }
68
33
    if (start >= end) {
69
3
        return false;
70
3
    }
71
72
    // Consume sign.
73
30
    const bool negative = (start[0] == '-');
74
30
    if (negative || start[0] == '+') {
75
7
        ++start;
76
7
        if (start >= end) {
77
0
            return false;
78
0
        }
79
7
    }
80
81
    // Consume base-dependent prefix.
82
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
83
    //  base 16: "0x" -> base 16
84
    // Also validate the base.
85
30
    if (base == 0) {
86
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
87
0
            base = 16;
88
0
            start += 2;
89
0
        } else if (end - start >= 1 && start[0] == '0') {
90
0
            base = 8;
91
0
            start += 1;
92
0
        } else {
93
0
            base = 10;
94
0
        }
95
30
    } else if (base == 16) {
96
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
97
0
            start += 2;
98
0
        }
99
30
    } else if (base >= 2 && base <= 36) {
100
        // okay
101
30
    } else {
102
0
        return false;
103
0
    }
104
105
    // Consume digits.
106
    //
107
    // The classic loop:
108
    //
109
    //   for each digit
110
    //     value = value * base + digit
111
    //   value *= sign
112
    //
113
    // The classic loop needs overflow checking.  It also fails on the most
114
    // negative integer, -2147483648 in 32-bit two's complement representation.
115
    //
116
    // My improved loop:
117
    //
118
    //  if (!negative)
119
    //    for each digit
120
    //      value = value * base
121
    //      value = value + digit
122
    //  else
123
    //    for each digit
124
    //      value = value * base
125
    //      value = value - digit
126
    //
127
    // Overflow checking becomes simple.
128
    //
129
    // I present the positive code first for easier reading.
130
30
    IntType value = 0;
131
30
    if (!negative) {
132
23
        const IntType vmax = std::numeric_limits<IntType>::max();
133
23
        assert(vmax > 0);
134
0
        assert(vmax >= base);
135
0
        const IntType vmax_over_base = vmax / base;
136
        // loop over digits
137
        // loop body is interleaved for perf, not readability
138
148
        for (; start < end; ++start) {
139
134
            unsigned char c = static_cast<unsigned char>(start[0]);
140
134
            int digit = kAsciiToInt[c];
141
134
            if (value > vmax_over_base) return false;
142
134
            value *= base;
143
134
            if (digit >= base) return false;
144
128
            if (value > vmax - digit) return false;
145
125
            value += digit;
146
125
        }
147
23
    } else {
148
7
        const IntType vmin = std::numeric_limits<IntType>::min();
149
7
        assert(vmin < 0);
150
0
        assert(vmin <= 0 - base);
151
0
        IntType vmin_over_base = vmin / base;
152
        // 2003 c++ standard [expr.mul]
153
        // "... the sign of the remainder is implementation-defined."
154
        // Although (vmin/base)*base + vmin%base is always vmin.
155
        // 2011 c++ standard tightens the spec but we cannot rely on it.
156
7
        if (vmin % base > 0) {
157
0
            vmin_over_base += 1;
158
0
        }
159
        // loop over digits
160
        // loop body is interleaved for perf, not readability
161
53
        for (; start < end; ++start) {
162
46
            unsigned char c = static_cast<unsigned char>(start[0]);
163
46
            int digit = kAsciiToInt[c];
164
46
            if (value < vmin_over_base) return false;
165
46
            value *= base;
166
46
            if (digit >= base) return false;
167
46
            if (value < vmin + digit) return false;
168
46
            value -= digit;
169
46
        }
170
7
    }
171
172
    // Store output.
173
21
    *value_p = value;
174
21
    return true;
175
30
}
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Line
Count
Source
60
21
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
61
    // Consume whitespace.
62
21
    while (start < end && ascii_isspace(start[0])) {
63
0
        ++start;
64
0
    }
65
21
    while (start < end && ascii_isspace(end[-1])) {
66
0
        --end;
67
0
    }
68
21
    if (start >= end) {
69
1
        return false;
70
1
    }
71
72
    // Consume sign.
73
20
    const bool negative = (start[0] == '-');
74
20
    if (negative || start[0] == '+') {
75
5
        ++start;
76
5
        if (start >= end) {
77
0
            return false;
78
0
        }
79
5
    }
80
81
    // Consume base-dependent prefix.
82
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
83
    //  base 16: "0x" -> base 16
84
    // Also validate the base.
85
20
    if (base == 0) {
86
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
87
0
            base = 16;
88
0
            start += 2;
89
0
        } else if (end - start >= 1 && start[0] == '0') {
90
0
            base = 8;
91
0
            start += 1;
92
0
        } else {
93
0
            base = 10;
94
0
        }
95
20
    } else if (base == 16) {
96
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
97
0
            start += 2;
98
0
        }
99
20
    } else if (base >= 2 && base <= 36) {
100
        // okay
101
20
    } else {
102
0
        return false;
103
0
    }
104
105
    // Consume digits.
106
    //
107
    // The classic loop:
108
    //
109
    //   for each digit
110
    //     value = value * base + digit
111
    //   value *= sign
112
    //
113
    // The classic loop needs overflow checking.  It also fails on the most
114
    // negative integer, -2147483648 in 32-bit two's complement representation.
115
    //
116
    // My improved loop:
117
    //
118
    //  if (!negative)
119
    //    for each digit
120
    //      value = value * base
121
    //      value = value + digit
122
    //  else
123
    //    for each digit
124
    //      value = value * base
125
    //      value = value - digit
126
    //
127
    // Overflow checking becomes simple.
128
    //
129
    // I present the positive code first for easier reading.
130
20
    IntType value = 0;
131
20
    if (!negative) {
132
15
        const IntType vmax = std::numeric_limits<IntType>::max();
133
15
        assert(vmax > 0);
134
0
        assert(vmax >= base);
135
0
        const IntType vmax_over_base = vmax / base;
136
        // loop over digits
137
        // loop body is interleaved for perf, not readability
138
80
        for (; start < end; ++start) {
139
70
            unsigned char c = static_cast<unsigned char>(start[0]);
140
70
            int digit = kAsciiToInt[c];
141
70
            if (value > vmax_over_base) return false;
142
70
            value *= base;
143
70
            if (digit >= base) return false;
144
67
            if (value > vmax - digit) return false;
145
65
            value += digit;
146
65
        }
147
15
    } else {
148
5
        const IntType vmin = std::numeric_limits<IntType>::min();
149
5
        assert(vmin < 0);
150
0
        assert(vmin <= 0 - base);
151
0
        IntType vmin_over_base = vmin / base;
152
        // 2003 c++ standard [expr.mul]
153
        // "... the sign of the remainder is implementation-defined."
154
        // Although (vmin/base)*base + vmin%base is always vmin.
155
        // 2011 c++ standard tightens the spec but we cannot rely on it.
156
5
        if (vmin % base > 0) {
157
0
            vmin_over_base += 1;
158
0
        }
159
        // loop over digits
160
        // loop body is interleaved for perf, not readability
161
31
        for (; start < end; ++start) {
162
26
            unsigned char c = static_cast<unsigned char>(start[0]);
163
26
            int digit = kAsciiToInt[c];
164
26
            if (value < vmin_over_base) return false;
165
26
            value *= base;
166
26
            if (digit >= base) return false;
167
26
            if (value < vmin + digit) return false;
168
26
            value -= digit;
169
26
        }
170
5
    }
171
172
    // Store output.
173
15
    *value_p = value;
174
15
    return true;
175
20
}
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_
Line
Count
Source
60
12
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
61
    // Consume whitespace.
62
13
    while (start < end && ascii_isspace(start[0])) {
63
1
        ++start;
64
1
    }
65
12
    while (start < end && ascii_isspace(end[-1])) {
66
0
        --end;
67
0
    }
68
12
    if (start >= end) {
69
2
        return false;
70
2
    }
71
72
    // Consume sign.
73
10
    const bool negative = (start[0] == '-');
74
10
    if (negative || start[0] == '+') {
75
2
        ++start;
76
2
        if (start >= end) {
77
0
            return false;
78
0
        }
79
2
    }
80
81
    // Consume base-dependent prefix.
82
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
83
    //  base 16: "0x" -> base 16
84
    // Also validate the base.
85
10
    if (base == 0) {
86
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
87
0
            base = 16;
88
0
            start += 2;
89
0
        } else if (end - start >= 1 && start[0] == '0') {
90
0
            base = 8;
91
0
            start += 1;
92
0
        } else {
93
0
            base = 10;
94
0
        }
95
10
    } else if (base == 16) {
96
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
97
0
            start += 2;
98
0
        }
99
10
    } else if (base >= 2 && base <= 36) {
100
        // okay
101
10
    } else {
102
0
        return false;
103
0
    }
104
105
    // Consume digits.
106
    //
107
    // The classic loop:
108
    //
109
    //   for each digit
110
    //     value = value * base + digit
111
    //   value *= sign
112
    //
113
    // The classic loop needs overflow checking.  It also fails on the most
114
    // negative integer, -2147483648 in 32-bit two's complement representation.
115
    //
116
    // My improved loop:
117
    //
118
    //  if (!negative)
119
    //    for each digit
120
    //      value = value * base
121
    //      value = value + digit
122
    //  else
123
    //    for each digit
124
    //      value = value * base
125
    //      value = value - digit
126
    //
127
    // Overflow checking becomes simple.
128
    //
129
    // I present the positive code first for easier reading.
130
10
    IntType value = 0;
131
10
    if (!negative) {
132
8
        const IntType vmax = std::numeric_limits<IntType>::max();
133
8
        assert(vmax > 0);
134
0
        assert(vmax >= base);
135
0
        const IntType vmax_over_base = vmax / base;
136
        // loop over digits
137
        // loop body is interleaved for perf, not readability
138
68
        for (; start < end; ++start) {
139
64
            unsigned char c = static_cast<unsigned char>(start[0]);
140
64
            int digit = kAsciiToInt[c];
141
64
            if (value > vmax_over_base) return false;
142
64
            value *= base;
143
64
            if (digit >= base) return false;
144
61
            if (value > vmax - digit) return false;
145
60
            value += digit;
146
60
        }
147
8
    } else {
148
2
        const IntType vmin = std::numeric_limits<IntType>::min();
149
2
        assert(vmin < 0);
150
0
        assert(vmin <= 0 - base);
151
0
        IntType vmin_over_base = vmin / base;
152
        // 2003 c++ standard [expr.mul]
153
        // "... the sign of the remainder is implementation-defined."
154
        // Although (vmin/base)*base + vmin%base is always vmin.
155
        // 2011 c++ standard tightens the spec but we cannot rely on it.
156
2
        if (vmin % base > 0) {
157
0
            vmin_over_base += 1;
158
0
        }
159
        // loop over digits
160
        // loop body is interleaved for perf, not readability
161
22
        for (; start < end; ++start) {
162
20
            unsigned char c = static_cast<unsigned char>(start[0]);
163
20
            int digit = kAsciiToInt[c];
164
20
            if (value < vmin_over_base) return false;
165
20
            value *= base;
166
20
            if (digit >= base) return false;
167
20
            if (value < vmin + digit) return false;
168
20
            value -= digit;
169
20
        }
170
2
    }
171
172
    // Store output.
173
6
    *value_p = value;
174
6
    return true;
175
10
}
176
177
} // anonymous namespace
178
179
0
bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) {
180
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
181
0
}
182
183
0
bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) {
184
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
185
0
}
186
187
21
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
188
21
    return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
189
21
}
190
191
12
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
192
12
    return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
193
12
}
194
195
0
bool safe_strto32_base(const char* str, int32* value, int base) {
196
0
    char* endptr;
197
0
    errno = 0; // errno only gets set on errors
198
0
    *value = strto32(str, &endptr, base);
199
0
    if (endptr != str) {
200
0
        while (ascii_isspace(*endptr)) ++endptr;
201
0
    }
202
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
203
0
}
204
205
0
bool safe_strto64_base(const char* str, int64* value, int base) {
206
0
    char* endptr;
207
0
    errno = 0; // errno only gets set on errors
208
0
    *value = strto64(str, &endptr, base);
209
0
    if (endptr != str) {
210
0
        while (ascii_isspace(*endptr)) ++endptr;
211
0
    }
212
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
213
0
}
214
215
0
bool safe_strtou32_base(const char* str, uint32* value, int base) {
216
    // strtoul does not give any errors on negative numbers, so we have to
217
    // search the string for '-' manually.
218
0
    while (ascii_isspace(*str)) ++str;
219
0
    if (*str == '-') return false;
220
221
0
    char* endptr;
222
0
    errno = 0; // errno only gets set on errors
223
0
    *value = strtou32(str, &endptr, base);
224
0
    if (endptr != str) {
225
0
        while (ascii_isspace(*endptr)) ++endptr;
226
0
    }
227
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
228
0
}
229
230
0
bool safe_strtou64_base(const char* str, uint64* value, int base) {
231
    // strtou64 does not give any errors on negative numbers, so we have to
232
    // search the string for '-' manually.
233
0
    while (ascii_isspace(*str)) ++str;
234
0
    if (*str == '-') return false;
235
236
0
    char* endptr;
237
0
    errno = 0; // errno only gets set on errors
238
0
    *value = strtou64(str, &endptr, base);
239
0
    if (endptr != str) {
240
0
        while (ascii_isspace(*endptr)) ++endptr;
241
0
    }
242
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
243
0
}
244
245
// ----------------------------------------------------------------------
246
// u64tostr_base36()
247
//    Converts unsigned number to string representation in base-36.
248
// --------------------------------------------------------------------
249
0
// Writes `number` in base 36 (digits 0-9 then lowercase a-z) into `buffer`
// as a NUL-terminated string and returns the number of digits written, not
// counting the terminator.  `buf_size` is the capacity of `buffer` and must
// be greater than zero; `buffer` must be non-null (both are CHECKed).
// Returns 0 if the digits plus terminator do not fit; in that case the
// buffer contents should not be relied upon.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
    CHECK_GT(buf_size, 0);
    CHECK(buffer);
    static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

    // Digits are produced least-significant first, so fill the buffer from
    // the back, starting with the terminator in the last slot.
    char* const limit = buffer + buf_size;
    char* cursor = limit - 1;
    *cursor = '\0';

    do {
        if (cursor == buffer) { // Ran out of space.
            return 0;
        }
        const int digit = number % 36;
        number /= 36;
        *--cursor = kAlphabet[digit];
    } while (number);

    // Slide the digits (and the terminator) to the front of the buffer.
    const size_t bytes_used = static_cast<size_t>(limit - cursor);
    memmove(buffer, cursor, bytes_used);

    return bytes_used - 1;
}
271
272
// Generate functions that wrap safe_strtoXXX_base.
273
#define GEN_SAFE_STRTO(name, type)                                                  \
274
0
    bool name##_base(const string& str, type* value, int base) {                    \
275
0
        return name##_base(str.c_str(), value, base);                               \
276
0
    }                                                                               \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi
277
0
    bool name(const char* str, type* value) { return name##_base(str, value, 10); } \
Unexecuted instantiation: _Z12safe_strto32PKcPi
Unexecuted instantiation: _Z13safe_strtou32PKcPj
Unexecuted instantiation: _Z12safe_strto64PKcPl
Unexecuted instantiation: _Z13safe_strtou64PKcPm
278
0
    bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); }
Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj
Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl
Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm
279
GEN_SAFE_STRTO(safe_strto32, int32);
280
GEN_SAFE_STRTO(safe_strtou32, uint32);
281
GEN_SAFE_STRTO(safe_strto64, int64);
282
GEN_SAFE_STRTO(safe_strtou64, uint64);
283
#undef GEN_SAFE_STRTO
284
285
18.0M
bool safe_strtof(const char* str, float* value) {
286
18.0M
    char* endptr;
287
#ifdef _MSC_VER // has no strtof()
288
    *value = strtod(str, &endptr);
289
#else
290
18.0M
    *value = strtof(str, &endptr);
291
18.0M
#endif
292
18.0M
    if (endptr != str) {
293
18.0M
        while (ascii_isspace(*endptr)) ++endptr;
294
18.0M
    }
295
    // Ignore range errors from strtod/strtof.
296
    // The values it returns on underflow and
297
    // overflow are the right fallback in a
298
    // robust setting.
299
18.0M
    return *str != '\0' && *endptr == '\0';
300
18.0M
}
301
302
0
bool safe_strtod(const char* str, double* value) {
303
0
    char* endptr;
304
0
    *value = strtod(str, &endptr);
305
0
    if (endptr != str) {
306
0
        while (ascii_isspace(*endptr)) ++endptr;
307
0
    }
308
    // Ignore range errors from strtod.  The values it
309
    // returns on underflow and overflow are the right
310
    // fallback in a robust setting.
311
0
    return *str != '\0' && *endptr == '\0';
312
0
}
313
314
11
bool safe_strtof(const string& str, float* value) {
315
11
    return safe_strtof(str.c_str(), value);
316
11
}
317
318
0
bool safe_strtod(const string& str, double* value) {
319
0
    return safe_strtod(str.c_str(), value);
320
0
}
321
322
// ----------------------------------------------------------------------
323
// SimpleDtoa()
324
// SimpleFtoa()
325
// DoubleToBuffer()
326
// FloatToBuffer()
327
//    We want to print the value without losing precision, but we also do
328
//    not want to print more digits than necessary.  This turns out to be
329
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
330
//    exactly in binary.  If we print 0.2 with a very large precision,
331
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
332
//    On the other hand, if we set the precision too low, we lose
333
//    significant digits when printing numbers that actually need them.
334
//    It turns out there is no precision value that does the right thing
335
//    for all numbers.
336
//
337
//    Our strategy is to first try printing with a precision that is never
338
//    over-precise, then parse the result with strtod() to see if it
339
//    matches.  If not, we print again with a precision that will always
340
//    give a precise result, but may use more digits than necessary.
341
//
342
//    An arguably better strategy would be to use the algorithm described
343
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
344
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
345
//    however, that the following implementation is about as fast as
346
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
347
//    will not scale well on multi-core machines.  DMG's code is slightly
348
//    more accurate (in that it will never use more digits than
349
//    necessary), but this is probably irrelevant for most users.
350
//
351
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
352
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
353
//    one in that it makes guesses and then uses strtod() to check them.
354
//    Their implementation is faster because they use their own code to
355
//    generate the digits in the first place rather than use snprintf(),
356
//    thus avoiding format string parsing overhead.  However, this makes
357
//    it considerably more complicated than the following implementation,
358
//    and it is embedded in a larger library.  If speed turns out to be
359
//    an issue, we could re-implement this in terms of their
360
//    implementation.
361
// ----------------------------------------------------------------------
362
11
int DoubleToBuffer(double value, int width, char* buffer) {
363
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
364
    // platforms these days.  Just in case some system exists where DBL_DIG
365
    // is significantly larger -- and risks overflowing our buffer -- we have
366
    // this assert.
367
11
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
368
369
11
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
370
371
    // The snprintf should never overflow because the buffer is significantly
372
    // larger than the precision we asked for.
373
11
    DCHECK(snprintf_result > 0 && snprintf_result < width);
374
375
11
    if (strtod(buffer, nullptr) != value) {
376
3
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
377
378
        // Should never overflow; see above.
379
3
        DCHECK(snprintf_result > 0 && snprintf_result < width);
380
3
    }
381
382
11
    return snprintf_result;
383
11
}
384
385
18.0M
int FloatToBuffer(float value, int width, char* buffer) {
386
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
387
    // platforms these days.  Just in case some system exists where FLT_DIG
388
    // is significantly larger -- and risks overflowing our buffer -- we have
389
    // this assert.
390
18.0M
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
391
392
18.0M
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
393
394
    // The snprintf should never overflow because the buffer is significantly
395
    // larger than the precision we asked for.
396
18.0M
    DCHECK(snprintf_result > 0 && snprintf_result < width);
397
398
18.0M
    float parsed_value;
399
18.0M
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
400
10
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
401
402
        // Should never overflow; see above.
403
10
        DCHECK(snprintf_result > 0 && snprintf_result < width);
404
10
    }
405
406
18.0M
    return snprintf_result;
407
18.0M
}
408
409
862
int FastDoubleToBuffer(double value, char* buffer) {
410
862
    auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
411
862
    *end = '\0';
412
862
    return end - buffer;
413
862
}
414
415
761
int FastFloatToBuffer(float value, char* buffer) {
416
761
    auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
417
761
    *end = '\0';
418
761
    return end - buffer;
419
761
}
420
421
// ----------------------------------------------------------------------
422
// SimpleItoaWithCommas()
423
//    Description: converts an integer to a string.
424
//    Inserts a comma between each group of three digits.
425
//    Faster than printf("%d")?
426
//
427
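//    Return value: pointer to the first character written.  The text is
//    right-aligned in the buffer, ends at buffer + buffer_size, and is not
//    NUL-terminated.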
428
// ----------------------------------------------------------------------
429
430
27
// Writes the decimal form of `i`, with a comma between each group of three
// digits, right-aligned so that the text ends at buffer + buffer_size.
// Returns a pointer to the first character written.  No NUL terminator is
// written and the buffer size is not checked: 19 digits, 6 commas and a sign
// (26 bytes) are enough for any 64-bit value.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -9,223,372,036,854,775,808 is handled correctly.
    uint64_t magnitude = static_cast<uint64_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits from least to most significant.  A comma is written only
    // when a full group of three is already out and more digits follow.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}
456
457
29
// Writes the decimal form of `i`, with a comma between each group of three
// digits, right-aligned so that the text ends at buffer + buffer_size.
// Returns a pointer to the first character written.  No NUL terminator is
// written and the buffer size is not checked: 39 digits, 12 commas and a
// sign (52 bytes) are enough for any 128-bit value.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled
    // correctly.
    __uint128_t magnitude = static_cast<__uint128_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits from least to most significant.  A comma is written only
    // when a full group of three is already out and more digits follow.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}