Coverage Report

Created: 2025-06-20 03:12

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <fmt/compile.h>
10
#include <fmt/format.h>
11
12
#include <cfloat>
13
14
#include "common/logging.h"
15
#include "gutil/integral_types.h"
16
#include "gutil/strings/ascii_ctype.h"
17
#include "gutil/strtoint.h"
18
19
36.1M
bool safe_strtof(const char* str, float* value) {
20
36.1M
    char* endptr;
21
#ifdef _MSC_VER // has no strtof()
22
    *value = strtod(str, &endptr);
23
#else
24
36.1M
    *value = strtof(str, &endptr);
25
36.1M
#endif
26
36.1M
    if (endptr != str) {
27
36.1M
        while (ascii_isspace(*endptr)) ++endptr;
28
36.1M
    }
29
    // Ignore range errors from strtod/strtof.
30
    // The values it returns on underflow and
31
    // overflow are the right fallback in a
32
    // robust setting.
33
36.1M
    return *str != '\0' && *endptr == '\0';
34
36.1M
}
35
36
0
bool safe_strtod(const char* str, double* value) {
37
0
    char* endptr;
38
0
    *value = strtod(str, &endptr);
39
0
    if (endptr != str) {
40
0
        while (ascii_isspace(*endptr)) ++endptr;
41
0
    }
42
    // Ignore range errors from strtod.  The values it
43
    // returns on underflow and overflow are the right
44
    // fallback in a robust setting.
45
0
    return *str != '\0' && *endptr == '\0';
46
0
}
47
48
0
bool safe_strtof(const string& str, float* value) {
49
0
    return safe_strtof(str.c_str(), value);
50
0
}
51
52
0
bool safe_strtod(const string& str, double* value) {
53
0
    return safe_strtod(str.c_str(), value);
54
0
}
55
56
// ----------------------------------------------------------------------
57
// SimpleDtoa()
58
// SimpleFtoa()
59
// DoubleToBuffer()
60
// FloatToBuffer()
61
//    We want to print the value without losing precision, but we also do
62
//    not want to print more digits than necessary.  This turns out to be
63
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
64
//    exactly in binary.  If we print 0.2 with a very large precision,
65
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
66
//    On the other hand, if we set the precision too low, we lose
67
//    significant digits when printing numbers that actually need them.
68
//    It turns out there is no precision value that does the right thing
69
//    for all numbers.
70
//
71
//    Our strategy is to first try printing with a precision that is never
72
//    over-precise, then parse the result with strtod() to see if it
73
//    matches.  If not, we print again with a precision that will always
74
//    give a precise result, but may use more digits than necessary.
75
//
76
//    An arguably better strategy would be to use the algorithm described
77
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
78
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
79
//    however, that the following implementation is about as fast as
80
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
81
//    will not scale well on multi-core machines.  DMG's code is slightly
82
//    more accurate (in that it will never use more digits than
83
//    necessary), but this is probably irrelevant for most users.
84
//
85
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
86
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
87
//    one in that it makes guesses and then uses strtod() to check them.
88
//    Their implementation is faster because they use their own code to
89
//    generate the digits in the first place rather than use snprintf(),
90
//    thus avoiding format string parsing overhead.  However, this makes
91
//    it considerably more complicated than the following implementation,
92
//    and it is embedded in a larger library.  If speed turns out to be
93
//    an issue, we could re-implement this in terms of their
94
//    implementation.
95
// ----------------------------------------------------------------------
96
22
int DoubleToBuffer(double value, int width, char* buffer) {
97
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
98
    // platforms these days.  Just in case some system exists where DBL_DIG
99
    // is significantly larger -- and risks overflowing our buffer -- we have
100
    // this assert.
101
22
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
102
103
22
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
104
105
    // The snprintf should never overflow because the buffer is significantly
106
    // larger than the precision we asked for.
107
22
    DCHECK(snprintf_result > 0 && snprintf_result < width);
108
109
22
    if (strtod(buffer, nullptr) != value) {
110
6
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
111
112
        // Should never overflow; see above.
113
6
        DCHECK(snprintf_result > 0 && snprintf_result < width);
114
6
    }
115
116
22
    return snprintf_result;
117
22
}
118
119
36.1M
int FloatToBuffer(float value, int width, char* buffer) {
120
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
121
    // platforms these days.  Just in case some system exists where FLT_DIG
122
    // is significantly larger -- and risks overflowing our buffer -- we have
123
    // this assert.
124
36.1M
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
125
126
36.1M
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
127
128
    // The snprintf should never overflow because the buffer is significantly
129
    // larger than the precision we asked for.
130
36.1M
    DCHECK(snprintf_result > 0 && snprintf_result < width);
131
132
36.1M
    float parsed_value;
133
36.1M
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
134
20
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
135
136
        // Should never overflow; see above.
137
20
        DCHECK(snprintf_result > 0 && snprintf_result < width);
138
20
    }
139
140
36.1M
    return snprintf_result;
141
36.1M
}
142
143
1.72k
int FastDoubleToBuffer(double value, char* buffer) {
144
1.72k
    auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
145
1.72k
    *end = '\0';
146
1.72k
    return end - buffer;
147
1.72k
}
148
149
1.52k
int FastFloatToBuffer(float value, char* buffer) {
150
1.52k
    auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
151
1.52k
    *end = '\0';
152
1.52k
    return end - buffer;
153
1.52k
}
154
155
// ----------------------------------------------------------------------
156
// SimpleItoaWithCommas()
157
//    Description: converts an integer to a string.
158
//    Puts commas every 3 spaces.
159
//    Faster than printf("%d")?
160
//
161
//    Return value: string
162
// ----------------------------------------------------------------------
163
164
54
// Renders `i` in decimal with a comma between every group of three digits.
//
// The text is written backwards so that it ends at `buffer + buffer_size`;
// the returned pointer is the first character of the result. No NUL is
// written, so the length is `buffer + buffer_size - <returned pointer>`.
// The buffer size is not checked here: 19 digits, 6 commas and a sign
// (26 bytes) are enough for any 64-bit value.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -9,223,372,036,854,775,808 is handled correctly.
    uint64_t magnitude = static_cast<uint64_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // The do/while guarantees at least one digit, which covers the value 0.
    int digits_written = 0;
    do {
        // A separator goes in front of every fourth, seventh, ... digit,
        // and only when there is a digit left to follow it.
        if (digits_written != 0 && digits_written % 3 == 0) {
            *--out = ',';
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_written;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}
190
191
58
// Renders `i` in decimal with a comma between every group of three digits.
//
// The text is written backwards so that it ends at `buffer + buffer_size`;
// the returned pointer is the first character of the result. No NUL is
// written, so the length is `buffer + buffer_size - <returned pointer>`.
// The buffer size is not checked here: 39 digits, 12 commas and a sign
// (52 bytes) are enough for any 128-bit value.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled
    // correctly.
    __uint128_t magnitude = static_cast<__uint128_t>(i);
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // The do/while guarantees at least one digit, which covers the value 0.
    int digits_written = 0;
    do {
        // A separator goes in front of every fourth, seventh, ... digit,
        // and only when there is a digit left to follow it.
        if (digits_written != 0 && digits_written % 3 == 0) {
            *--out = ',';
        }
        *--out = static_cast<char>('0' + magnitude % 10);
        magnitude /= 10;
        ++digits_written;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}