/root/doris/be/src/gutil/strings/numbers.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.cc |
3 | | // |
4 | | // This file contains string processing functions related to |
5 | | // numeric values. |
6 | | |
7 | | #include "gutil/strings/numbers.h" |
8 | | |
9 | | #include <fmt/compile.h> |
10 | | #include <fmt/format.h> |
11 | | |
12 | | #include <cfloat> |
13 | | |
14 | | #include "common/logging.h" |
15 | | #include "gutil/integral_types.h" |
16 | | #include "gutil/strings/ascii_ctype.h" |
17 | | #include "gutil/strtoint.h" |
18 | | |
19 | 18.0M | bool safe_strtof(const char* str, float* value) { |
20 | 18.0M | char* endptr; |
21 | | #ifdef _MSC_VER // has no strtof() |
22 | | *value = strtod(str, &endptr); |
23 | | #else |
24 | 18.0M | *value = strtof(str, &endptr); |
25 | 18.0M | #endif |
26 | 18.0M | if (endptr != str) { |
27 | 18.0M | while (ascii_isspace(*endptr)) ++endptr; |
28 | 18.0M | } |
29 | | // Ignore range errors from strtod/strtof. |
30 | | // The values it returns on underflow and |
31 | | // overflow are the right fallback in a |
32 | | // robust setting. |
33 | 18.0M | return *str != '\0' && *endptr == '\0'; |
34 | 18.0M | } |
35 | | |
36 | 0 | bool safe_strtod(const char* str, double* value) { |
37 | 0 | char* endptr; |
38 | 0 | *value = strtod(str, &endptr); |
39 | 0 | if (endptr != str) { |
40 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
41 | 0 | } |
42 | | // Ignore range errors from strtod. The values it |
43 | | // returns on underflow and overflow are the right |
44 | | // fallback in a robust setting. |
45 | 0 | return *str != '\0' && *endptr == '\0'; |
46 | 0 | } |
47 | | |
48 | 0 | bool safe_strtof(const string& str, float* value) { |
49 | 0 | return safe_strtof(str.c_str(), value); |
50 | 0 | } |
51 | | |
52 | 0 | bool safe_strtod(const string& str, double* value) { |
53 | 0 | return safe_strtod(str.c_str(), value); |
54 | 0 | } |
55 | | |
56 | | // ---------------------------------------------------------------------- |
57 | | // SimpleDtoa() |
58 | | // SimpleFtoa() |
59 | | // DoubleToBuffer() |
60 | | // FloatToBuffer() |
61 | | // We want to print the value without losing precision, but we also do |
62 | | // not want to print more digits than necessary. This turns out to be |
63 | | // trickier than it sounds. Numbers like 0.2 cannot be represented |
64 | | // exactly in binary. If we print 0.2 with a very large precision, |
65 | | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
66 | | // On the other hand, if we set the precision too low, we lose |
67 | | // significant digits when printing numbers that actually need them. |
68 | | // It turns out there is no precision value that does the right thing |
69 | | // for all numbers. |
70 | | // |
71 | | // Our strategy is to first try printing with a precision that is never |
72 | | // over-precise, then parse the result with strtod() to see if it |
73 | | // matches. If not, we print again with a precision that will always |
74 | | // give a precise result, but may use more digits than necessary. |
75 | | // |
76 | | // An arguably better strategy would be to use the algorithm described |
77 | | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
78 | | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
79 | | // however, that the following implementation is about as fast as |
80 | | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
81 | | // will not scale well on multi-core machines. DMG's code is slightly |
82 | | // more accurate (in that it will never use more digits than |
83 | | // necessary), but this is probably irrelevant for most users. |
84 | | // |
85 | | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
86 | | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
87 | | // one in that it makes guesses and then uses strtod() to check them. |
88 | | // Their implementation is faster because they use their own code to |
89 | | // generate the digits in the first place rather than use snprintf(), |
90 | | // thus avoiding format string parsing overhead. However, this makes |
91 | | // it considerably more complicated than the following implementation, |
92 | | // and it is embedded in a larger library. If speed turns out to be |
93 | | // an issue, we could re-implement this in terms of their |
94 | | // implementation. |
95 | | // ---------------------------------------------------------------------- |
96 | 11 | int DoubleToBuffer(double value, int width, char* buffer) { |
97 | | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
98 | | // platforms these days. Just in case some system exists where DBL_DIG |
99 | | // is significantly larger -- and risks overflowing our buffer -- we have |
100 | | // this assert. |
101 | 11 | COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
102 | | |
103 | 11 | int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value); |
104 | | |
105 | | // The snprintf should never overflow because the buffer is significantly |
106 | | // larger than the precision we asked for. |
107 | 11 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
108 | | |
109 | 11 | if (strtod(buffer, nullptr) != value) { |
110 | 3 | snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value); |
111 | | |
112 | | // Should never overflow; see above. |
113 | 3 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
114 | 3 | } |
115 | | |
116 | 11 | return snprintf_result; |
117 | 11 | } |
118 | | |
119 | 18.0M | int FloatToBuffer(float value, int width, char* buffer) { |
120 | | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
121 | | // platforms these days. Just in case some system exists where FLT_DIG |
122 | | // is significantly larger -- and risks overflowing our buffer -- we have |
123 | | // this assert. |
124 | 18.0M | COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
125 | | |
126 | 18.0M | int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value); |
127 | | |
128 | | // The snprintf should never overflow because the buffer is significantly |
129 | | // larger than the precision we asked for. |
130 | 18.0M | DCHECK(snprintf_result > 0 && snprintf_result < width); |
131 | | |
132 | 18.0M | float parsed_value; |
133 | 18.0M | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
134 | 10 | snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value); |
135 | | |
136 | | // Should never overflow; see above. |
137 | 10 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
138 | 10 | } |
139 | | |
140 | 18.0M | return snprintf_result; |
141 | 18.0M | } |
142 | | |
143 | 862 | int FastDoubleToBuffer(double value, char* buffer) { |
144 | 862 | auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
145 | 862 | *end = '\0'; |
146 | 862 | return end - buffer; |
147 | 862 | } |
148 | | |
149 | 761 | int FastFloatToBuffer(float value, char* buffer) { |
150 | 761 | auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
151 | 761 | *end = '\0'; |
152 | 761 | return end - buffer; |
153 | 761 | } |
154 | | |
155 | | // ---------------------------------------------------------------------- |
156 | | // SimpleItoaWithCommas() |
157 | | // Description: converts an integer to a string. |
158 | | // Puts commas every 3 spaces. |
159 | | // Faster than printf("%d")? |
160 | | // |
161 | | // Return value: string |
162 | | // ---------------------------------------------------------------------- |
163 | | |
// Renders `i` in decimal with a comma between every group of three digits.
//
// The text is written right-aligned so that it ends at
// buffer + buffer_size; no terminating NUL is written by this function.
// Returns a pointer to the first character of the text (the sign, if any).
// No bounds checks are performed: 19 digits, 6 commas and a sign (26
// characters) are enough for any 64-bit value.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the magnitude in an unsigned type so that
    // -9,223,372,036,854,775,808 negates without overflowing.
    uint64_t magnitude = i;
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits least-significant first, inserting a comma before each
    // digit that starts a new group of three. The do-while guarantees that
    // zero still produces a single "0".
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = '0' + magnitude % 10;
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}
190 | | |
// Renders `i` in decimal with a comma between every group of three digits.
//
// The text is written right-aligned so that it ends at
// buffer + buffer_size; no terminating NUL is written by this function.
// Returns a pointer to the first character of the text (the sign, if any).
// No bounds checks are performed: 39 digits, 12 commas and a sign (52
// characters) are enough for any 128-bit value.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the magnitude in an unsigned type so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 negates without
    // overflowing.
    __uint128_t magnitude = i;
    if (i < 0) {
        magnitude = 0 - magnitude;
    }

    // Emit digits least-significant first, inserting a comma before each
    // digit that starts a new group of three. The do-while guarantees that
    // zero still produces a single "0".
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--out = ',';
            digits_in_group = 0;
        }
        *--out = '0' + magnitude % 10;
        magnitude /= 10;
        ++digits_in_group;
    } while (magnitude != 0);

    if (i < 0) {
        *--out = '-';
    }
    return out;
}