/root/doris/be/src/gutil/strings/numbers.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | | // Refactored from contributions of various authors in strings/strutil.cc |
3 | | // |
4 | | // This file contains string processing functions related to |
5 | | // numeric values. |
6 | | |
7 | | #include "gutil/strings/numbers.h" |
8 | | |
9 | | #include <assert.h> |
10 | | #include <ctype.h> |
11 | | #include <errno.h> |
12 | | #include <float.h> // for DBL_DIG and FLT_DIG |
13 | | #include <math.h> // for HUGE_VAL |
14 | | #include <stdio.h> |
15 | | #include <stdlib.h> |
16 | | #include <string.h> |
17 | | #include <inttypes.h> |
18 | | #include <sys/types.h> |
19 | | #include <limits> |
20 | | #include <ostream> |
21 | | |
22 | | #include "common/exception.h" |
23 | | |
24 | | using std::numeric_limits; |
25 | | #include <string> |
26 | | |
27 | | using std::string; |
28 | | |
29 | | #include <fmt/compile.h> |
30 | | #include <fmt/format.h> |
31 | | |
32 | | #include "common/logging.h" |
33 | | |
34 | | #include "gutil/integral_types.h" |
35 | | #include "gutil/stringprintf.h" |
36 | | #include "gutil/strings/ascii_ctype.h" |
37 | | #include "gutil/strtoint.h" |
38 | | |
39 | | // ---------------------------------------------------------------------- |
40 | | // ConsumeStrayLeadingZeroes |
41 | | // Eliminates all leading zeroes (unless the string itself is composed |
42 | | // of nothing but zeroes, in which case one is kept: 0...0 becomes 0). |
43 | | // -------------------------------------------------------------------- |
44 | | |
// Strips leading '0' characters from *str in place.
//
// A string made up only of zeroes collapses to a single "0"; an empty string
// and a string that does not start with '0' are left untouched.
//
// str: string to edit; must not be null.
void ConsumeStrayLeadingZeroes(std::string* const str) {
    const std::string::size_type first_nonzero = str->find_first_not_of('0');
    if (first_nonzero == std::string::npos) {
        // Either empty or nothing but zeroes: keep exactly one zero if there
        // was more than one, otherwise there is nothing to do.
        if (str->size() > 1) {
            str->resize(1);
        }
        return;
    }
    // erase(0, 0) is a no-op, so strings without leading zeroes are unchanged.
    str->erase(0, first_nonzero);
}
60 | | |
61 | | // ---------------------------------------------------------------------- |
62 | | // ParseLeadingInt32Value() |
63 | | // ParseLeadingUInt32Value() |
64 | | // A simple parser for [u]int32 values. Returns the parsed value |
65 | | // if a valid value is found; else returns deflt |
66 | | // This cannot handle decimal numbers with leading 0s. |
67 | | // -------------------------------------------------------------------- |
68 | | |
69 | 0 | int32 ParseLeadingInt32Value(const char* str, int32 deflt) { |
70 | 0 | char* error = nullptr; |
71 | 0 | long value = strtol(str, &error, 0); |
72 | | // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. |
73 | 0 | if (value > numeric_limits<int32>::max()) { |
74 | 0 | value = numeric_limits<int32>::max(); |
75 | 0 | } else if (value < numeric_limits<int32>::min()) { |
76 | 0 | value = numeric_limits<int32>::min(); |
77 | 0 | } |
78 | 0 | return (error == str) ? deflt : value; |
79 | 0 | } |
80 | | |
81 | 0 | uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) { |
82 | 0 | if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) { |
83 | | // When long is 32 bits, we can use strtoul. |
84 | 0 | char* error = nullptr; |
85 | 0 | const uint32 value = strtoul(str, &error, 0); |
86 | 0 | return (error == str) ? deflt : value; |
87 | 0 | } else { |
88 | | // When long is 64 bits, we must use strto64 and handle limits |
89 | | // by hand. The reason we cannot use a 64-bit strtoul is that |
90 | | // it would be impossible to differentiate "-2" (that should wrap |
91 | | // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 |
92 | | // (that should be pegged to UINT_MAX due to overflow). |
93 | 0 | char* error = nullptr; |
94 | 0 | int64 value = strto64(str, &error, 0); |
95 | 0 | if (value > numeric_limits<uint32>::max() || |
96 | 0 | value < -static_cast<int64>(numeric_limits<uint32>::max())) { |
97 | 0 | value = numeric_limits<uint32>::max(); |
98 | 0 | } |
99 | | // Within these limits, truncation to 32 bits handles negatives correctly. |
100 | 0 | return (error == str) ? deflt : value; |
101 | 0 | } |
102 | 0 | } |
103 | | |
104 | | // ---------------------------------------------------------------------- |
105 | | // ParseLeadingDec32Value |
106 | | // ParseLeadingUDec32Value |
107 | | // A simple parser for [u]int32 values. Returns the parsed value |
108 | | // if a valid value is found; else returns deflt |
109 | | // The string passed in is treated as *10 based*. |
110 | | // This can handle strings with leading 0s. |
111 | | // -------------------------------------------------------------------- |
112 | | |
113 | 0 | int32 ParseLeadingDec32Value(const char* str, int32 deflt) { |
114 | 0 | char* error = nullptr; |
115 | 0 | long value = strtol(str, &error, 10); |
116 | | // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits. |
117 | 0 | if (value > numeric_limits<int32>::max()) { |
118 | 0 | value = numeric_limits<int32>::max(); |
119 | 0 | } else if (value < numeric_limits<int32>::min()) { |
120 | 0 | value = numeric_limits<int32>::min(); |
121 | 0 | } |
122 | 0 | return (error == str) ? deflt : value; |
123 | 0 | } |
124 | | |
125 | 0 | uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) { |
126 | 0 | if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) { |
127 | | // When long is 32 bits, we can use strtoul. |
128 | 0 | char* error = nullptr; |
129 | 0 | const uint32 value = strtoul(str, &error, 10); |
130 | 0 | return (error == str) ? deflt : value; |
131 | 0 | } else { |
132 | | // When long is 64 bits, we must use strto64 and handle limits |
133 | | // by hand. The reason we cannot use a 64-bit strtoul is that |
134 | | // it would be impossible to differentiate "-2" (that should wrap |
135 | | // around to the value UINT_MAX-1) from a string with ULONG_MAX-1 |
136 | | // (that should be pegged to UINT_MAX due to overflow). |
137 | 0 | char* error = nullptr; |
138 | 0 | int64 value = strto64(str, &error, 10); |
139 | 0 | if (value > numeric_limits<uint32>::max() || |
140 | 0 | value < -static_cast<int64>(numeric_limits<uint32>::max())) { |
141 | 0 | value = numeric_limits<uint32>::max(); |
142 | 0 | } |
143 | | // Within these limits, truncation to 32 bits handles negatives correctly. |
144 | 0 | return (error == str) ? deflt : value; |
145 | 0 | } |
146 | 0 | } |
147 | | |
148 | | // ---------------------------------------------------------------------- |
149 | | // ParseLeadingUInt64Value |
150 | | // ParseLeadingInt64Value |
151 | | // ParseLeadingHex64Value |
152 | | // A simple parser for 64-bit values. Returns the parsed value if a |
153 | | // valid integer is found; else returns deflt |
154 | | // UInt64 and Int64 cannot handle decimal numbers with leading 0s. |
155 | | // -------------------------------------------------------------------- |
156 | 0 | uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) { |
157 | 0 | char* error = nullptr; |
158 | 0 | const uint64 value = strtou64(str, &error, 0); |
159 | 0 | return (error == str) ? deflt : value; |
160 | 0 | } |
161 | | |
162 | 0 | int64 ParseLeadingInt64Value(const char* str, int64 deflt) { |
163 | 0 | char* error = nullptr; |
164 | 0 | const int64 value = strto64(str, &error, 0); |
165 | 0 | return (error == str) ? deflt : value; |
166 | 0 | } |
167 | | |
168 | 0 | uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) { |
169 | 0 | char* error = nullptr; |
170 | 0 | const uint64 value = strtou64(str, &error, 16); |
171 | 0 | return (error == str) ? deflt : value; |
172 | 0 | } |
173 | | |
174 | | // ---------------------------------------------------------------------- |
175 | | // ParseLeadingDec64Value |
176 | | // ParseLeadingUDec64Value |
177 | | // A simple parser for [u]int64 values. Returns the parsed value |
178 | | // if a valid value is found; else returns deflt |
179 | | // The string passed in is treated as *10 based*. |
180 | | // This can handle strings with leading 0s. |
181 | | // -------------------------------------------------------------------- |
182 | | |
183 | 0 | int64 ParseLeadingDec64Value(const char* str, int64 deflt) { |
184 | 0 | char* error = nullptr; |
185 | 0 | const int64 value = strto64(str, &error, 10); |
186 | 0 | return (error == str) ? deflt : value; |
187 | 0 | } |
188 | | |
189 | 0 | uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) { |
190 | 0 | char* error = nullptr; |
191 | 0 | const uint64 value = strtou64(str, &error, 10); |
192 | 0 | return (error == str) ? deflt : value; |
193 | 0 | } |
194 | | |
195 | | // ---------------------------------------------------------------------- |
196 | | // ParseLeadingDoubleValue() |
197 | | // A simple parser for double values. Returns the parsed value |
198 | | // if a valid value is found; else returns deflt |
199 | | // -------------------------------------------------------------------- |
200 | | |
// Parses a leading floating-point number from `str` with strtod.
// Returns `deflt` when strtod consumed no characters or left errno set
// (overflow/underflow); otherwise returns the parsed value.
double ParseLeadingDoubleValue(const char* str, double deflt) {
    // Clear errno first so a stale error is not mistaken for a range error.
    errno = 0;
    char* parse_end = nullptr;
    const double parsed = strtod(str, &parse_end);
    const bool range_error = (errno != 0);
    const bool nothing_parsed = (parse_end == str);
    return (range_error || nothing_parsed) ? deflt : parsed;
}
212 | | |
213 | | // ---------------------------------------------------------------------- |
214 | | // ParseLeadingBoolValue() |
215 | | // A recognizer of boolean string values. Returns the parsed value |
216 | | // if a valid value is found; else returns deflt. This skips leading |
217 | | // whitespace, is case insensitive, and recognizes these forms: |
218 | | // 0/1, false/true, no/yes, n/y |
219 | | // -------------------------------------------------------------------- |
220 | 0 | bool ParseLeadingBoolValue(const char* str, bool deflt) { |
221 | 0 | static const int kMaxLen = 5; |
222 | 0 | char value[kMaxLen + 1]; |
223 | | // Skip whitespace |
224 | 0 | while (ascii_isspace(*str)) { |
225 | 0 | ++str; |
226 | 0 | } |
227 | 0 | int len = 0; |
228 | 0 | for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str); |
229 | 0 | if (len == 0 || len > kMaxLen) return deflt; |
230 | 0 | value[len] = '\0'; |
231 | 0 | switch (len) { |
232 | 0 | case 1: |
233 | 0 | if (value[0] == '0' || value[0] == 'n') return false; |
234 | 0 | if (value[0] == '1' || value[0] == 'y') return true; |
235 | 0 | break; |
236 | 0 | case 2: |
237 | 0 | if (!strcmp(value, "no")) return false; |
238 | 0 | break; |
239 | 0 | case 3: |
240 | 0 | if (!strcmp(value, "yes")) return true; |
241 | 0 | break; |
242 | 0 | case 4: |
243 | 0 | if (!strcmp(value, "true")) return true; |
244 | 0 | break; |
245 | 0 | case 5: |
246 | 0 | if (!strcmp(value, "false")) return false; |
247 | 0 | break; |
248 | 0 | } |
249 | 0 | return deflt; |
250 | 0 | } |
251 | | |
252 | | // ---------------------------------------------------------------------- |
253 | | // Uint64ToString() |
254 | | // FloatToString() |
255 | | // IntToString() |
256 | | // Convert various types to their string representation, possibly padded |
257 | | // with spaces, using snprintf format specifiers. |
258 | | // ---------------------------------------------------------------------- |
259 | | |
260 | 0 | string Uint64ToString(uint64 fp) { |
261 | 0 | char buf[17]; |
262 | 0 | snprintf(buf, sizeof(buf), "%016" PRIx64, fp); |
263 | 0 | return string(buf); |
264 | 0 | } |
265 | | namespace { |
266 | | |
267 | | // Represents integer values of digits. |
268 | | // Uses 36 to indicate an invalid character since we support |
269 | | // bases up to 36. |
270 | | static const int8 kAsciiToInt[256] = { |
271 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. |
272 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
273 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 36, 36, |
274 | | 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, |
275 | | 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, |
276 | | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, |
277 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
278 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
279 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
280 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
281 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
282 | | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; |
283 | | |
284 | | // Input format based on POSIX.1-2008 strtol |
285 | | // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html |
286 | | template <typename IntType> |
287 | 33 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { |
288 | | // Consume whitespace. |
289 | 34 | while (start < end && ascii_isspace(start[0])) { |
290 | 1 | ++start; |
291 | 1 | } |
292 | 33 | while (start < end && ascii_isspace(end[-1])) { |
293 | 0 | --end; |
294 | 0 | } |
295 | 33 | if (start >= end) { |
296 | 3 | return false; |
297 | 3 | } |
298 | | |
299 | | // Consume sign. |
300 | 30 | const bool negative = (start[0] == '-'); |
301 | 30 | if (negative || start[0] == '+') { |
302 | 7 | ++start; |
303 | 7 | if (start >= end) { |
304 | 0 | return false; |
305 | 0 | } |
306 | 7 | } |
307 | | |
308 | | // Consume base-dependent prefix. |
309 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 |
310 | | // base 16: "0x" -> base 16 |
311 | | // Also validate the base. |
312 | 30 | if (base == 0) { |
313 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { |
314 | 0 | base = 16; |
315 | 0 | start += 2; |
316 | 0 | } else if (end - start >= 1 && start[0] == '0') { |
317 | 0 | base = 8; |
318 | 0 | start += 1; |
319 | 0 | } else { |
320 | 0 | base = 10; |
321 | 0 | } |
322 | 30 | } else if (base == 16) { |
323 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { |
324 | 0 | start += 2; |
325 | 0 | } |
326 | 30 | } else if (base >= 2 && base <= 36) { |
327 | | // okay |
328 | 30 | } else { |
329 | 0 | return false; |
330 | 0 | } |
331 | | |
332 | | // Consume digits. |
333 | | // |
334 | | // The classic loop: |
335 | | // |
336 | | // for each digit |
337 | | // value = value * base + digit |
338 | | // value *= sign |
339 | | // |
340 | | // The classic loop needs overflow checking. It also fails on the most |
341 | | // negative integer, -2147483648 in 32-bit two's complement representation. |
342 | | // |
343 | | // My improved loop: |
344 | | // |
345 | | // if (!negative) |
346 | | // for each digit |
347 | | // value = value * base |
348 | | // value = value + digit |
349 | | // else |
350 | | // for each digit |
351 | | // value = value * base |
352 | | // value = value - digit |
353 | | // |
354 | | // Overflow checking becomes simple. |
355 | | // |
356 | | // I present the positive code first for easier reading. |
357 | 30 | IntType value = 0; |
358 | 30 | if (!negative) { |
359 | 23 | const IntType vmax = std::numeric_limits<IntType>::max(); |
360 | 23 | assert(vmax > 0); |
361 | 0 | assert(vmax >= base); |
362 | 0 | const IntType vmax_over_base = vmax / base; |
363 | | // loop over digits |
364 | | // loop body is interleaved for perf, not readability |
365 | 148 | for (; start < end; ++start) { |
366 | 134 | unsigned char c = static_cast<unsigned char>(start[0]); |
367 | 134 | int digit = kAsciiToInt[c]; |
368 | 134 | if (value > vmax_over_base) return false; |
369 | 134 | value *= base; |
370 | 134 | if (digit >= base) return false; |
371 | 128 | if (value > vmax - digit) return false; |
372 | 125 | value += digit; |
373 | 125 | } |
374 | 23 | } else { |
375 | 7 | const IntType vmin = std::numeric_limits<IntType>::min(); |
376 | 7 | assert(vmin < 0); |
377 | 0 | assert(vmin <= 0 - base); |
378 | 0 | IntType vmin_over_base = vmin / base; |
379 | | // 2003 c++ standard [expr.mul] |
380 | | // "... the sign of the remainder is implementation-defined." |
381 | | // Although (vmin/base)*base + vmin%base is always vmin. |
382 | | // 2011 c++ standard tightens the spec but we cannot rely on it. |
383 | 7 | if (vmin % base > 0) { |
384 | 0 | vmin_over_base += 1; |
385 | 0 | } |
386 | | // loop over digits |
387 | | // loop body is interleaved for perf, not readability |
388 | 53 | for (; start < end; ++start) { |
389 | 46 | unsigned char c = static_cast<unsigned char>(start[0]); |
390 | 46 | int digit = kAsciiToInt[c]; |
391 | 46 | if (value < vmin_over_base) return false; |
392 | 46 | value *= base; |
393 | 46 | if (digit >= base) return false; |
394 | 46 | if (value < vmin + digit) return false; |
395 | 46 | value -= digit; |
396 | 46 | } |
397 | 7 | } |
398 | | |
399 | | // Store output. |
400 | 21 | *value_p = value; |
401 | 21 | return true; |
402 | 30 | } numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_ Line | Count | Source | 287 | 21 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { | 288 | | // Consume whitespace. | 289 | 21 | while (start < end && ascii_isspace(start[0])) { | 290 | 0 | ++start; | 291 | 0 | } | 292 | 21 | while (start < end && ascii_isspace(end[-1])) { | 293 | 0 | --end; | 294 | 0 | } | 295 | 21 | if (start >= end) { | 296 | 1 | return false; | 297 | 1 | } | 298 | | | 299 | | // Consume sign. | 300 | 20 | const bool negative = (start[0] == '-'); | 301 | 20 | if (negative || start[0] == '+') { | 302 | 5 | ++start; | 303 | 5 | if (start >= end) { | 304 | 0 | return false; | 305 | 0 | } | 306 | 5 | } | 307 | | | 308 | | // Consume base-dependent prefix. | 309 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 | 310 | | // base 16: "0x" -> base 16 | 311 | | // Also validate the base. | 312 | 20 | if (base == 0) { | 313 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 314 | 0 | base = 16; | 315 | 0 | start += 2; | 316 | 0 | } else if (end - start >= 1 && start[0] == '0') { | 317 | 0 | base = 8; | 318 | 0 | start += 1; | 319 | 0 | } else { | 320 | 0 | base = 10; | 321 | 0 | } | 322 | 20 | } else if (base == 16) { | 323 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 324 | 0 | start += 2; | 325 | 0 | } | 326 | 20 | } else if (base >= 2 && base <= 36) { | 327 | | // okay | 328 | 20 | } else { | 329 | 0 | return false; | 330 | 0 | } | 331 | | | 332 | | // Consume digits. | 333 | | // | 334 | | // The classic loop: | 335 | | // | 336 | | // for each digit | 337 | | // value = value * base + digit | 338 | | // value *= sign | 339 | | // | 340 | | // The classic loop needs overflow checking. It also fails on the most | 341 | | // negative integer, -2147483648 in 32-bit two's complement representation. 
| 342 | | // | 343 | | // My improved loop: | 344 | | // | 345 | | // if (!negative) | 346 | | // for each digit | 347 | | // value = value * base | 348 | | // value = value + digit | 349 | | // else | 350 | | // for each digit | 351 | | // value = value * base | 352 | | // value = value - digit | 353 | | // | 354 | | // Overflow checking becomes simple. | 355 | | // | 356 | | // I present the positive code first for easier reading. | 357 | 20 | IntType value = 0; | 358 | 20 | if (!negative) { | 359 | 15 | const IntType vmax = std::numeric_limits<IntType>::max(); | 360 | 15 | assert(vmax > 0); | 361 | 0 | assert(vmax >= base); | 362 | 0 | const IntType vmax_over_base = vmax / base; | 363 | | // loop over digits | 364 | | // loop body is interleaved for perf, not readability | 365 | 80 | for (; start < end; ++start) { | 366 | 70 | unsigned char c = static_cast<unsigned char>(start[0]); | 367 | 70 | int digit = kAsciiToInt[c]; | 368 | 70 | if (value > vmax_over_base) return false; | 369 | 70 | value *= base; | 370 | 70 | if (digit >= base) return false; | 371 | 67 | if (value > vmax - digit) return false; | 372 | 65 | value += digit; | 373 | 65 | } | 374 | 15 | } else { | 375 | 5 | const IntType vmin = std::numeric_limits<IntType>::min(); | 376 | 5 | assert(vmin < 0); | 377 | 0 | assert(vmin <= 0 - base); | 378 | 0 | IntType vmin_over_base = vmin / base; | 379 | | // 2003 c++ standard [expr.mul] | 380 | | // "... the sign of the remainder is implementation-defined." | 381 | | // Although (vmin/base)*base + vmin%base is always vmin. | 382 | | // 2011 c++ standard tightens the spec but we cannot rely on it. 
| 383 | 5 | if (vmin % base > 0) { | 384 | 0 | vmin_over_base += 1; | 385 | 0 | } | 386 | | // loop over digits | 387 | | // loop body is interleaved for perf, not readability | 388 | 31 | for (; start < end; ++start) { | 389 | 26 | unsigned char c = static_cast<unsigned char>(start[0]); | 390 | 26 | int digit = kAsciiToInt[c]; | 391 | 26 | if (value < vmin_over_base) return false; | 392 | 26 | value *= base; | 393 | 26 | if (digit >= base) return false; | 394 | 26 | if (value < vmin + digit) return false; | 395 | 26 | value -= digit; | 396 | 26 | } | 397 | 5 | } | 398 | | | 399 | | // Store output. | 400 | 15 | *value_p = value; | 401 | 15 | return true; | 402 | 20 | } |
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_ Line | Count | Source | 287 | 12 | bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) { | 288 | | // Consume whitespace. | 289 | 13 | while (start < end && ascii_isspace(start[0])) { | 290 | 1 | ++start; | 291 | 1 | } | 292 | 12 | while (start < end && ascii_isspace(end[-1])) { | 293 | 0 | --end; | 294 | 0 | } | 295 | 12 | if (start >= end) { | 296 | 2 | return false; | 297 | 2 | } | 298 | | | 299 | | // Consume sign. | 300 | 10 | const bool negative = (start[0] == '-'); | 301 | 10 | if (negative || start[0] == '+') { | 302 | 2 | ++start; | 303 | 2 | if (start >= end) { | 304 | 0 | return false; | 305 | 0 | } | 306 | 2 | } | 307 | | | 308 | | // Consume base-dependent prefix. | 309 | | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 | 310 | | // base 16: "0x" -> base 16 | 311 | | // Also validate the base. | 312 | 10 | if (base == 0) { | 313 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 314 | 0 | base = 16; | 315 | 0 | start += 2; | 316 | 0 | } else if (end - start >= 1 && start[0] == '0') { | 317 | 0 | base = 8; | 318 | 0 | start += 1; | 319 | 0 | } else { | 320 | 0 | base = 10; | 321 | 0 | } | 322 | 10 | } else if (base == 16) { | 323 | 0 | if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) { | 324 | 0 | start += 2; | 325 | 0 | } | 326 | 10 | } else if (base >= 2 && base <= 36) { | 327 | | // okay | 328 | 10 | } else { | 329 | 0 | return false; | 330 | 0 | } | 331 | | | 332 | | // Consume digits. | 333 | | // | 334 | | // The classic loop: | 335 | | // | 336 | | // for each digit | 337 | | // value = value * base + digit | 338 | | // value *= sign | 339 | | // | 340 | | // The classic loop needs overflow checking. It also fails on the most | 341 | | // negative integer, -2147483648 in 32-bit two's complement representation. 
| 342 | | // | 343 | | // My improved loop: | 344 | | // | 345 | | // if (!negative) | 346 | | // for each digit | 347 | | // value = value * base | 348 | | // value = value + digit | 349 | | // else | 350 | | // for each digit | 351 | | // value = value * base | 352 | | // value = value - digit | 353 | | // | 354 | | // Overflow checking becomes simple. | 355 | | // | 356 | | // I present the positive code first for easier reading. | 357 | 10 | IntType value = 0; | 358 | 10 | if (!negative) { | 359 | 8 | const IntType vmax = std::numeric_limits<IntType>::max(); | 360 | 8 | assert(vmax > 0); | 361 | 0 | assert(vmax >= base); | 362 | 0 | const IntType vmax_over_base = vmax / base; | 363 | | // loop over digits | 364 | | // loop body is interleaved for perf, not readability | 365 | 68 | for (; start < end; ++start) { | 366 | 64 | unsigned char c = static_cast<unsigned char>(start[0]); | 367 | 64 | int digit = kAsciiToInt[c]; | 368 | 64 | if (value > vmax_over_base) return false; | 369 | 64 | value *= base; | 370 | 64 | if (digit >= base) return false; | 371 | 61 | if (value > vmax - digit) return false; | 372 | 60 | value += digit; | 373 | 60 | } | 374 | 8 | } else { | 375 | 2 | const IntType vmin = std::numeric_limits<IntType>::min(); | 376 | 2 | assert(vmin < 0); | 377 | 0 | assert(vmin <= 0 - base); | 378 | 0 | IntType vmin_over_base = vmin / base; | 379 | | // 2003 c++ standard [expr.mul] | 380 | | // "... the sign of the remainder is implementation-defined." | 381 | | // Although (vmin/base)*base + vmin%base is always vmin. | 382 | | // 2011 c++ standard tightens the spec but we cannot rely on it. 
| 383 | 2 | if (vmin % base > 0) { | 384 | 0 | vmin_over_base += 1; | 385 | 0 | } | 386 | | // loop over digits | 387 | | // loop body is interleaved for perf, not readability | 388 | 22 | for (; start < end; ++start) { | 389 | 20 | unsigned char c = static_cast<unsigned char>(start[0]); | 390 | 20 | int digit = kAsciiToInt[c]; | 391 | 20 | if (value < vmin_over_base) return false; | 392 | 20 | value *= base; | 393 | 20 | if (digit >= base) return false; | 394 | 20 | if (value < vmin + digit) return false; | 395 | 20 | value -= digit; | 396 | 20 | } | 397 | 2 | } | 398 | | | 399 | | // Store output. | 400 | 6 | *value_p = value; | 401 | 6 | return true; | 402 | 10 | } |
|
403 | | |
404 | | } // anonymous namespace |
405 | | |
406 | 0 | bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) { |
407 | 0 | return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v); |
408 | 0 | } |
409 | | |
410 | 0 | bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) { |
411 | 0 | return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v); |
412 | 0 | } |
413 | | |
414 | 21 | bool safe_strto32(const char* startptr, const int buffer_size, int32* value) { |
415 | 21 | return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value); |
416 | 21 | } |
417 | | |
418 | 12 | bool safe_strto64(const char* startptr, const int buffer_size, int64* value) { |
419 | 12 | return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value); |
420 | 12 | } |
421 | | |
422 | 0 | bool safe_strto32_base(const char* str, int32* value, int base) { |
423 | 0 | char* endptr; |
424 | 0 | errno = 0; // errno only gets set on errors |
425 | 0 | *value = strto32(str, &endptr, base); |
426 | 0 | if (endptr != str) { |
427 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
428 | 0 | } |
429 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
430 | 0 | } |
431 | | |
432 | 0 | bool safe_strto64_base(const char* str, int64* value, int base) { |
433 | 0 | char* endptr; |
434 | 0 | errno = 0; // errno only gets set on errors |
435 | 0 | *value = strto64(str, &endptr, base); |
436 | 0 | if (endptr != str) { |
437 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
438 | 0 | } |
439 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
440 | 0 | } |
441 | | |
442 | 0 | bool safe_strtou32_base(const char* str, uint32* value, int base) { |
443 | | // strtoul does not give any errors on negative numbers, so we have to |
444 | | // search the string for '-' manually. |
445 | 0 | while (ascii_isspace(*str)) ++str; |
446 | 0 | if (*str == '-') return false; |
447 | | |
448 | 0 | char* endptr; |
449 | 0 | errno = 0; // errno only gets set on errors |
450 | 0 | *value = strtou32(str, &endptr, base); |
451 | 0 | if (endptr != str) { |
452 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
453 | 0 | } |
454 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
455 | 0 | } |
456 | | |
457 | 0 | bool safe_strtou64_base(const char* str, uint64* value, int base) { |
458 | | // strtou64 does not give any errors on negative numbers, so we have to |
459 | | // search the string for '-' manually. |
460 | 0 | while (ascii_isspace(*str)) ++str; |
461 | 0 | if (*str == '-') return false; |
462 | | |
463 | 0 | char* endptr; |
464 | 0 | errno = 0; // errno only gets set on errors |
465 | 0 | *value = strtou64(str, &endptr, base); |
466 | 0 | if (endptr != str) { |
467 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
468 | 0 | } |
469 | 0 | return *str != '\0' && *endptr == '\0' && errno == 0; |
470 | 0 | } |
471 | | |
472 | | // ---------------------------------------------------------------------- |
473 | | // u64tostr_base36() |
474 | | // Converts unsigned number to string representation in base-36. |
475 | | // -------------------------------------------------------------------- |
476 | 0 | size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) { |
477 | 0 | CHECK_GT(buf_size, 0); |
478 | 0 | CHECK(buffer); |
479 | 0 | static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz"; |
480 | |
|
481 | 0 | buffer[buf_size - 1] = '\0'; |
482 | 0 | size_t result_size = 1; |
483 | |
|
484 | 0 | do { |
485 | 0 | if (buf_size == result_size) { // Ran out of space. |
486 | 0 | return 0; |
487 | 0 | } |
488 | 0 | int remainder = number % 36; |
489 | 0 | number /= 36; |
490 | 0 | buffer[buf_size - result_size - 1] = kAlphabet[remainder]; |
491 | 0 | result_size++; |
492 | 0 | } while (number); |
493 | | |
494 | 0 | memmove(buffer, buffer + buf_size - result_size, result_size); |
495 | |
|
496 | 0 | return result_size - 1; |
497 | 0 | } |
498 | | |
499 | | // Generate functions that wrap safe_strtoXXX_base. |
500 | | #define GEN_SAFE_STRTO(name, type) \ |
501 | 0 | bool name##_base(const string& str, type* value, int base) { \ |
502 | 0 | return name##_base(str.c_str(), value, base); \ |
503 | 0 | } \ Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi |
504 | 0 | bool name(const char* str, type* value) { return name##_base(str, value, 10); } \ Unexecuted instantiation: _Z12safe_strto32PKcPi Unexecuted instantiation: _Z13safe_strtou32PKcPj Unexecuted instantiation: _Z12safe_strto64PKcPl Unexecuted instantiation: _Z13safe_strtou64PKcPm |
505 | 0 | bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); } Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm |
506 | | GEN_SAFE_STRTO(safe_strto32, int32); |
507 | | GEN_SAFE_STRTO(safe_strtou32, uint32); |
508 | | GEN_SAFE_STRTO(safe_strto64, int64); |
509 | | GEN_SAFE_STRTO(safe_strtou64, uint64); |
510 | | #undef GEN_SAFE_STRTO |
511 | | |
512 | 18.0M | bool safe_strtof(const char* str, float* value) { |
513 | 18.0M | char* endptr; |
514 | | #ifdef _MSC_VER // has no strtof() |
515 | | *value = strtod(str, &endptr); |
516 | | #else |
517 | 18.0M | *value = strtof(str, &endptr); |
518 | 18.0M | #endif |
519 | 18.0M | if (endptr != str) { |
520 | 18.0M | while (ascii_isspace(*endptr)) ++endptr; |
521 | 18.0M | } |
522 | | // Ignore range errors from strtod/strtof. |
523 | | // The values it returns on underflow and |
524 | | // overflow are the right fallback in a |
525 | | // robust setting. |
526 | 18.0M | return *str != '\0' && *endptr == '\0'; |
527 | 18.0M | } |
528 | | |
529 | 0 | bool safe_strtod(const char* str, double* value) { |
530 | 0 | char* endptr; |
531 | 0 | *value = strtod(str, &endptr); |
532 | 0 | if (endptr != str) { |
533 | 0 | while (ascii_isspace(*endptr)) ++endptr; |
534 | 0 | } |
535 | | // Ignore range errors from strtod. The values it |
536 | | // returns on underflow and overflow are the right |
537 | | // fallback in a robust setting. |
538 | 0 | return *str != '\0' && *endptr == '\0'; |
539 | 0 | } |
540 | | |
541 | 11 | bool safe_strtof(const string& str, float* value) { |
542 | 11 | return safe_strtof(str.c_str(), value); |
543 | 11 | } |
544 | | |
545 | 0 | bool safe_strtod(const string& str, double* value) { |
546 | 0 | return safe_strtod(str.c_str(), value); |
547 | 0 | } |
548 | | |
549 | 0 | uint64 atoi_kmgt(const char* s) { |
550 | 0 | char* endptr; |
551 | 0 | uint64 n = strtou64(s, &endptr, 10); |
552 | 0 | uint64 scale = 1; |
553 | 0 | char c = *endptr; |
554 | 0 | if (c != '\0') { |
555 | 0 | c = ascii_toupper(c); |
556 | 0 | switch (c) { |
557 | 0 | case 'K': |
558 | 0 | scale = GG_ULONGLONG(1) << 10; |
559 | 0 | break; |
560 | 0 | case 'M': |
561 | 0 | scale = GG_ULONGLONG(1) << 20; |
562 | 0 | break; |
563 | 0 | case 'G': |
564 | 0 | scale = GG_ULONGLONG(1) << 30; |
565 | 0 | break; |
566 | 0 | case 'T': |
567 | 0 | scale = GG_ULONGLONG(1) << 40; |
568 | 0 | break; |
569 | 0 | default: |
570 | 0 | throw doris::Exception(doris::Status::FatalError( |
571 | 0 | "Invalid mnemonic: `{}'; should be one of `K', `M', `G', and `T'.", c)); |
572 | 0 | } |
573 | 0 | } |
574 | 0 | return n * scale; |
575 | 0 | } |
576 | | |
577 | | // ---------------------------------------------------------------------- |
578 | | // AutoDigitStrCmp |
579 | | // AutoDigitLessThan |
580 | | // StrictAutoDigitLessThan |
581 | | // autodigit_less |
582 | | // autodigit_greater |
583 | | // strict_autodigit_less |
584 | | // strict_autodigit_greater |
585 | | // These are like less<string> and greater<string>, except when a |
586 | | // run of digits is encountered at corresponding points in the two |
587 | | // arguments. Such digit strings are compared numerically instead |
588 | | // of lexicographically. Therefore if you sort by |
589 | | // "autodigit_less", some machine names might get sorted as: |
590 | | // exaf1 |
591 | | // exaf2 |
592 | | // exaf10 |
593 | | // When using "strict" comparison (AutoDigitStrCmp with the strict flag |
594 | | // set to true, or the strict version of the other functions), |
595 | | // strings that represent equal numbers will not be considered equal if |
596 | | // the string representations are not identical. That is, "01" < "1" in |
597 | | // strict mode, but "01" == "1" otherwise. |
598 | | // ---------------------------------------------------------------------- |
599 | | |
// Returns true iff `c` is one of the ASCII decimal digits '0'..'9'.
// Implemented as a range test so that it is well defined for every `char`
// value; calling isdigit() directly on a plain `char` is undefined for bytes
// >= 0x80 on platforms where `char` is signed.
static inline bool AutoDigitIsDigit(char c) {
    return c >= '0' && c <= '9';
}

// Three-way comparison of a[0, alen) against b[0, blen) in which aligned runs
// of decimal digits are ordered by numeric value rather than byte by byte.
//
// Parameters:
//   a, alen / b, blen  the two byte ranges (they need not be NUL-terminated).
//   strict             when true, digit runs with equal numeric value but a
//                      different number of leading zeroes are unequal; the run
//                      with MORE leading zeroes sorts first ("01" < "1").
//
// Returns -1, 0 or 1 for a < b, a == b, a > b respectively.
//
// Digit runs are never converted to integers: leading zeroes are skipped, the
// longer remaining run is the larger number, and equal-length runs are
// compared digit by digit, so arbitrarily long numbers are handled.
// Non-digit bytes are compared as plain `char` values.
int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) {
    int aindex = 0;
    int bindex = 0;
    while (aindex < alen && bindex < blen) {
        if (AutoDigitIsDigit(a[aindex]) && AutoDigitIsDigit(b[bindex])) {
            // Skip leading zeroes, remembering how many each side had so that
            // strict mode can tell "1" from "01".
            const int azero_begin = aindex;
            const int bzero_begin = bindex;
            while (aindex < alen && a[aindex] == '0') ++aindex;
            while (bindex < blen && b[bindex] == '0') ++bindex;
            const int azeroes = aindex - azero_begin;
            const int bzeroes = bindex - bzero_begin;

            // Measure the significant part of each digit run.
            const int astart = aindex;
            const int bstart = bindex;
            while (aindex < alen && AutoDigitIsDigit(a[aindex])) ++aindex;
            while (bindex < blen && AutoDigitIsDigit(b[bindex])) ++bindex;
            const int arun = aindex - astart;
            const int brun = bindex - bstart;

            // With leading zeroes gone, more digits means a larger number.
            if (arun != brun) {
                return arun < brun ? -1 : 1;
            }

            // Same length: the first differing digit decides.
            for (int i = 0; i < arun; ++i) {
                const char da = a[astart + i];
                const char db = b[bstart + i];
                if (da != db) {
                    return da < db ? -1 : 1;
                }
            }

            // Numerically equal; strict mode breaks the tie on leading zeroes.
            if (strict && azeroes != bzeroes) {
                return azeroes > bzeroes ? -1 : 1;
            }
            // Otherwise keep scanning after both runs.
        } else if (a[aindex] != b[bindex]) {
            return a[aindex] < b[bindex] ? -1 : 1;
        } else {
            ++aindex;
            ++bindex;
        }
    }

    if (aindex < alen) {
        // b is a prefix of a.
        return 1;
    }
    if (bindex < blen) {
        // a is a prefix of b.
        return -1;
    }
    return 0;
}
672 | | |
673 | 0 | bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) { |
674 | 0 | return AutoDigitStrCmp(a, alen, b, blen, false) < 0; |
675 | 0 | } |
676 | | |
677 | 0 | bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) { |
678 | 0 | return AutoDigitStrCmp(a, alen, b, blen, true) < 0; |
679 | 0 | } |
680 | | |
681 | | // ---------------------------------------------------------------------- |
682 | | // SimpleDtoa() |
683 | | // SimpleFtoa() |
684 | | // DoubleToBuffer() |
685 | | // FloatToBuffer() |
686 | | // We want to print the value without losing precision, but we also do |
687 | | // not want to print more digits than necessary. This turns out to be |
688 | | // trickier than it sounds. Numbers like 0.2 cannot be represented |
689 | | // exactly in binary. If we print 0.2 with a very large precision, |
690 | | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
691 | | // On the other hand, if we set the precision too low, we lose |
692 | | // significant digits when printing numbers that actually need them. |
693 | | // It turns out there is no precision value that does the right thing |
694 | | // for all numbers. |
695 | | // |
696 | | // Our strategy is to first try printing with a precision that is never |
697 | | // over-precise, then parse the result with strtod() to see if it |
698 | | // matches. If not, we print again with a precision that will always |
699 | | // give a precise result, but may use more digits than necessary. |
700 | | // |
701 | | // An arguably better strategy would be to use the algorithm described |
702 | | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
703 | | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
704 | | // however, that the following implementation is about as fast as |
705 | | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
706 | | // will not scale well on multi-core machines. DMG's code is slightly |
707 | | // more accurate (in that it will never use more digits than |
708 | | // necessary), but this is probably irrelevant for most users. |
709 | | // |
710 | | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
711 | | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
712 | | // one in that it makes guesses and then uses strtod() to check them. |
713 | | // Their implementation is faster because they use their own code to |
714 | | // generate the digits in the first place rather than use snprintf(), |
715 | | // thus avoiding format string parsing overhead. However, this makes |
716 | | // it considerably more complicated than the following implementation, |
717 | | // and it is embedded in a larger library. If speed turns out to be |
718 | | // an issue, we could re-implement this in terms of their |
719 | | // implementation. |
720 | | // ---------------------------------------------------------------------- |
721 | | |
722 | 0 | string SimpleDtoa(double value) { |
723 | 0 | char buffer[kDoubleToBufferSize]; |
724 | 0 | return DoubleToBuffer(value, buffer); |
725 | 0 | } |
726 | | |
727 | 0 | string SimpleFtoa(float value) { |
728 | 0 | char buffer[kFloatToBufferSize]; |
729 | 0 | return FloatToBuffer(value, buffer); |
730 | 0 | } |
731 | | |
732 | 0 | char* DoubleToBuffer(double value, char* buffer) { |
733 | | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
734 | | // platforms these days. Just in case some system exists where DBL_DIG |
735 | | // is significantly larger -- and risks overflowing our buffer -- we have |
736 | | // this assert. |
737 | 0 | COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
738 | |
|
739 | 0 | int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value); |
740 | | |
741 | | // The snprintf should never overflow because the buffer is significantly |
742 | | // larger than the precision we asked for. |
743 | 0 | DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
744 | |
|
745 | 0 | if (strtod(buffer, nullptr) != value) { |
746 | 0 | snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value); |
747 | | |
748 | | // Should never overflow; see above. |
749 | 0 | DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
750 | 0 | } |
751 | 0 | return buffer; |
752 | 0 | } |
753 | | |
754 | 0 | char* FloatToBuffer(float value, char* buffer) { |
755 | | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
756 | | // platforms these days. Just in case some system exists where FLT_DIG |
757 | | // is significantly larger -- and risks overflowing our buffer -- we have |
758 | | // this assert. |
759 | 0 | COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
760 | |
|
761 | 0 | int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value); |
762 | | |
763 | | // The snprintf should never overflow because the buffer is significantly |
764 | | // larger than the precision we asked for. |
765 | 0 | DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
766 | |
|
767 | 0 | float parsed_value; |
768 | 0 | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
769 | 0 | snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value); |
770 | | |
771 | | // Should never overflow; see above. |
772 | 0 | DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
773 | 0 | } |
774 | 0 | return buffer; |
775 | 0 | } |
776 | | |
777 | 11 | int DoubleToBuffer(double value, int width, char* buffer) { |
778 | | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
779 | | // platforms these days. Just in case some system exists where DBL_DIG |
780 | | // is significantly larger -- and risks overflowing our buffer -- we have |
781 | | // this assert. |
782 | 11 | COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big); |
783 | | |
784 | 11 | int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value); |
785 | | |
786 | | // The snprintf should never overflow because the buffer is significantly |
787 | | // larger than the precision we asked for. |
788 | 11 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
789 | | |
790 | 11 | if (strtod(buffer, nullptr) != value) { |
791 | 3 | snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value); |
792 | | |
793 | | // Should never overflow; see above. |
794 | 3 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
795 | 3 | } |
796 | | |
797 | 11 | return snprintf_result; |
798 | 11 | } |
799 | | |
800 | 18.0M | int FloatToBuffer(float value, int width, char* buffer) { |
801 | | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
802 | | // platforms these days. Just in case some system exists where FLT_DIG |
803 | | // is significantly larger -- and risks overflowing our buffer -- we have |
804 | | // this assert. |
805 | 18.0M | COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big); |
806 | | |
807 | 18.0M | int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value); |
808 | | |
809 | | // The snprintf should never overflow because the buffer is significantly |
810 | | // larger than the precision we asked for. |
811 | 18.0M | DCHECK(snprintf_result > 0 && snprintf_result < width); |
812 | | |
813 | 18.0M | float parsed_value; |
814 | 18.0M | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
815 | 10 | snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value); |
816 | | |
817 | | // Should never overflow; see above. |
818 | 10 | DCHECK(snprintf_result > 0 && snprintf_result < width); |
819 | 10 | } |
820 | | |
821 | 18.0M | return snprintf_result; |
822 | 18.0M | } |
823 | | |
824 | 862 | int FastDoubleToBuffer(double value, char* buffer) { |
825 | 862 | auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
826 | 862 | *end = '\0'; |
827 | 862 | return end - buffer; |
828 | 862 | } |
829 | | |
830 | 761 | int FastFloatToBuffer(float value, char* buffer) { |
831 | 761 | auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value); |
832 | 761 | *end = '\0'; |
833 | 761 | return end - buffer; |
834 | 761 | } |
835 | | |
836 | | // ---------------------------------------------------------------------- |
837 | | // SimpleItoaWithCommas() |
838 | | // Description: converts an integer to a string. |
839 | | // Puts commas every 3 spaces. |
840 | | // Faster than printf("%d")? |
841 | | // |
842 | | // Return value: string |
843 | | // ---------------------------------------------------------------------- |
// Returns the decimal text of `i` with a comma between every group of three
// digits, counted from the right, and a leading '-' for negative values.
string SimpleItoaWithCommas(int32 i) {
    // 10 digits + 3 commas + sign: the longest output is "-2,147,483,648".
    char local[14];
    char* const end = local + sizeof(local);
    char* p = end;

    // Work on the magnitude as an unsigned value so that negating the most
    // negative int32 cannot overflow.
    uint32 n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every digit that
    // starts a new group of three. The do-while guarantees "0" for zero.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = '0' + n % 10;
        n /= 10;
        ++digits_in_group;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return string(p, end);
}
872 | | |
873 | | // We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't |
874 | | // compile. |
// Unsigned 32-bit overload: returns the decimal text of `i` with a comma
// between every group of three digits, counted from the right.
string SimpleItoaWithCommas(uint32 i) {
    // 10 digits + 3 commas: the longest output is "4,294,967,295".
    char local[13];
    char* const end = local + sizeof(local);
    char* p = end;

    // Emit digits right to left; a comma goes in front of every digit that
    // starts a new group of three. The do-while guarantees "0" for zero.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = '0' + i % 10;
        i /= 10;
        ++digits_in_group;
    } while (i != 0);

    return string(p, end);
}
898 | | |
899 | 0 | string SimpleItoaWithCommas(int64 i) { |
900 | | // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints. |
901 | 0 | char local[26]; |
902 | 0 | char* p = SimpleItoaWithCommas(i, local, sizeof(local)); |
903 | 0 | return string(p, local + sizeof(local)); |
904 | 0 | } |
905 | | |
906 | | // We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't |
907 | | // compile. |
// Unsigned 64-bit overload: returns the decimal text of `i` with a comma
// between every group of three digits, counted from the right.
string SimpleItoaWithCommas(uint64 i) {
    // 20 digits + 6 commas: the longest output is
    // "18,446,744,073,709,551,615".
    char local[26];
    char* const end = local + sizeof(local);
    char* p = end;

    // Emit digits right to left; a comma goes in front of every digit that
    // starts a new group of three. The do-while guarantees "0" for zero.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = '0' + i % 10;
        i /= 10;
        ++digits_in_group;
    } while (i != 0);

    return string(p, end);
}
931 | | |
// Writes the decimal text of `i`, with a comma between every group of three
// digits and a leading '-' for negative values, into the END of `buffer`.
//
// The text occupies [returned pointer, buffer + buffer_size); no NUL is
// written and no bounds check is performed, so the caller must supply enough
// room (19 digits + 6 commas + sign = 26 bytes covers every int64_t).
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the magnitude as an unsigned value so that
    // -9,223,372,036,854,775,808 can be negated without overflow.
    uint64_t n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every digit that
    // starts a new group of three. The do-while guarantees "0" for zero.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = '0' + n % 10;
        n /= 10;
        ++digits_in_group;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return p;
}
958 | | |
// 128-bit variant: writes the decimal text of `i`, with a comma between every
// group of three digits and a leading '-' for negative values, into the END
// of `buffer`.
//
// The text occupies [returned pointer, buffer + buffer_size); no NUL is
// written and no bounds check is performed, so the caller must supply enough
// room (39 digits + 12 commas + sign = 52 bytes covers every __int128_t).
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the magnitude as an unsigned value so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 can be negated
    // without overflow.
    __uint128_t n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every digit that
    // starts a new group of three. The do-while guarantees "0" for zero.
    int digits_in_group = 0;
    do {
        if (digits_in_group == 3) {
            *--p = ',';
            digits_in_group = 0;
        }
        *--p = '0' + n % 10;
        n /= 10;
        ++digits_in_group;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return p;
}
985 | | |
986 | | // ---------------------------------------------------------------------- |
987 | | // ItoaKMGT() |
988 | | // Description: converts an integer to a string |
989 | | // Truncates values to a readable unit: K, G, M or T |
990 | | // Opposite of atoi_kmgt() |
991 | | // e.g. 100 -> "100" 1500 -> "1500" 4000 -> "3K" 47185920 -> "45M" |
992 | | // |
993 | | // Return value: string |
994 | | // ---------------------------------------------------------------------- |
995 | 0 | string ItoaKMGT(int64 i) { |
996 | 0 | const char *sign = "", *suffix = ""; |
997 | 0 | if (i < 0) { |
998 | | // We lose some accuracy if the caller passes LONG_LONG_MIN, but |
999 | | // that's OK as this function is only for human readability |
1000 | 0 | if (i == numeric_limits<int64>::min()) i++; |
1001 | 0 | sign = "-"; |
1002 | 0 | i = -i; |
1003 | 0 | } |
1004 | |
|
1005 | 0 | int64 val; |
1006 | |
|
1007 | 0 | if ((val = (i >> 40)) > 1) { |
1008 | 0 | suffix = "T"; |
1009 | 0 | } else if ((val = (i >> 30)) > 1) { |
1010 | 0 | suffix = "G"; |
1011 | 0 | } else if ((val = (i >> 20)) > 1) { |
1012 | 0 | suffix = "M"; |
1013 | 0 | } else if ((val = (i >> 10)) > 1) { |
1014 | 0 | suffix = "K"; |
1015 | 0 | } else { |
1016 | 0 | val = i; |
1017 | 0 | } |
1018 | |
|
1019 | 0 | return StringPrintf("%s%" PRId64 "%s", sign, val, suffix); |
1020 | 0 | } |
1021 | | |
1022 | 0 | string AccurateItoaKMGT(int64 i) { |
1023 | 0 | const char* sign = ""; |
1024 | 0 | if (i < 0) { |
1025 | | // We lose some accuracy if the caller passes LONG_LONG_MIN, but |
1026 | | // that's OK as this function is only for human readability |
1027 | 0 | if (i == numeric_limits<int64>::min()) i++; |
1028 | 0 | sign = "-"; |
1029 | 0 | i = -i; |
1030 | 0 | } |
1031 | |
|
1032 | 0 | string ret = StringPrintf("%s", sign); |
1033 | 0 | int64 val; |
1034 | 0 | if ((val = (i >> 40)) > 1) { |
1035 | 0 | ret += StringPrintf("%" PRId64 |
1036 | 0 | "%s" |
1037 | 0 | ",", |
1038 | 0 | val, "T"); |
1039 | 0 | i = i - (val << 40); |
1040 | 0 | } |
1041 | 0 | if ((val = (i >> 30)) > 1) { |
1042 | 0 | ret += StringPrintf("%" PRId64 |
1043 | 0 | "%s" |
1044 | 0 | ",", |
1045 | 0 | val, "G"); |
1046 | 0 | i = i - (val << 30); |
1047 | 0 | } |
1048 | 0 | if ((val = (i >> 20)) > 1) { |
1049 | 0 | ret += StringPrintf("%" PRId64 |
1050 | 0 | "%s" |
1051 | 0 | ",", |
1052 | 0 | val, "M"); |
1053 | 0 | i = i - (val << 20); |
1054 | 0 | } |
1055 | 0 | if ((val = (i >> 10)) > 1) { |
1056 | 0 | ret += StringPrintf("%" PRId64 "%s", val, "K"); |
1057 | 0 | i = i - (val << 10); |
1058 | 0 | } else { |
1059 | 0 | ret += StringPrintf("%" PRId64 "%s", i, "K"); |
1060 | 0 | } |
1061 | |
|
1062 | 0 | return ret; |
1063 | 0 | } |
1064 | | |
1065 | | // DEPRECATED(wadetregaskis). |
1066 | | // These are non-inline because some BUILD files turn on -Wformat-non-literal. |
1067 | | |
1068 | 0 | string FloatToString(float f, const char* format) { |
1069 | 0 | return StringPrintf(format, f); |
1070 | 0 | } |
1071 | | |
1072 | 0 | string IntToString(int i, const char* format) { |
1073 | 0 | return StringPrintf(format, i); |
1074 | 0 | } |
1075 | | |
1076 | 0 | string Int64ToString(int64 i64, const char* format) { |
1077 | 0 | return StringPrintf(format, i64); |
1078 | 0 | } |
1079 | | |
1080 | 0 | string UInt64ToString(uint64 ui64, const char* format) { |
1081 | 0 | return StringPrintf(format, ui64); |
1082 | 0 | } |