Coverage Report

Created: 2025-04-28 20:20

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <assert.h>
10
#include <ctype.h>
11
#include <errno.h>
12
#include <float.h> // for DBL_DIG and FLT_DIG
13
#include <math.h>  // for HUGE_VAL
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <string.h>
17
#include <inttypes.h>
18
#include <sys/types.h>
19
#include <limits>
20
#include <ostream>
21
22
#include "common/exception.h"
23
24
using std::numeric_limits;
25
#include <string>
26
27
using std::string;
28
29
#include <fmt/compile.h>
30
#include <fmt/format.h>
31
32
#include "common/logging.h"
33
34
#include "gutil/integral_types.h"
35
#include "gutil/stringprintf.h"
36
#include "gutil/strings/ascii_ctype.h"
37
#include "gutil/strtoint.h"
38
39
// ----------------------------------------------------------------------
40
// ConsumeStrayLeadingZeroes
41
//    Eliminates all leading zeroes (unless the string itself is composed
42
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
43
// --------------------------------------------------------------------
44
45
0
// Strips redundant leading '0' characters from *str in place.
// A string made up entirely of zeroes is reduced to a single "0"; strings of
// length 0 or 1, or strings that do not start with '0', are left untouched.
void ConsumeStrayLeadingZeroes(string* const str) {
    const string::size_type total = str->size();
    if (total <= 1 || (*str)[0] != '0') {
        return;
    }

    // Count the run of zeroes at the front (the first one is already known).
    string::size_type zeros = 1;
    while (zeros < total && (*str)[zeros] == '0') {
        ++zeros;
    }
    DCHECK_GT(zeros, 0U);

    // If every character is a zero, keep the last one.
    if (zeros == total) {
        zeros = total - 1;
    }
    str->erase(0, zeros);
}
60
61
// ----------------------------------------------------------------------
62
// ParseLeadingInt32Value()
63
// ParseLeadingUInt32Value()
64
//    A simple parser for [u]int32 values. Returns the parsed value
65
//    if a valid value is found; else returns deflt
66
//    This cannot handle decimal numbers with leading 0s.
67
// --------------------------------------------------------------------
68
69
0
// Parses a leading integer from str with strtol (base 0, so the base is
// auto-detected from the prefix) and saturates the result to the int32 range.
// Returns deflt when strtol consumed no characters.
int32 ParseLeadingInt32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    const long parsed = strtol(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt; // nothing was parsed
    }
    // long may be wider than 32 bits, so clamp by hand.
    if (parsed > numeric_limits<int32>::max()) {
        return numeric_limits<int32>::max();
    }
    if (parsed < numeric_limits<int32>::min()) {
        return numeric_limits<int32>::min();
    }
    return static_cast<int32>(parsed);
}
80
81
0
// Parses a leading unsigned 32-bit integer from str (base 0, so the base is
// auto-detected from the prefix). Returns deflt when no characters were
// consumed by the underlying parser.
uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() != numeric_limits<uint32>::max()) {
        // long is wider than 32 bits: parse as a signed 64-bit value and apply
        // the limits by hand. A 64-bit strtoul cannot be used here because it
        // could not tell "-2" (which should wrap to UINT_MAX-1) apart from
        // ULONG_MAX-1 (which should be pegged to UINT_MAX).
        int64 wide = strto64(str, &parse_end, 0);
        const int64 limit = numeric_limits<uint32>::max();
        if (wide > limit || wide < -limit) {
            wide = limit;
        }
        if (parse_end == str) {
            return deflt;
        }
        // Inside [-limit, limit] truncation to 32 bits wraps negatives correctly.
        return static_cast<uint32>(wide);
    }

    // long is 32 bits wide, so strtoul already has the right range.
    const uint32 narrow = strtoul(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    return narrow;
}
103
104
// ----------------------------------------------------------------------
105
// ParseLeadingDec32Value
106
// ParseLeadingUDec32Value
107
//    A simple parser for [u]int32 values. Returns the parsed value
108
//    if a valid value is found; else returns deflt
109
//    The string passed in is treated as *10 based*.
110
//    This can handle strings with leading 0s.
111
// --------------------------------------------------------------------
112
113
0
// Parses a leading base-10 integer from str with strtol and saturates the
// result to the int32 range. Returns deflt when strtol consumed no characters.
int32 ParseLeadingDec32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    const long parsed = strtol(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt; // nothing was parsed
    }
    // long may be wider than 32 bits, so clamp by hand.
    if (parsed > numeric_limits<int32>::max()) {
        return numeric_limits<int32>::max();
    }
    if (parsed < numeric_limits<int32>::min()) {
        return numeric_limits<int32>::min();
    }
    return static_cast<int32>(parsed);
}
124
125
0
// Parses a leading base-10 unsigned 32-bit integer from str. Returns deflt
// when no characters were consumed by the underlying parser.
uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() != numeric_limits<uint32>::max()) {
        // long is wider than 32 bits: parse as a signed 64-bit value and apply
        // the limits by hand. A 64-bit strtoul cannot be used here because it
        // could not tell "-2" (which should wrap to UINT_MAX-1) apart from
        // ULONG_MAX-1 (which should be pegged to UINT_MAX).
        int64 wide = strto64(str, &parse_end, 10);
        const int64 limit = numeric_limits<uint32>::max();
        if (wide > limit || wide < -limit) {
            wide = limit;
        }
        if (parse_end == str) {
            return deflt;
        }
        // Inside [-limit, limit] truncation to 32 bits wraps negatives correctly.
        return static_cast<uint32>(wide);
    }

    // long is 32 bits wide, so strtoul already has the right range.
    const uint32 narrow = strtoul(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    return narrow;
}
147
148
// ----------------------------------------------------------------------
149
// ParseLeadingUInt64Value
150
// ParseLeadingInt64Value
151
// ParseLeadingHex64Value
152
//    A simple parser for 64-bit values. Returns the parsed value if a
153
//    valid integer is found; else returns deflt
154
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
155
// --------------------------------------------------------------------
156
0
// Parses a leading unsigned 64-bit integer via strtou64 with base 0.
// Returns deflt when the parser consumed no characters.
uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
161
162
0
// Parses a leading signed 64-bit integer via strto64 with base 0.
// Returns deflt when the parser consumed no characters.
int64 ParseLeadingInt64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
167
168
0
// Parses a leading hexadecimal (base 16) unsigned 64-bit integer via strtou64.
// Returns deflt when the parser consumed no characters.
uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 16);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
173
174
// ----------------------------------------------------------------------
175
// ParseLeadingDec64Value
176
// ParseLeadingUDec64Value
177
//    A simple parser for [u]int64 values. Returns the parsed value
178
//    if a valid value is found; else returns deflt
179
//    The string passed in is treated as *10 based*.
180
//    This can handle strings with leading 0s.
181
// --------------------------------------------------------------------
182
183
0
// Parses a leading base-10 signed 64-bit integer via strto64.
// Returns deflt when the parser consumed no characters.
int64 ParseLeadingDec64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
188
189
0
// Parses a leading base-10 unsigned 64-bit integer via strtou64.
// Returns deflt when the parser consumed no characters.
uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
194
195
// ----------------------------------------------------------------------
196
// ParseLeadingDoubleValue()
197
//    A simple parser for double values. Returns the parsed value
198
//    if a valid value is found; else returns deflt
199
// --------------------------------------------------------------------
200
201
0
// Parses a leading floating-point value from str with strtod.
// Returns deflt when strtod consumed no characters or reported an error
// through errno (e.g. overflow/underflow); otherwise returns the parsed value.
double ParseLeadingDoubleValue(const char* str, double deflt) {
    char* parse_end = nullptr;
    errno = 0; // so that a non-zero value afterwards is attributable to strtod
    const double parsed = strtod(str, &parse_end);

    const bool range_error = (errno != 0);
    const bool nothing_parsed = (parse_end == str);
    return (range_error || nothing_parsed) ? deflt : parsed;
}
212
213
// ----------------------------------------------------------------------
214
// ParseLeadingBoolValue()
215
//    A recognizer of boolean string values. Returns the parsed value
216
//    if a valid value is found; else returns deflt.  This skips leading
217
//    whitespace, is case insensitive, and recognizes these forms:
218
//    0/1, false/true, no/yes, n/y
219
// --------------------------------------------------------------------
220
0
bool ParseLeadingBoolValue(const char* str, bool deflt) {
221
0
    static const int kMaxLen = 5;
222
0
    char value[kMaxLen + 1];
223
    // Skip whitespace
224
0
    while (ascii_isspace(*str)) {
225
0
        ++str;
226
0
    }
227
0
    int len = 0;
228
0
    for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str);
229
0
    if (len == 0 || len > kMaxLen) return deflt;
230
0
    value[len] = '\0';
231
0
    switch (len) {
232
0
    case 1:
233
0
        if (value[0] == '0' || value[0] == 'n') return false;
234
0
        if (value[0] == '1' || value[0] == 'y') return true;
235
0
        break;
236
0
    case 2:
237
0
        if (!strcmp(value, "no")) return false;
238
0
        break;
239
0
    case 3:
240
0
        if (!strcmp(value, "yes")) return true;
241
0
        break;
242
0
    case 4:
243
0
        if (!strcmp(value, "true")) return true;
244
0
        break;
245
0
    case 5:
246
0
        if (!strcmp(value, "false")) return false;
247
0
        break;
248
0
    }
249
0
    return deflt;
250
0
}
251
252
// ----------------------------------------------------------------------
253
// Uint64ToString()
254
// FloatToString()
255
// IntToString()
256
//    Convert various types to their string representation, possibly padded
257
//    with spaces, using snprintf format specifiers.
258
// ----------------------------------------------------------------------
259
260
0
// Formats fp as exactly 16 lower-case, zero-padded hexadecimal digits.
string Uint64ToString(uint64 fp) {
    char hex[16 + 1]; // 16 digits plus the terminating NUL
    snprintf(hex, sizeof(hex), "%016" PRIx64, fp);
    return string(hex);
}
265
namespace {
266
267
// Represents integer values of digits.
268
// Uses 36 to indicate an invalid character since we support
269
// bases up to 36.
270
static const int8 kAsciiToInt[256] = {
271
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
272
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
273
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  36, 36,
274
        36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
275
        27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16,
276
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36,
277
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
278
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
279
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
280
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
281
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
282
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
283
284
// Input format based on POSIX.1-2008 strtol
285
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
286
template <typename IntType>
287
33
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
288
    // Consume whitespace.
289
34
    while (start < end && ascii_isspace(start[0])) {
290
1
        ++start;
291
1
    }
292
33
    while (start < end && ascii_isspace(end[-1])) {
293
0
        --end;
294
0
    }
295
33
    if (start >= end) {
296
3
        return false;
297
3
    }
298
299
    // Consume sign.
300
30
    const bool negative = (start[0] == '-');
301
30
    if (negative || start[0] == '+') {
302
7
        ++start;
303
7
        if (start >= end) {
304
0
            return false;
305
0
        }
306
7
    }
307
308
    // Consume base-dependent prefix.
309
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
310
    //  base 16: "0x" -> base 16
311
    // Also validate the base.
312
30
    if (base == 0) {
313
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
314
0
            base = 16;
315
0
            start += 2;
316
0
        } else if (end - start >= 1 && start[0] == '0') {
317
0
            base = 8;
318
0
            start += 1;
319
0
        } else {
320
0
            base = 10;
321
0
        }
322
30
    } else if (base == 16) {
323
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
324
0
            start += 2;
325
0
        }
326
30
    } else if (base >= 2 && base <= 36) {
327
        // okay
328
30
    } else {
329
0
        return false;
330
0
    }
331
332
    // Consume digits.
333
    //
334
    // The classic loop:
335
    //
336
    //   for each digit
337
    //     value = value * base + digit
338
    //   value *= sign
339
    //
340
    // The classic loop needs overflow checking.  It also fails on the most
341
    // negative integer, -2147483648 in 32-bit two's complement representation.
342
    //
343
    // My improved loop:
344
    //
345
    //  if (!negative)
346
    //    for each digit
347
    //      value = value * base
348
    //      value = value + digit
349
    //  else
350
    //    for each digit
351
    //      value = value * base
352
    //      value = value - digit
353
    //
354
    // Overflow checking becomes simple.
355
    //
356
    // I present the positive code first for easier reading.
357
30
    IntType value = 0;
358
30
    if (!negative) {
359
23
        const IntType vmax = std::numeric_limits<IntType>::max();
360
23
        assert(vmax > 0);
361
0
        assert(vmax >= base);
362
0
        const IntType vmax_over_base = vmax / base;
363
        // loop over digits
364
        // loop body is interleaved for perf, not readability
365
148
        for (; start < end; ++start) {
366
134
            unsigned char c = static_cast<unsigned char>(start[0]);
367
134
            int digit = kAsciiToInt[c];
368
134
            if (value > vmax_over_base) return false;
369
134
            value *= base;
370
134
            if (digit >= base) return false;
371
128
            if (value > vmax - digit) return false;
372
125
            value += digit;
373
125
        }
374
23
    } else {
375
7
        const IntType vmin = std::numeric_limits<IntType>::min();
376
7
        assert(vmin < 0);
377
0
        assert(vmin <= 0 - base);
378
0
        IntType vmin_over_base = vmin / base;
379
        // 2003 c++ standard [expr.mul]
380
        // "... the sign of the remainder is implementation-defined."
381
        // Although (vmin/base)*base + vmin%base is always vmin.
382
        // 2011 c++ standard tightens the spec but we cannot rely on it.
383
7
        if (vmin % base > 0) {
384
0
            vmin_over_base += 1;
385
0
        }
386
        // loop over digits
387
        // loop body is interleaved for perf, not readability
388
53
        for (; start < end; ++start) {
389
46
            unsigned char c = static_cast<unsigned char>(start[0]);
390
46
            int digit = kAsciiToInt[c];
391
46
            if (value < vmin_over_base) return false;
392
46
            value *= base;
393
46
            if (digit >= base) return false;
394
46
            if (value < vmin + digit) return false;
395
46
            value -= digit;
396
46
        }
397
7
    }
398
399
    // Store output.
400
21
    *value_p = value;
401
21
    return true;
402
30
}
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Line
Count
Source
287
21
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
288
    // Consume whitespace.
289
21
    while (start < end && ascii_isspace(start[0])) {
290
0
        ++start;
291
0
    }
292
21
    while (start < end && ascii_isspace(end[-1])) {
293
0
        --end;
294
0
    }
295
21
    if (start >= end) {
296
1
        return false;
297
1
    }
298
299
    // Consume sign.
300
20
    const bool negative = (start[0] == '-');
301
20
    if (negative || start[0] == '+') {
302
5
        ++start;
303
5
        if (start >= end) {
304
0
            return false;
305
0
        }
306
5
    }
307
308
    // Consume base-dependent prefix.
309
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
310
    //  base 16: "0x" -> base 16
311
    // Also validate the base.
312
20
    if (base == 0) {
313
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
314
0
            base = 16;
315
0
            start += 2;
316
0
        } else if (end - start >= 1 && start[0] == '0') {
317
0
            base = 8;
318
0
            start += 1;
319
0
        } else {
320
0
            base = 10;
321
0
        }
322
20
    } else if (base == 16) {
323
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
324
0
            start += 2;
325
0
        }
326
20
    } else if (base >= 2 && base <= 36) {
327
        // okay
328
20
    } else {
329
0
        return false;
330
0
    }
331
332
    // Consume digits.
333
    //
334
    // The classic loop:
335
    //
336
    //   for each digit
337
    //     value = value * base + digit
338
    //   value *= sign
339
    //
340
    // The classic loop needs overflow checking.  It also fails on the most
341
    // negative integer, -2147483648 in 32-bit two's complement representation.
342
    //
343
    // My improved loop:
344
    //
345
    //  if (!negative)
346
    //    for each digit
347
    //      value = value * base
348
    //      value = value + digit
349
    //  else
350
    //    for each digit
351
    //      value = value * base
352
    //      value = value - digit
353
    //
354
    // Overflow checking becomes simple.
355
    //
356
    // I present the positive code first for easier reading.
357
20
    IntType value = 0;
358
20
    if (!negative) {
359
15
        const IntType vmax = std::numeric_limits<IntType>::max();
360
15
        assert(vmax > 0);
361
0
        assert(vmax >= base);
362
0
        const IntType vmax_over_base = vmax / base;
363
        // loop over digits
364
        // loop body is interleaved for perf, not readability
365
80
        for (; start < end; ++start) {
366
70
            unsigned char c = static_cast<unsigned char>(start[0]);
367
70
            int digit = kAsciiToInt[c];
368
70
            if (value > vmax_over_base) return false;
369
70
            value *= base;
370
70
            if (digit >= base) return false;
371
67
            if (value > vmax - digit) return false;
372
65
            value += digit;
373
65
        }
374
15
    } else {
375
5
        const IntType vmin = std::numeric_limits<IntType>::min();
376
5
        assert(vmin < 0);
377
0
        assert(vmin <= 0 - base);
378
0
        IntType vmin_over_base = vmin / base;
379
        // 2003 c++ standard [expr.mul]
380
        // "... the sign of the remainder is implementation-defined."
381
        // Although (vmin/base)*base + vmin%base is always vmin.
382
        // 2011 c++ standard tightens the spec but we cannot rely on it.
383
5
        if (vmin % base > 0) {
384
0
            vmin_over_base += 1;
385
0
        }
386
        // loop over digits
387
        // loop body is interleaved for perf, not readability
388
31
        for (; start < end; ++start) {
389
26
            unsigned char c = static_cast<unsigned char>(start[0]);
390
26
            int digit = kAsciiToInt[c];
391
26
            if (value < vmin_over_base) return false;
392
26
            value *= base;
393
26
            if (digit >= base) return false;
394
26
            if (value < vmin + digit) return false;
395
26
            value -= digit;
396
26
        }
397
5
    }
398
399
    // Store output.
400
15
    *value_p = value;
401
15
    return true;
402
20
}
numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_
Line
Count
Source
287
12
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
288
    // Consume whitespace.
289
13
    while (start < end && ascii_isspace(start[0])) {
290
1
        ++start;
291
1
    }
292
12
    while (start < end && ascii_isspace(end[-1])) {
293
0
        --end;
294
0
    }
295
12
    if (start >= end) {
296
2
        return false;
297
2
    }
298
299
    // Consume sign.
300
10
    const bool negative = (start[0] == '-');
301
10
    if (negative || start[0] == '+') {
302
2
        ++start;
303
2
        if (start >= end) {
304
0
            return false;
305
0
        }
306
2
    }
307
308
    // Consume base-dependent prefix.
309
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
310
    //  base 16: "0x" -> base 16
311
    // Also validate the base.
312
10
    if (base == 0) {
313
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
314
0
            base = 16;
315
0
            start += 2;
316
0
        } else if (end - start >= 1 && start[0] == '0') {
317
0
            base = 8;
318
0
            start += 1;
319
0
        } else {
320
0
            base = 10;
321
0
        }
322
10
    } else if (base == 16) {
323
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
324
0
            start += 2;
325
0
        }
326
10
    } else if (base >= 2 && base <= 36) {
327
        // okay
328
10
    } else {
329
0
        return false;
330
0
    }
331
332
    // Consume digits.
333
    //
334
    // The classic loop:
335
    //
336
    //   for each digit
337
    //     value = value * base + digit
338
    //   value *= sign
339
    //
340
    // The classic loop needs overflow checking.  It also fails on the most
341
    // negative integer, -2147483648 in 32-bit two's complement representation.
342
    //
343
    // My improved loop:
344
    //
345
    //  if (!negative)
346
    //    for each digit
347
    //      value = value * base
348
    //      value = value + digit
349
    //  else
350
    //    for each digit
351
    //      value = value * base
352
    //      value = value - digit
353
    //
354
    // Overflow checking becomes simple.
355
    //
356
    // I present the positive code first for easier reading.
357
10
    IntType value = 0;
358
10
    if (!negative) {
359
8
        const IntType vmax = std::numeric_limits<IntType>::max();
360
8
        assert(vmax > 0);
361
0
        assert(vmax >= base);
362
0
        const IntType vmax_over_base = vmax / base;
363
        // loop over digits
364
        // loop body is interleaved for perf, not readability
365
68
        for (; start < end; ++start) {
366
64
            unsigned char c = static_cast<unsigned char>(start[0]);
367
64
            int digit = kAsciiToInt[c];
368
64
            if (value > vmax_over_base) return false;
369
64
            value *= base;
370
64
            if (digit >= base) return false;
371
61
            if (value > vmax - digit) return false;
372
60
            value += digit;
373
60
        }
374
8
    } else {
375
2
        const IntType vmin = std::numeric_limits<IntType>::min();
376
2
        assert(vmin < 0);
377
0
        assert(vmin <= 0 - base);
378
0
        IntType vmin_over_base = vmin / base;
379
        // 2003 c++ standard [expr.mul]
380
        // "... the sign of the remainder is implementation-defined."
381
        // Although (vmin/base)*base + vmin%base is always vmin.
382
        // 2011 c++ standard tightens the spec but we cannot rely on it.
383
2
        if (vmin % base > 0) {
384
0
            vmin_over_base += 1;
385
0
        }
386
        // loop over digits
387
        // loop body is interleaved for perf, not readability
388
22
        for (; start < end; ++start) {
389
20
            unsigned char c = static_cast<unsigned char>(start[0]);
390
20
            int digit = kAsciiToInt[c];
391
20
            if (value < vmin_over_base) return false;
392
20
            value *= base;
393
20
            if (digit >= base) return false;
394
20
            if (value < vmin + digit) return false;
395
20
            value -= digit;
396
20
        }
397
2
    }
398
399
    // Store output.
400
6
    *value_p = value;
401
6
    return true;
402
10
}
403
404
} // anonymous namespace
405
406
0
bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) {
407
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
408
0
}
409
410
0
bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) {
411
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
412
0
}
413
414
21
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
415
21
    return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
416
21
}
417
418
12
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
419
12
    return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
420
12
}
421
422
0
bool safe_strto32_base(const char* str, int32* value, int base) {
423
0
    char* endptr;
424
0
    errno = 0; // errno only gets set on errors
425
0
    *value = strto32(str, &endptr, base);
426
0
    if (endptr != str) {
427
0
        while (ascii_isspace(*endptr)) ++endptr;
428
0
    }
429
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
430
0
}
431
432
0
bool safe_strto64_base(const char* str, int64* value, int base) {
433
0
    char* endptr;
434
0
    errno = 0; // errno only gets set on errors
435
0
    *value = strto64(str, &endptr, base);
436
0
    if (endptr != str) {
437
0
        while (ascii_isspace(*endptr)) ++endptr;
438
0
    }
439
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
440
0
}
441
442
0
bool safe_strtou32_base(const char* str, uint32* value, int base) {
443
    // strtoul does not give any errors on negative numbers, so we have to
444
    // search the string for '-' manually.
445
0
    while (ascii_isspace(*str)) ++str;
446
0
    if (*str == '-') return false;
447
448
0
    char* endptr;
449
0
    errno = 0; // errno only gets set on errors
450
0
    *value = strtou32(str, &endptr, base);
451
0
    if (endptr != str) {
452
0
        while (ascii_isspace(*endptr)) ++endptr;
453
0
    }
454
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
455
0
}
456
457
0
bool safe_strtou64_base(const char* str, uint64* value, int base) {
458
    // strtou64 does not give any errors on negative numbers, so we have to
459
    // search the string for '-' manually.
460
0
    while (ascii_isspace(*str)) ++str;
461
0
    if (*str == '-') return false;
462
463
0
    char* endptr;
464
0
    errno = 0; // errno only gets set on errors
465
0
    *value = strtou64(str, &endptr, base);
466
0
    if (endptr != str) {
467
0
        while (ascii_isspace(*endptr)) ++endptr;
468
0
    }
469
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
470
0
}
471
472
// ----------------------------------------------------------------------
473
// u64tostr_base36()
474
//    Converts unsigned number to string representation in base-36.
475
// --------------------------------------------------------------------
476
0
// Writes number into buffer as a NUL-terminated, lower-case base-36 string.
//   buf_size  Capacity of buffer in bytes, including room for the NUL.
//   buffer    Destination; must be non-null.
// Returns the number of digits written (excluding the NUL), or 0 when the
// buffer is too small to hold the digits.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
    CHECK_GT(buf_size, 0);
    CHECK(buffer);
    static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

    // Digits come out least-significant first, so build the string backwards
    // from the end of the buffer and slide it to the front afterwards.
    char* const terminator = buffer + buf_size - 1;
    *terminator = '\0';
    char* cursor = terminator;

    do {
        if (cursor == buffer) { // Ran out of space.
            return 0;
        }
        const int remainder = number % 36;
        number /= 36;
        *--cursor = kAlphabet[remainder];
    } while (number);

    const size_t digits = terminator - cursor;
    memmove(buffer, cursor, digits + 1); // +1 carries the NUL along

    return digits;
}
498
499
// Generate functions that wrap safe_strtoXXX_base.
500
#define GEN_SAFE_STRTO(name, type)                                                  \
501
0
    bool name##_base(const string& str, type* value, int base) {                    \
502
0
        return name##_base(str.c_str(), value, base);                               \
503
0
    }                                                                               \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi
504
0
    bool name(const char* str, type* value) { return name##_base(str, value, 10); } \
Unexecuted instantiation: _Z12safe_strto32PKcPi
Unexecuted instantiation: _Z13safe_strtou32PKcPj
Unexecuted instantiation: _Z12safe_strto64PKcPl
Unexecuted instantiation: _Z13safe_strtou64PKcPm
505
0
    bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); }
Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj
Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl
Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm
506
GEN_SAFE_STRTO(safe_strto32, int32);
507
GEN_SAFE_STRTO(safe_strtou32, uint32);
508
GEN_SAFE_STRTO(safe_strto64, int64);
509
GEN_SAFE_STRTO(safe_strtou64, uint64);
510
#undef GEN_SAFE_STRTO
511
512
18.0M
bool safe_strtof(const char* str, float* value) {
513
18.0M
    char* endptr;
514
#ifdef _MSC_VER // has no strtof()
515
    *value = strtod(str, &endptr);
516
#else
517
18.0M
    *value = strtof(str, &endptr);
518
18.0M
#endif
519
18.0M
    if (endptr != str) {
520
18.0M
        while (ascii_isspace(*endptr)) ++endptr;
521
18.0M
    }
522
    // Ignore range errors from strtod/strtof.
523
    // The values it returns on underflow and
524
    // overflow are the right fallback in a
525
    // robust setting.
526
18.0M
    return *str != '\0' && *endptr == '\0';
527
18.0M
}
528
529
0
bool safe_strtod(const char* str, double* value) {
530
0
    char* endptr;
531
0
    *value = strtod(str, &endptr);
532
0
    if (endptr != str) {
533
0
        while (ascii_isspace(*endptr)) ++endptr;
534
0
    }
535
    // Ignore range errors from strtod.  The values it
536
    // returns on underflow and overflow are the right
537
    // fallback in a robust setting.
538
0
    return *str != '\0' && *endptr == '\0';
539
0
}
540
541
11
bool safe_strtof(const string& str, float* value) {
542
11
    return safe_strtof(str.c_str(), value);
543
11
}
544
545
0
bool safe_strtod(const string& str, double* value) {
546
0
    return safe_strtod(str.c_str(), value);
547
0
}
548
549
0
// Parses a base-10 unsigned integer from `s`, optionally followed by a single
// unit letter K, M, G or T (case-insensitive), and returns the number scaled
// by 2^10, 2^20, 2^30 or 2^40 respectively. Only the first character after the
// digits is inspected. Throws doris::Exception for any other suffix character.
// The multiplication is not checked for overflow.
uint64 atoi_kmgt(const char* s) {
    char* suffix = nullptr;
    const uint64 magnitude = strtou64(s, &suffix, 10);

    char unit = *suffix;
    if (unit == '\0') {
        // No unit letter: the value is returned unscaled.
        return magnitude;
    }

    unit = ascii_toupper(unit);
    int shift = 0;
    switch (unit) {
    case 'K':
        shift = 10;
        break;
    case 'M':
        shift = 20;
        break;
    case 'G':
        shift = 30;
        break;
    case 'T':
        shift = 40;
        break;
    default:
        throw doris::Exception(doris::Status::FatalError(
                "Invalid mnemonic: `{}'; should be one of `K', `M', `G', and `T'.", unit));
    }
    return magnitude * (GG_ULONGLONG(1) << shift);
}
576
577
// ----------------------------------------------------------------------
578
// FastIntToBuffer()
579
// FastInt64ToBuffer()
580
// FastHexToBuffer()
581
// FastHex64ToBuffer()
582
// FastHex32ToBuffer()
583
// FastTimeToBuffer()
584
//    These are intended for speed.  FastHexToBuffer() assumes the
585
//    integer is non-negative.  FastHexToBuffer() puts output in
586
//    hex rather than decimal.  FastTimeToBuffer() puts the output
587
//    into RFC822 format.  If time is 0, uses the current time.
588
//
589
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
590
//    padded to exactly 16 bytes (plus one byte for '\0')
591
//
592
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
593
//    padded to exactly 8 bytes (plus one byte for '\0')
594
//
595
//       All functions take the output buffer as an arg.  FastInt()
596
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
597
//    They all return a pointer to the beginning of the output,
598
//    which may not be the beginning of the input buffer.  (Though
599
//    for FastTimeToBuffer(), we guarantee that it is.)
600
// ----------------------------------------------------------------------
601
602
0
// Writes the text produced by FastInt64ToBufferLeft() for `i` at the start of
// `buffer` and returns `buffer` (the start of the text, not its end).
char* FastInt64ToBuffer(int64 i, char* buffer) {
    char* const start = buffer;
    FastInt64ToBufferLeft(i, start);
    return start;
}
606
607
0
// Writes the text produced by FastInt32ToBufferLeft() for `i` at the start of
// `buffer` and returns `buffer` (the start of the text, not its end).
char* FastInt32ToBuffer(int32 i, char* buffer) {
    char* const start = buffer;
    FastInt32ToBufferLeft(i, start);
    return start;
}
611
612
0
char* FastHexToBuffer(int i, char* buffer) {
613
0
    CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
614
615
0
    static const char* hexdigits = "0123456789abcdef";
616
0
    char* p = buffer + 21;
617
0
    *p-- = '\0';
618
0
    do {
619
0
        *p-- = hexdigits[i & 15]; // mod by 16
620
0
        i >>= 4;                  // divide by 16
621
0
    } while (i > 0);
622
0
    return p + 1;
623
0
}
624
625
0
// Writes the low `num_byte` hex digits of `value` (lowercase, zero padded)
// into buffer[0 .. num_byte - 1], stores a NUL at buffer[num_byte], and
// returns `buffer`. Note that despite its name, `num_byte` is the number of
// output characters (one per nibble).
char* InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
    static const char* kHexChars = "0123456789abcdef";
    buffer[num_byte] = '\0';
    // Fill from the rightmost position so the least significant nibble ends up last.
    for (int remaining = num_byte; remaining > 0; --remaining) {
        buffer[remaining - 1] = kHexChars[value & 0xf];
        value >>= 4;
    }
    return buffer;
}
634
635
0
// Formats `value` as exactly 16 hex characters plus a terminating NUL via
// InternalFastHexToBuffer() and returns `buffer`.
char* FastHex64ToBuffer(uint64 value, char* buffer) {
    const int hex_chars = 16;
    return InternalFastHexToBuffer(value, buffer, hex_chars);
}
638
639
0
// Formats `value` as exactly 8 hex characters plus a terminating NUL via
// InternalFastHexToBuffer() and returns `buffer`.
char* FastHex32ToBuffer(uint32 value, char* buffer) {
    const int hex_chars = 8;
    return InternalFastHexToBuffer(value, buffer, hex_chars);
}
642
643
// TODO(user): revisit the two_ASCII_digits optimization.
644
//
645
// Several converters use this table to reduce
646
// division and modulo operations.
647
extern const char two_ASCII_digits[100][2]; // from strutil.cc
648
649
// ----------------------------------------------------------------------
650
// FastInt32ToBufferLeft()
651
// FastUInt32ToBufferLeft()
652
// FastInt64ToBufferLeft()
653
// FastUInt64ToBufferLeft()
654
//
655
// Like the Fast*ToBuffer() functions above, these are intended for speed.
656
// Unlike the Fast*ToBuffer() functions, however, these functions write
657
// their output to the beginning of the buffer (hence the name, as the
658
// output is left-aligned).  The caller is responsible for ensuring that
659
// the buffer has enough space to hold the output.
660
//
661
// Returns a pointer to the end of the string (i.e. the null character
662
// terminating the string).
663
// ----------------------------------------------------------------------
664
665
0
// Writes the decimal representation of `u` (no leading zeroes) at the start
// of `buffer`, NUL-terminates it, and returns a pointer to that NUL.
//
// The implementation keeps the number of divides low by using multiplication
// and subtraction instead of mod (%), and by emitting two digits at a time
// through the two_ASCII_digits table. The ten-digit case comes first so that
// it can be laid out as one straight-line run; smaller values emit their
// (possibly single) leading digit below and then jump into that run at the
// matching position.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
    uint group = 0;

    // Appends the two-character table entry for `value` and advances `buffer`.
    const auto put_pair = [&buffer](uint value) {
        const char* glyphs = two_ASCII_digits[value];
        buffer[0] = glyphs[0];
        buffer[1] = glyphs[1];
        buffer += 2;
    };

    if (u >= 1000000000) {     // >= 1,000,000,000
        group = u / 100000000; // 100,000,000
        put_pair(group);
    minus_1e8:
        u -= group * 100000000; // 100,000,000
    under_1e8:
        group = u / 1000000; // 1,000,000
        put_pair(group);
    minus_1e6:
        u -= group * 1000000; // 1,000,000
    under_1e6:
        group = u / 10000; // 10,000
        put_pair(group);
    minus_1e4:
        u -= group * 10000; // 10,000
    under_1e4:
        group = u / 100;
        put_pair(group);
    minus_1e2:
        u -= group * 100;
    under_1e2:
        group = u;
        put_pair(group);
    finish:
        *buffer = 0;
        return buffer;
    }

    // Each case below handles an odd digit count by writing the single
    // leading digit itself, or jumps straight to the pair emitter when the
    // digit count is even.
    if (u < 100) {
        group = u;
        if (u >= 10) goto under_1e2;
        *buffer++ = '0' + group;
        goto finish;
    }
    if (u < 10000) { // 10,000
        if (u >= 1000) goto under_1e4;
        group = u / 100;
        *buffer++ = '0' + group;
        goto minus_1e2;
    }
    if (u < 1000000) { // 1,000,000
        if (u >= 100000) goto under_1e6;
        group = u / 10000; // 10,000
        *buffer++ = '0' + group;
        goto minus_1e4;
    }
    if (u < 100000000) { // 100,000,000
        if (u >= 10000000) goto under_1e8;
        group = u / 1000000; // 1,000,000
        *buffer++ = '0' + group;
        goto minus_1e6;
    }
    // At this point u is known to be < 1,000,000,000 (nine digits).
    group = u / 100000000; // 100,000,000
    *buffer++ = '0' + group;
    goto minus_1e8;
}
746
747
0
// Writes the decimal representation of `i` (with a leading '-' when negative)
// at the start of `buffer` and returns whatever FastUInt32ToBufferLeft()
// returns for the magnitude.
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
    uint32 magnitude = i;
    if (i < 0) {
        buffer[0] = '-';
        ++buffer;
        // Negate in modular (i.e., "unsigned") arithmetic so the most negative
        // value is handled too. It is spelled "0 - x" rather than "-x" because
        // MSVC++ apparently warns about unary minus on an unsigned value.
        magnitude = 0 - magnitude;
    }
    return FastUInt32ToBufferLeft(magnitude, buffer);
}
758
759
0
// Writes the decimal representation of `u64` (no leading zeroes) at the start
// of `buffer`, NUL-terminates it, and returns a pointer to that NUL.
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
    // Values that fit in 32 bits go straight to the 32-bit converter.
    const uint32 narrow = static_cast<uint32>(u64);
    if (narrow == u64) return FastUInt32ToBufferLeft(narrow, buffer);

    // Print everything above the low nine decimal digits first (recursively),
    // then the low nine digits, zero padded.
    const uint64 upper = u64 / 1000000000;
    buffer = FastUInt64ToBufferLeft(upper, buffer);
    uint32 rest = u64 - (upper * 1000000000);

    // rest < 1,000,000,000, so the leading pair is a valid table index.
    DCHECK_LT(rest / 10000000, 100);

    // Nine digits are emitted as four table-driven pairs plus one final digit.
    static constexpr uint32 kPairUnits[] = {
            10000000, // 10,000,000
            100000,   // 100,000
            1000,     // 1,000
            10,
    };
    for (const uint32 unit : kPairUnits) {
        const uint pair = rest / unit;
        const char* glyphs = two_ASCII_digits[pair];
        buffer[0] = glyphs[0];
        buffer[1] = glyphs[1];
        buffer += 2;
        rest -= pair * unit;
    }

    *buffer++ = '0' + rest;
    *buffer = 0;
    return buffer;
}
800
801
0
// Writes the decimal representation of `i` (with a leading '-' when negative)
// at the start of `buffer` and returns whatever FastUInt64ToBufferLeft()
// returns for the magnitude.
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
    uint64 magnitude = i;
    if (i < 0) {
        buffer[0] = '-';
        ++buffer;
        // Unsigned (modular) negation, so the most negative value is handled too.
        magnitude = 0 - magnitude;
    }
    return FastUInt64ToBufferLeft(magnitude, buffer);
}
809
810
0
int HexDigitsPrefix(const char* buf, int num_digits) {
811
0
    for (int i = 0; i < num_digits; i++)
812
0
        if (!ascii_isxdigit(buf[i]))
813
0
            return 0; // This also detects end of string as '\0' is not xdigit.
814
0
    return 1;
815
0
}
816
817
// ----------------------------------------------------------------------
818
// AutoDigitStrCmp
819
// AutoDigitLessThan
820
// StrictAutoDigitLessThan
821
// autodigit_less
822
// autodigit_greater
823
// strict_autodigit_less
824
// strict_autodigit_greater
825
//    These are like less<string> and greater<string>, except when a
826
//    run of digits is encountered at corresponding points in the two
827
//    arguments.  Such digit strings are compared numerically instead
828
//    of lexicographically.  Therefore if you sort by
829
//    "autodigit_less", some machine names might get sorted as:
830
//        exaf1
831
//        exaf2
832
//        exaf10
833
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
834
//    set to true, or the strict version of the other functions),
835
//    strings that represent equal numbers will not be considered equal if
836
//    the string representations are not identical.  That is, "01" < "1" in
837
//    strict mode, but "01" == "1" otherwise.
838
// ----------------------------------------------------------------------
839
840
0
// Three-way comparison of a[0, alen) and b[0, blen) in which runs of decimal
// digits found at corresponding positions are compared numerically instead of
// lexicographically; all other characters are compared as plain `char`.
//
// Returns a negative value when a < b, a positive value when a > b, and 0
// when they compare equal. When `strict` is true, digit runs that denote the
// same number but differ in their count of leading zeroes are not equal: the
// run with MORE leading zeroes sorts first (so "01" < "1").
int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) {
    // The <ctype.h> classifiers have undefined behavior for negative arguments
    // other than EOF, which is what a non-ASCII byte becomes on platforms where
    // plain char is signed. Always classify through unsigned char.
    const auto is_digit = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };

    int aindex = 0;
    int bindex = 0;
    while ((aindex < alen) && (bindex < blen)) {
        if (is_digit(a[aindex]) && is_digit(b[bindex])) {
            // Compare runs of digits. Instead of extracting numbers, skip the
            // leading zeroes and compare the lengths of what remains; this
            // handles arbitrarily long numbers. The zero counts are kept so
            // that "1" and "01" can be told apart in strict mode.
            const int azero_start = aindex;
            const int bzero_start = bindex;
            while ((aindex < alen) && (a[aindex] == '0')) aindex++;
            while ((bindex < blen) && (b[bindex] == '0')) bindex++;
            const int azeroes = aindex - azero_start;
            const int bzeroes = bindex - bzero_start;

            // Measure the significant digits of each run.
            const int astart = aindex;
            const int bstart = bindex;
            while ((aindex < alen) && is_digit(a[aindex])) aindex++;
            while ((bindex < blen) && is_digit(b[bindex])) bindex++;
            const int arun = aindex - astart;
            const int brun = bindex - bstart;

            // With leading zeroes gone, the shorter run is the smaller number.
            if (arun != brun) {
                return arun < brun ? -1 : 1;
            }

            // Same length: the first differing digit decides.
            for (int i = 0; i < arun; i++) {
                if (a[astart + i] != b[bstart + i]) {
                    return a[astart + i] < b[bstart + i] ? -1 : 1;
                }
            }

            // Numerically equal: in strict mode more leading zeroes sorts first.
            if (strict && azeroes != bzeroes) {
                return azeroes > bzeroes ? -1 : 1;
            }
            // Otherwise keep scanning after both runs.
        } else if (a[aindex] != b[bindex]) {
            return a[aindex] < b[bindex] ? -1 : 1;
        } else {
            aindex++;
            bindex++;
        }
    }

    if (aindex < alen) {
        // b is a prefix of a
        return 1;
    }
    if (bindex < blen) {
        // a is a prefix of b
        return -1;
    }
    // a is equal to b
    return 0;
}
912
913
0
bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
914
0
    return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
915
0
}
916
917
0
bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
918
0
    return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
919
0
}
920
921
// ----------------------------------------------------------------------
922
// SimpleDtoa()
923
// SimpleFtoa()
924
// DoubleToBuffer()
925
// FloatToBuffer()
926
//    We want to print the value without losing precision, but we also do
927
//    not want to print more digits than necessary.  This turns out to be
928
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
929
//    exactly in binary.  If we print 0.2 with a very large precision,
930
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
931
//    On the other hand, if we set the precision too low, we lose
932
//    significant digits when printing numbers that actually need them.
933
//    It turns out there is no precision value that does the right thing
934
//    for all numbers.
935
//
936
//    Our strategy is to first try printing with a precision that is never
937
//    over-precise, then parse the result with strtod() to see if it
938
//    matches.  If not, we print again with a precision that will always
939
//    give a precise result, but may use more digits than necessary.
940
//
941
//    An arguably better strategy would be to use the algorithm described
942
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
943
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
944
//    however, that the following implementation is about as fast as
945
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
946
//    will not scale well on multi-core machines.  DMG's code is slightly
947
//    more accurate (in that it will never use more digits than
948
//    necessary), but this is probably irrelevant for most users.
949
//
950
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
951
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
952
//    one in that it makes guesses and then uses strtod() to check them.
953
//    Their implementation is faster because they use their own code to
954
//    generate the digits in the first place rather than use snprintf(),
955
//    thus avoiding format string parsing overhead.  However, this makes
956
//    it considerably more complicated than the following implementation,
957
//    and it is embedded in a larger library.  If speed turns out to be
958
//    an issue, we could re-implement this in terms of their
959
//    implementation.
960
// ----------------------------------------------------------------------
961
962
0
// Returns the text DoubleToBuffer() produces for `value` as a string.
string SimpleDtoa(double value) {
    char scratch[kDoubleToBufferSize];
    const char* const text = DoubleToBuffer(value, scratch);
    return string(text);
}
966
967
0
// Returns the text FloatToBuffer() produces for `value` as a string.
string SimpleFtoa(float value) {
    char scratch[kFloatToBufferSize];
    const char* const text = FloatToBuffer(value, scratch);
    return string(text);
}
971
972
0
char* DoubleToBuffer(double value, char* buffer) {
973
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
974
    // platforms these days.  Just in case some system exists where DBL_DIG
975
    // is significantly larger -- and risks overflowing our buffer -- we have
976
    // this assert.
977
0
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
978
979
0
    int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
980
981
    // The snprintf should never overflow because the buffer is significantly
982
    // larger than the precision we asked for.
983
0
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
984
985
0
    if (strtod(buffer, nullptr) != value) {
986
0
        snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value);
987
988
        // Should never overflow; see above.
989
0
        DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
990
0
    }
991
0
    return buffer;
992
0
}
993
994
0
char* FloatToBuffer(float value, char* buffer) {
995
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
996
    // platforms these days.  Just in case some system exists where FLT_DIG
997
    // is significantly larger -- and risks overflowing our buffer -- we have
998
    // this assert.
999
0
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1000
1001
0
    int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1002
1003
    // The snprintf should never overflow because the buffer is significantly
1004
    // larger than the precision we asked for.
1005
0
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1006
1007
0
    float parsed_value;
1008
0
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1009
0
        snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value);
1010
1011
        // Should never overflow; see above.
1012
0
        DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1013
0
    }
1014
0
    return buffer;
1015
0
}
1016
1017
11
int DoubleToBuffer(double value, int width, char* buffer) {
1018
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1019
    // platforms these days.  Just in case some system exists where DBL_DIG
1020
    // is significantly larger -- and risks overflowing our buffer -- we have
1021
    // this assert.
1022
11
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1023
1024
11
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
1025
1026
    // The snprintf should never overflow because the buffer is significantly
1027
    // larger than the precision we asked for.
1028
11
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1029
1030
11
    if (strtod(buffer, nullptr) != value) {
1031
3
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
1032
1033
        // Should never overflow; see above.
1034
3
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1035
3
    }
1036
1037
11
    return snprintf_result;
1038
11
}
1039
1040
18.0M
int FloatToBuffer(float value, int width, char* buffer) {
1041
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1042
    // platforms these days.  Just in case some system exists where FLT_DIG
1043
    // is significantly larger -- and risks overflowing our buffer -- we have
1044
    // this assert.
1045
18.0M
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1046
1047
18.0M
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
1048
1049
    // The snprintf should never overflow because the buffer is significantly
1050
    // larger than the precision we asked for.
1051
18.0M
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1052
1053
18.0M
    float parsed_value;
1054
18.0M
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1055
10
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
1056
1057
        // Should never overflow; see above.
1058
10
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1059
10
    }
1060
1061
18.0M
    return snprintf_result;
1062
18.0M
}
1063
1064
862
int FastDoubleToBuffer(double value, char* buffer) {
1065
862
    auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1066
862
    *end = '\0';
1067
862
    return end - buffer;
1068
862
}
1069
1070
761
int FastFloatToBuffer(float value, char* buffer) {
1071
761
    auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1072
761
    *end = '\0';
1073
761
    return end - buffer;
1074
761
}
1075
1076
// ----------------------------------------------------------------------
1077
// SimpleItoaWithCommas()
1078
//    Description: converts an integer to a string.
1079
//    Puts commas every 3 spaces.
1080
//    Faster than printf("%d")?
1081
//
1082
//    Return value: string
1083
// ----------------------------------------------------------------------
1084
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right) and a leading '-' for negative values.
string SimpleItoaWithCommas(int32 i) {
    // 10 digits, 3 commas, and a sign are enough for 32-bit ints.
    // The longest output is -2,147,483,648.
    char local[14];
    char* const end = local + sizeof(local);
    char* out = end;

    // Work on the unsigned magnitude so that -2,147,483,648 is handled
    // correctly; negating the unsigned value cannot overflow.
    uint32 rest = i;
    if (i < 0) rest = 0 - rest;

    // Digits are produced right to left; the do-while also covers "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--out = ',';
            group_len = 0;
        }
        *--out = '0' + rest % 10;
        rest /= 10;
        ++group_len;
    } while (rest != 0);

    if (i < 0) *--out = '-';
    return string(out, end);
}
1112
1113
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
1114
// compile.
1115
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right).
string SimpleItoaWithCommas(uint32 i) {
    // 10 digits and 3 commas are enough for 32-bit ints.
    // The longest output is 4,294,967,295.
    char local[13];
    char* const end = local + sizeof(local);
    char* out = end;

    // Digits are produced right to left; the do-while also covers "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--out = ',';
            group_len = 0;
        }
        *--out = '0' + i % 10;
        i /= 10;
        ++group_len;
    } while (i != 0);

    return string(out, end);
}
1138
1139
0
// Returns the comma-grouped decimal text of `i`, produced by the buffer-based
// SimpleItoaWithCommas() overload into a local scratch array.
string SimpleItoaWithCommas(int64 i) {
    // 19 digits, 6 commas, and a sign are enough for 64-bit ints.
    char local[26];
    char* const end = local + sizeof(local);
    // The helper fills the array from the back and returns the first character.
    char* const begin = SimpleItoaWithCommas(i, local, sizeof(local));
    return string(begin, end);
}
1145
1146
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
1147
// compile.
1148
0
// Returns the decimal text of `i` with a comma between every group of three
// digits (counted from the right).
string SimpleItoaWithCommas(uint64 i) {
    // 20 digits and 6 commas are enough for 64-bit ints.
    // The longest output is 18,446,744,073,709,551,615.
    char local[26];
    char* const end = local + sizeof(local);
    char* out = end;

    // Digits are produced right to left; the do-while also covers "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--out = ',';
            group_len = 0;
        }
        *--out = '0' + i % 10;
        i /= 10;
        ++group_len;
    } while (i != 0);

    return string(out, end);
}
1171
1172
27
// Writes the comma-grouped decimal text of `i` right-aligned into
// buffer[0, buffer_size): the last character lands at
// buffer[buffer_size - 1] and the returned pointer addresses the first one.
// No NUL terminator is written and no bounds check is performed, so the
// caller must supply enough room (19 digits, 6 commas, and a sign cover any
// 64-bit value).
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that -9,223,372,036,854,775,808 is
    // handled correctly; negating the unsigned value cannot overflow.
    uint64_t rest = i;
    if (i < 0) rest = 0 - rest;

    // Digits are produced right to left; the do-while also covers "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--out = ',';
            group_len = 0;
        }
        *--out = '0' + rest % 10;
        rest /= 10;
        ++group_len;
    } while (rest != 0);

    if (i < 0) *--out = '-';
    return out;
}
1198
1199
29
// Writes the comma-grouped decimal text of `i` right-aligned into
// buffer[0, buffer_size): the last character lands at
// buffer[buffer_size - 1] and the returned pointer addresses the first one.
// No NUL terminator is written and no bounds check is performed, so the
// caller must supply enough room (39 digits, 12 commas, and a sign cover any
// 128-bit value).
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* out = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled
    // correctly; negating the unsigned value cannot overflow.
    __uint128_t rest = i;
    if (i < 0) rest = 0 - rest;

    // Digits are produced right to left; the do-while also covers "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--out = ',';
            group_len = 0;
        }
        *--out = '0' + rest % 10;
        rest /= 10;
        ++group_len;
    } while (rest != 0);

    if (i < 0) *--out = '-';
    return out;
}
1225
1226
// ----------------------------------------------------------------------
1227
// ItoaKMGT()
1228
//    Description: converts an integer to a string
1229
//    Truncates values to a readable unit: K, G, M or T
1230
//    Opposite of atoi_kmgt()
1231
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   57185920 -> "54M"
1232
//
1233
//    Return value: string
1234
// ----------------------------------------------------------------------
1235
0
// Returns `i` truncated to the largest binary unit (T = 2^40, G = 2^30,
// M = 2^20, K = 2^10) for which the truncated value is greater than 1,
// followed by that unit's letter. Values for which no unit qualifies are
// printed in full without a suffix. Negative inputs get a leading '-'.
string ItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // The minimum value cannot be negated; nudge it by one first. The
        // small loss of accuracy is acceptable because the output is only
        // meant for human readability.
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    struct Unit {
        int shift;
        const char* suffix;
    };
    static const Unit kUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}, {10, "K"}};

    // Default: no unit applies and the value is printed as-is.
    int64 val = i;
    const char* suffix = "";
    // Units are tried from largest to smallest; the first match wins.
    for (const Unit& unit : kUnits) {
        const int64 scaled = i >> unit.shift;
        if (scaled > 1) {
            val = scaled;
            suffix = unit.suffix;
            break;
        }
    }

    return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
1261
1262
0
// Breaks `i` down into T (2^40), G (2^30) and M (2^20) components, emitting
// "<count><unit>," for every unit whose count is greater than 1 and
// subtracting that part from the running value, then finishes with a "K"
// component. Negative inputs get a leading '-'.
string AccurateItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // The minimum value cannot be negated; nudge it by one first. The
        // small loss of accuracy is acceptable because the output is only
        // meant for human readability.
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    string ret = sign;

    struct Unit {
        int shift;
        const char* suffix;
    };
    static const Unit kLargeUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}};

    for (const Unit& unit : kLargeUnits) {
        const int64 count = i >> unit.shift;
        if (count > 1) {
            ret += StringPrintf("%" PRId64 "%s,", count, unit.suffix);
            i = i - (count << unit.shift);
        }
    }

    const int64 kilo_count = i >> 10;
    if (kilo_count > 1) {
        ret += StringPrintf("%" PRId64 "%s", kilo_count, "K");
    } else {
        // NOTE(review): this prints the remaining value un-shifted but still
        // with a "K" suffix (e.g. 500 yields "500K"). That looks unintended,
        // but it is preserved here -- confirm the desired output with callers.
        ret += StringPrintf("%" PRId64 "%s", i, "K");
    }

    return ret;
}
1304
1305
// DEPRECATED(wadetregaskis).
1306
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1307
1308
0
// Formats `f` with the caller-supplied printf-style `format` via
// StringPrintf(). The format string is not validated here.
string FloatToString(float f, const char* format) {
    string formatted = StringPrintf(format, f);
    return formatted;
}
1311
1312
0
// Formats `i` with the caller-supplied printf-style `format` via
// StringPrintf(). The format string is not validated here.
string IntToString(int i, const char* format) {
    string formatted = StringPrintf(format, i);
    return formatted;
}
1315
1316
0
// Formats `i64` with the caller-supplied printf-style `format` via
// StringPrintf(). The format string is not validated here.
string Int64ToString(int64 i64, const char* format) {
    string formatted = StringPrintf(format, i64);
    return formatted;
}
1319
1320
0
// Formats `ui64` with the caller-supplied printf-style `format` via
// StringPrintf(). The format string is not validated here.
string UInt64ToString(uint64 ui64, const char* format) {
    string formatted = StringPrintf(format, ui64);
    return formatted;
}