Coverage Report

Created: 2025-04-29 17:56

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <assert.h>
10
#include <ctype.h>
11
#include <errno.h>
12
#include <float.h> // for DBL_DIG and FLT_DIG
13
#include <math.h>  // for HUGE_VAL
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <string.h>
17
#include <inttypes.h>
18
#include <sys/types.h>
19
#include <limits>
20
#include <ostream>
21
22
using std::numeric_limits;
23
#include <string>
24
25
using std::string;
26
27
#include <fmt/compile.h>
28
#include <fmt/format.h>
29
30
#include "common/logging.h"
31
32
#include "gutil/gscoped_ptr.h"
33
#include "gutil/int128.h"
34
#include "gutil/integral_types.h"
35
#include "gutil/stringprintf.h"
36
#include "gutil/strings/ascii_ctype.h"
37
#include "gutil/strtoint.h"
38
39
// Reads a <double> in *text, which may not be whitespace-initiated.
40
// *len is the length, or -1 if text is '\0'-terminated, which is more
41
// efficient.  Sets *text to the end of the double, and val to the
42
// converted value, and the length of the double is subtracted from
43
// *len. <double> may also be a '?', in which case val will be
44
// unchanged. Returns true upon success.  If initial_minus is
45
// non-NULL, then *initial_minus will indicate whether the first
46
// symbol seen was a '-', which will be ignored. Similarly, if
47
// final_period is non-NULL, then *final_period will indicate whether
48
// the last symbol seen was a '.', which will be ignored. This is
49
// useful in case that an initial '-' or final '.' would have another
50
// meaning (as a separator, e.g.).
51
static inline bool EatADouble(const char** text, int* len, bool allow_question, double* val,
52
0
                              bool* initial_minus, bool* final_period) {
53
0
    const char* pos = *text;
54
0
    int rem = *len; // remaining length, or -1 if null-terminated
55
56
0
    if (pos == nullptr || rem == 0) return false;
57
58
0
    if (allow_question && (*pos == '?')) {
59
0
        *text = pos + 1;
60
0
        if (rem != -1) *len = rem - 1;
61
0
        return true;
62
0
    }
63
64
0
    if (initial_minus) {
65
0
        if ((*initial_minus = (*pos == '-'))) { // Yes, we want assignment.
66
0
            if (rem == 1) return false;
67
0
            ++pos;
68
0
            if (rem != -1) --rem;
69
0
        }
70
0
    }
71
72
    // a double has to begin one of these (we don't allow 'inf' or whitespace)
73
    // this also serves as an optimization.
74
0
    if (!strchr("-+.0123456789", *pos)) return false;
75
76
    // strtod is evil in that the second param is a non-const char**
77
0
    char* end_nonconst;
78
0
    double retval;
79
0
    if (rem == -1) {
80
0
        retval = strtod(pos, &end_nonconst);
81
0
    } else {
82
        // not '\0'-terminated & no obvious terminator found. must copy.
83
0
        gscoped_array<char> buf(new char[rem + 1]);
84
0
        memcpy(buf.get(), pos, rem);
85
0
        buf[rem] = '\0';
86
0
        retval = strtod(buf.get(), &end_nonconst);
87
0
        end_nonconst = const_cast<char*>(pos) + (end_nonconst - buf.get());
88
0
    }
89
90
0
    if (pos == end_nonconst) return false;
91
92
0
    if (final_period) {
93
0
        *final_period = (end_nonconst[-1] == '.');
94
0
        if (*final_period) {
95
0
            --end_nonconst;
96
0
        }
97
0
    }
98
99
0
    *text = end_nonconst;
100
0
    *val = retval;
101
0
    if (rem != -1) *len = rem - (end_nonconst - pos);
102
0
    return true;
103
0
}
104
105
// If update, consume one of acceptable_chars from string *text of
106
// length len and return that char, or '\0' otherwise. If len is -1,
107
// *text is null-terminated. If update is false, don't alter *text and
108
// *len. If null_ok, then update must be false, and, if text has no
109
// more chars, then return '\1' (arbitrary nonzero).
110
// Looks at the next character of *text (length *len, or -1 when the text is
// '\0'-terminated) and returns it if it occurs in acceptable_chars, otherwise
// '\0'. With update set, a matched character is consumed (*text advanced,
// *len decremented unless it is -1). With null_ok set (update must then be
// false), running out of input yields '\1' instead of '\0'.
static inline char EatAChar(const char** text, int* len, const char* acceptable_chars, bool update,
                            bool null_ok) {
    assert(!(update && null_ok));

    // *len is tested first so that **text is never read when no bytes remain.
    const bool exhausted = (*len == 0) || (**text == '\0');
    if (exhausted) {
        // if null_ok, we're in predicate mode.
        return null_ok ? '\1' : '\0';
    }

    const char head = **text;
    if (!strchr(acceptable_chars, head)) {
        return '\0'; // no match; no update
    }

    if (update) {
        ++(*text);
        if (*len != -1) {
            --(*len);
        }
    }
    return head;
}
127
128
// Parse an expression in 'text' of the form: <comparator><double> or
129
// <double><sep><double> See full comments in header file.
130
bool ParseDoubleRange(const char* text, int len, const char** end, double* from, double* to,
131
0
                      bool* is_currency, const DoubleRangeOptions& opts) {
132
0
    const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
133
134
0
    if (!opts.dont_modify_unbounded) {
135
0
        *from = -HUGE_VAL;
136
0
        *to = HUGE_VAL;
137
0
    }
138
0
    if (opts.allow_currency && (is_currency != nullptr)) *is_currency = false;
139
140
0
    assert(len >= -1);
141
0
    assert(opts.separators && (*opts.separators != '\0'));
142
    // these aren't valid separators
143
0
    assert(strlen(opts.separators) == strcspn(opts.separators, "+0123456789eE$"));
144
0
    assert(opts.num_required_bounds <= 2);
145
146
    // Handle easier cases of comparators (<, >) first
147
0
    if (opts.allow_comparators) {
148
0
        char comparator = EatAChar(&text, &len, "<>", true, false);
149
0
        if (comparator) {
150
0
            double* dest = (comparator == '>') ? from : to;
151
0
            EatAChar(&text, &len, "=", true, false);
152
0
            if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
153
0
                if (is_currency != nullptr) *is_currency = true;
154
0
            if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr, nullptr))
155
0
                return false;
156
0
            *end = text;
157
0
            return EatAChar(&text, &len, opts.acceptable_terminators, false,
158
0
                            opts.null_terminator_ok);
159
0
        }
160
0
    }
161
162
0
    bool seen_dollar = (opts.allow_currency && EatAChar(&text, &len, "$", true, false));
163
164
    // If we see a '-', two things could be happening: -<to> or
165
    // <from>... where <from> is negative. Treat initial minus sign as a
166
    // separator if '-' is a valid separator.
167
    // Similarly, we prepare for the possibility of seeing a '.' at the
168
    // end of the number, in case '.' (which really means '..') is a
169
    // separator.
170
0
    bool initial_minus_sign = false;
171
0
    bool final_period = false;
172
0
    bool* check_initial_minus =
173
0
            (strchr(opts.separators, '-') && !seen_dollar && (opts.num_required_bounds < 2))
174
0
                    ? (&initial_minus_sign)
175
0
                    : nullptr;
176
0
    bool* check_final_period = strchr(opts.separators, '.') ? (&final_period) : nullptr;
177
0
    bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers, from,
178
0
                                  check_initial_minus, check_final_period);
179
180
    // if 2 bounds required, must see a double (or '?' if allowed)
181
0
    if ((opts.num_required_bounds == 2) && !double_seen) return false;
182
183
0
    if (seen_dollar && !double_seen) {
184
0
        --text;
185
0
        if (len != -1) ++len;
186
0
        seen_dollar = false;
187
0
    }
188
    // If we're here, we've read the first double and now expect a
189
    // separator and another <double>.
190
0
    char separator = EatAChar(&text, &len, opts.separators, true, false);
191
0
    if (separator == '.') {
192
        // seen one '.' as separator; must check for another; perhaps set seplen=2
193
0
        if (EatAChar(&text, &len, ".", true, false)) {
194
0
            if (final_period) {
195
                // We may have three periods in a row. The first is part of the
196
                // first number, the others are a separator. Policy: 234...567
197
                // is "234." to "567", not "234" to ".567".
198
0
                EatAChar(&text, &len, ".", true, false);
199
0
            }
200
0
        } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
201
            // just one '.' and no other separator; uneat the first '.' we saw
202
0
            --text;
203
0
            if (len != -1) ++len;
204
0
            separator = '\0';
205
0
        }
206
0
    }
207
    // By now, we've consumed whatever separator there may have been,
208
    // and separator is true iff there was one.
209
0
    if (!separator) {
210
0
        if (final_period) // final period now considered part of first double
211
0
            EatAChar(&text, &len, ".", true, false);
212
0
        if (initial_minus_sign && double_seen) {
213
0
            *to = *from;
214
0
            *from = from_default;
215
0
        } else if (opts.require_separator || (opts.num_required_bounds > 0 && !double_seen) ||
216
0
                   (opts.num_required_bounds > 1)) {
217
0
            return false;
218
0
        }
219
0
    } else {
220
0
        if (initial_minus_sign && double_seen) *from = -(*from);
221
        // read second <double>
222
0
        bool second_dollar_seen = (seen_dollar || (opts.allow_currency && !double_seen)) &&
223
0
                                  EatAChar(&text, &len, "$", true, false);
224
0
        bool second_double_seen =
225
0
                EatADouble(&text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
226
0
        if (opts.num_required_bounds > double_seen + second_double_seen) return false;
227
0
        if (second_dollar_seen && !second_double_seen) {
228
0
            --text;
229
0
            if (len != -1) ++len;
230
0
            second_dollar_seen = false;
231
0
        }
232
0
        seen_dollar = seen_dollar || second_dollar_seen;
233
0
    }
234
235
0
    if (seen_dollar && (is_currency != nullptr)) *is_currency = true;
236
    // We're done. But we have to check that the next char is a proper
237
    // terminator.
238
0
    *end = text;
239
0
    char terminator =
240
0
            EatAChar(&text, &len, opts.acceptable_terminators, false, opts.null_terminator_ok);
241
0
    if (terminator == '.') --(*end);
242
0
    return terminator;
243
0
}
244
245
// ----------------------------------------------------------------------
246
// ConsumeStrayLeadingZeroes
247
//    Eliminates all leading zeroes (unless the string itself is composed
248
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
249
// --------------------------------------------------------------------
250
251
0
// Strips leading '0' characters from *str in place. A string made up only of
// zeroes keeps exactly one; strings of length <= 1 or not starting with '0'
// are left untouched.
void ConsumeStrayLeadingZeroes(string* const str) {
    const string::size_type len = str->size();
    if (len <= 1 || (*str)[0] != '0') {
        return;
    }

    const char* const begin = str->c_str();
    const char* const end = begin + len;
    const char* ptr = begin + 1;
    for (; ptr != end && *ptr == '0'; ++ptr) {
        // skip the run of zeroes
    }
    DCHECK_GT(ptr, begin);

    string::size_type remove(ptr - begin);
    if (remove == len) {
        remove = len - 1; // if they are all zero, leave one...
    }
    str->erase(0, remove);
}
266
267
// ----------------------------------------------------------------------
268
// ParseLeadingInt32Value()
269
// ParseLeadingUInt32Value()
270
//    A simple parser for [u]int32 values. Returns the parsed value
271
//    if a valid value is found; else returns deflt
272
//    This cannot handle decimal numbers with leading 0s.
273
// --------------------------------------------------------------------
274
275
0
// Parses a leading integer with strtol in base 0 (prefix-selected radix) and
// saturates it to the int32 range. Returns deflt if no digits were consumed.
int32 ParseLeadingInt32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    long parsed = strtol(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }

    // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits.
    const long hi = numeric_limits<int32>::max();
    const long lo = numeric_limits<int32>::min();
    if (parsed > hi) {
        parsed = hi;
    } else if (parsed < lo) {
        parsed = lo;
    }
    return parsed;
}
286
287
0
// Parses a leading unsigned 32-bit value in base 0 (prefix-selected radix).
// Returns deflt if no digits were consumed. Magnitudes beyond UINT32_MAX are
// pegged to UINT32_MAX; smaller negative inputs wrap modulo 2^32.
uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
        // When long is 32 bits, we can use strtoul.
        const uint32 parsed = strtoul(str, &parse_end, 0);
        if (parse_end == str) {
            return deflt;
        }
        return parsed;
    }

    // When long is 64 bits, parse as a signed 64-bit value and apply the
    // limits by hand. A 64-bit strtoul could not tell "-2" (which should wrap
    // to UINT_MAX-1) apart from ULONG_MAX-1 (which should be pegged to
    // UINT_MAX due to overflow).
    int64 parsed = strto64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    const int64 limit = numeric_limits<uint32>::max();
    if (parsed > limit || parsed < -limit) {
        parsed = limit;
    }
    // Within these limits, truncation to 32 bits handles negatives correctly.
    return static_cast<uint32>(parsed);
}
309
310
// ----------------------------------------------------------------------
311
// ParseLeadingDec32Value
312
// ParseLeadingUDec32Value
313
//    A simple parser for [u]int32 values. Returns the parsed value
314
//    if a valid value is found; else returns deflt
315
//    The string passed in is treated as *10 based*.
316
//    This can handle strings with leading 0s.
317
// --------------------------------------------------------------------
318
319
0
// Parses a leading base-10 integer with strtol and saturates it to the int32
// range. Returns deflt if no digits were consumed.
int32 ParseLeadingDec32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    long parsed = strtol(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }

    // Limit long values to int32 min/max. Needed for lp64; no-op on 32 bits.
    const long hi = numeric_limits<int32>::max();
    const long lo = numeric_limits<int32>::min();
    if (parsed > hi) {
        parsed = hi;
    } else if (parsed < lo) {
        parsed = lo;
    }
    return parsed;
}
330
331
0
// Parses a leading base-10 unsigned 32-bit value. Returns deflt if no digits
// were consumed. Magnitudes beyond UINT32_MAX are pegged to UINT32_MAX;
// smaller negative inputs wrap modulo 2^32.
uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
        // When long is 32 bits, we can use strtoul.
        const uint32 parsed = strtoul(str, &parse_end, 10);
        if (parse_end == str) {
            return deflt;
        }
        return parsed;
    }

    // When long is 64 bits, parse as a signed 64-bit value and apply the
    // limits by hand. A 64-bit strtoul could not tell "-2" (which should wrap
    // to UINT_MAX-1) apart from ULONG_MAX-1 (which should be pegged to
    // UINT_MAX due to overflow).
    int64 parsed = strto64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    const int64 limit = numeric_limits<uint32>::max();
    if (parsed > limit || parsed < -limit) {
        parsed = limit;
    }
    // Within these limits, truncation to 32 bits handles negatives correctly.
    return static_cast<uint32>(parsed);
}
353
354
// ----------------------------------------------------------------------
355
// ParseLeadingUInt64Value
356
// ParseLeadingInt64Value
357
// ParseLeadingHex64Value
358
//    A simple parser for 64-bit values. Returns the parsed value if a
359
//    valid integer is found; else returns deflt
360
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
361
// --------------------------------------------------------------------
362
0
// Returns the value strtou64 reads from the start of str using base 0, or
// deflt when it consumed nothing.
uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
367
368
0
// Returns the value strto64 reads from the start of str using base 0, or
// deflt when it consumed nothing.
int64 ParseLeadingInt64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
373
374
0
// Returns the value strtou64 reads from the start of str using base 16, or
// deflt when it consumed nothing.
uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 16);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
379
380
// ----------------------------------------------------------------------
381
// ParseLeadingDec64Value
382
// ParseLeadingUDec64Value
383
//    A simple parser for [u]int64 values. Returns the parsed value
384
//    if a valid value is found; else returns deflt
385
//    The string passed in is treated as *10 based*.
386
//    This can handle strings with leading 0s.
387
// --------------------------------------------------------------------
388
389
0
// Returns the value strto64 reads from the start of str using base 10, or
// deflt when it consumed nothing.
int64 ParseLeadingDec64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
394
395
0
// Returns the value strtou64 reads from the start of str using base 10, or
// deflt when it consumed nothing.
uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    return parsed;
}
400
401
// ----------------------------------------------------------------------
402
// ParseLeadingDoubleValue()
403
//    A simple parser for double values. Returns the parsed value
404
//    if a valid value is found; else returns deflt
405
// --------------------------------------------------------------------
406
407
0
// Parses a leading double with strtod. Returns deflt when nothing could be
// parsed or when strtod reported an error through errno (overflow/underflow);
// otherwise returns the parsed value. Resets errno to 0 before parsing.
double ParseLeadingDoubleValue(const char* str, double deflt) {
    char* parse_end = nullptr;
    errno = 0;
    const double parsed = strtod(str, &parse_end);

    const bool range_error = (errno != 0);        // overflow/underflow happened
    const bool nothing_parsed = (parse_end == str); // no valid parse
    return (range_error || nothing_parsed) ? deflt : parsed;
}
418
419
// ----------------------------------------------------------------------
420
// ParseLeadingBoolValue()
421
//    A recognizer of boolean string values. Returns the parsed value
422
//    if a valid value is found; else returns deflt.  This skips leading
423
//    whitespace, is case insensitive, and recognizes these forms:
424
//    0/1, false/true, no/yes, n/y
425
// --------------------------------------------------------------------
426
0
bool ParseLeadingBoolValue(const char* str, bool deflt) {
427
0
    static const int kMaxLen = 5;
428
0
    char value[kMaxLen + 1];
429
    // Skip whitespace
430
0
    while (ascii_isspace(*str)) {
431
0
        ++str;
432
0
    }
433
0
    int len = 0;
434
0
    for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str);
435
0
    if (len == 0 || len > kMaxLen) return deflt;
436
0
    value[len] = '\0';
437
0
    switch (len) {
438
0
    case 1:
439
0
        if (value[0] == '0' || value[0] == 'n') return false;
440
0
        if (value[0] == '1' || value[0] == 'y') return true;
441
0
        break;
442
0
    case 2:
443
0
        if (!strcmp(value, "no")) return false;
444
0
        break;
445
0
    case 3:
446
0
        if (!strcmp(value, "yes")) return true;
447
0
        break;
448
0
    case 4:
449
0
        if (!strcmp(value, "true")) return true;
450
0
        break;
451
0
    case 5:
452
0
        if (!strcmp(value, "false")) return false;
453
0
        break;
454
0
    }
455
0
    return deflt;
456
0
}
457
458
// ----------------------------------------------------------------------
459
// Uint64ToString()
460
// FloatToString()
461
// IntToString()
462
//    Convert various types to their string representation, possibly padded
463
//    with spaces, using snprintf format specifiers.
464
// ----------------------------------------------------------------------
465
466
0
// Formats fp as exactly 16 lower-case hexadecimal digits, zero-padded.
string Uint64ToString(uint64 fp) {
    char hex[16 + 1]; // 16 digits plus the terminating '\0'
    snprintf(hex, sizeof(hex), "%016" PRIx64, fp);
    string result(hex);
    return result;
}
471
472
// Default arguments
473
0
string Uint128ToHexString(uint128 ui128) {
474
0
    char buf[33];
475
0
    snprintf(buf, sizeof(buf), "%016" PRIx64, Uint128High64(ui128));
476
0
    snprintf(buf + 16, sizeof(buf) - 16, "%016" PRIx64, Uint128Low64(ui128));
477
0
    return string(buf);
478
0
}
479
480
namespace {
481
482
// Represents integer values of digits.
483
// Uses 36 to indicate an invalid character since we support
484
// bases up to 36.
485
static const int8 kAsciiToInt[256] = {
486
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
487
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
488
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  36, 36,
489
        36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
490
        27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16,
491
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36,
492
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
493
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
494
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
495
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
496
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
497
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
498
499
// Input format based on POSIX.1-2008 strtol
500
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
501
template <typename IntType>
502
0
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
503
    // Consume whitespace.
504
0
    while (start < end && ascii_isspace(start[0])) {
505
0
        ++start;
506
0
    }
507
0
    while (start < end && ascii_isspace(end[-1])) {
508
0
        --end;
509
0
    }
510
0
    if (start >= end) {
511
0
        return false;
512
0
    }
513
514
    // Consume sign.
515
0
    const bool negative = (start[0] == '-');
516
0
    if (negative || start[0] == '+') {
517
0
        ++start;
518
0
        if (start >= end) {
519
0
            return false;
520
0
        }
521
0
    }
522
523
    // Consume base-dependent prefix.
524
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
525
    //  base 16: "0x" -> base 16
526
    // Also validate the base.
527
0
    if (base == 0) {
528
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
529
0
            base = 16;
530
0
            start += 2;
531
0
        } else if (end - start >= 1 && start[0] == '0') {
532
0
            base = 8;
533
0
            start += 1;
534
0
        } else {
535
0
            base = 10;
536
0
        }
537
0
    } else if (base == 16) {
538
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
539
0
            start += 2;
540
0
        }
541
0
    } else if (base >= 2 && base <= 36) {
542
        // okay
543
0
    } else {
544
0
        return false;
545
0
    }
546
547
    // Consume digits.
548
    //
549
    // The classic loop:
550
    //
551
    //   for each digit
552
    //     value = value * base + digit
553
    //   value *= sign
554
    //
555
    // The classic loop needs overflow checking.  It also fails on the most
556
    // negative integer, -2147483648 in 32-bit two's complement representation.
557
    //
558
    // My improved loop:
559
    //
560
    //  if (!negative)
561
    //    for each digit
562
    //      value = value * base
563
    //      value = value + digit
564
    //  else
565
    //    for each digit
566
    //      value = value * base
567
    //      value = value - digit
568
    //
569
    // Overflow checking becomes simple.
570
    //
571
    // I present the positive code first for easier reading.
572
0
    IntType value = 0;
573
0
    if (!negative) {
574
0
        const IntType vmax = std::numeric_limits<IntType>::max();
575
0
        assert(vmax > 0);
576
0
        assert(vmax >= base);
577
0
        const IntType vmax_over_base = vmax / base;
578
        // loop over digits
579
        // loop body is interleaved for perf, not readability
580
0
        for (; start < end; ++start) {
581
0
            unsigned char c = static_cast<unsigned char>(start[0]);
582
0
            int digit = kAsciiToInt[c];
583
0
            if (value > vmax_over_base) return false;
584
0
            value *= base;
585
0
            if (digit >= base) return false;
586
0
            if (value > vmax - digit) return false;
587
0
            value += digit;
588
0
        }
589
0
    } else {
590
0
        const IntType vmin = std::numeric_limits<IntType>::min();
591
0
        assert(vmin < 0);
592
0
        assert(vmin <= 0 - base);
593
0
        IntType vmin_over_base = vmin / base;
594
        // 2003 c++ standard [expr.mul]
595
        // "... the sign of the remainder is implementation-defined."
596
        // Although (vmin/base)*base + vmin%base is always vmin.
597
        // 2011 c++ standard tightens the spec but we cannot rely on it.
598
0
        if (vmin % base > 0) {
599
0
            vmin_over_base += 1;
600
0
        }
601
        // loop over digits
602
        // loop body is interleaved for perf, not readability
603
0
        for (; start < end; ++start) {
604
0
            unsigned char c = static_cast<unsigned char>(start[0]);
605
0
            int digit = kAsciiToInt[c];
606
0
            if (value < vmin_over_base) return false;
607
0
            value *= base;
608
0
            if (digit >= base) return false;
609
0
            if (value < vmin + digit) return false;
610
0
            value -= digit;
611
0
        }
612
0
    }
613
614
    // Store output.
615
0
    *value_p = value;
616
0
    return true;
617
0
}
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_
618
619
} // anonymous namespace
620
621
0
bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) {
622
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
623
0
}
624
625
0
bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) {
626
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
627
0
}
628
629
0
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
630
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
631
0
}
632
633
0
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
634
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
635
0
}
636
637
0
bool safe_strto32_base(const char* str, int32* value, int base) {
638
0
    char* endptr;
639
0
    errno = 0; // errno only gets set on errors
640
0
    *value = strto32(str, &endptr, base);
641
0
    if (endptr != str) {
642
0
        while (ascii_isspace(*endptr)) ++endptr;
643
0
    }
644
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
645
0
}
646
647
0
bool safe_strto64_base(const char* str, int64* value, int base) {
648
0
    char* endptr;
649
0
    errno = 0; // errno only gets set on errors
650
0
    *value = strto64(str, &endptr, base);
651
0
    if (endptr != str) {
652
0
        while (ascii_isspace(*endptr)) ++endptr;
653
0
    }
654
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
655
0
}
656
657
0
bool safe_strtou32_base(const char* str, uint32* value, int base) {
658
    // strtoul does not give any errors on negative numbers, so we have to
659
    // search the string for '-' manually.
660
0
    while (ascii_isspace(*str)) ++str;
661
0
    if (*str == '-') return false;
662
663
0
    char* endptr;
664
0
    errno = 0; // errno only gets set on errors
665
0
    *value = strtou32(str, &endptr, base);
666
0
    if (endptr != str) {
667
0
        while (ascii_isspace(*endptr)) ++endptr;
668
0
    }
669
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
670
0
}
671
672
0
bool safe_strtou64_base(const char* str, uint64* value, int base) {
673
    // strtou64 does not give any errors on negative numbers, so we have to
674
    // search the string for '-' manually.
675
0
    while (ascii_isspace(*str)) ++str;
676
0
    if (*str == '-') return false;
677
678
0
    char* endptr;
679
0
    errno = 0; // errno only gets set on errors
680
0
    *value = strtou64(str, &endptr, base);
681
0
    if (endptr != str) {
682
0
        while (ascii_isspace(*endptr)) ++endptr;
683
0
    }
684
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
685
0
}
686
687
// ----------------------------------------------------------------------
688
// u64tostr_base36()
689
//    Converts unsigned number to string representation in base-36.
690
// --------------------------------------------------------------------
691
0
// Writes number in base 36 (digits 0-9 then a-z) into buffer as a
// '\0'-terminated string. buf_size is the capacity of buffer including the
// terminator. Returns the number of digits written, or 0 if buffer is too
// small (its contents are then unspecified apart from the final '\0').
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
    CHECK_GT(buf_size, 0);
    CHECK(buffer);
    static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

    // Digits are produced least-significant first, so build the string
    // right-aligned against the end of the buffer and shift it down afterwards.
    char* const terminator = buffer + (buf_size - 1);
    *terminator = '\0';
    char* write = terminator; // most recently written position

    do {
        if (write == buffer) { // Ran out of space.
            return 0;
        }
        --write;
        *write = kAlphabet[number % 36];
        number /= 36;
    } while (number != 0);

    // Bytes to move: the digits plus the terminator.
    const size_t used = static_cast<size_t>(terminator - write) + 1;
    memmove(buffer, write, used);

    return used - 1;
}
713
714
// Generate functions that wrap safe_strtoXXX_base.
715
#define GEN_SAFE_STRTO(name, type)                                                  \
716
0
    bool name##_base(const string& str, type* value, int base) {                    \
717
0
        return name##_base(str.c_str(), value, base);                               \
718
0
    }                                                                               \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi
719
0
    bool name(const char* str, type* value) { return name##_base(str, value, 10); } \
Unexecuted instantiation: _Z12safe_strto32PKcPi
Unexecuted instantiation: _Z13safe_strtou32PKcPj
Unexecuted instantiation: _Z12safe_strto64PKcPl
Unexecuted instantiation: _Z13safe_strtou64PKcPm
720
0
    bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); }
Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj
Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl
Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm
721
GEN_SAFE_STRTO(safe_strto32, int32);
722
GEN_SAFE_STRTO(safe_strtou32, uint32);
723
GEN_SAFE_STRTO(safe_strto64, int64);
724
GEN_SAFE_STRTO(safe_strtou64, uint64);
725
#undef GEN_SAFE_STRTO
726
727
42
bool safe_strtof(const char* str, float* value) {
728
42
    char* endptr;
729
#ifdef _MSC_VER // has no strtof()
730
    *value = strtod(str, &endptr);
731
#else
732
42
    *value = strtof(str, &endptr);
733
42
#endif
734
42
    if (endptr != str) {
735
42
        while (ascii_isspace(*endptr)) ++endptr;
736
42
    }
737
    // Ignore range errors from strtod/strtof.
738
    // The values it returns on underflow and
739
    // overflow are the right fallback in a
740
    // robust setting.
741
42
    return *str != '\0' && *endptr == '\0';
742
42
}
743
744
0
bool safe_strtod(const char* str, double* value) {
745
0
    char* endptr;
746
0
    *value = strtod(str, &endptr);
747
0
    if (endptr != str) {
748
0
        while (ascii_isspace(*endptr)) ++endptr;
749
0
    }
750
    // Ignore range errors from strtod.  The values it
751
    // returns on underflow and overflow are the right
752
    // fallback in a robust setting.
753
0
    return *str != '\0' && *endptr == '\0';
754
0
}
755
756
0
bool safe_strtof(const string& str, float* value) {
757
0
    return safe_strtof(str.c_str(), value);
758
0
}
759
760
0
bool safe_strtod(const string& str, double* value) {
761
0
    return safe_strtod(str.c_str(), value);
762
0
}
763
764
0
// Parses a base-10 unsigned integer optionally followed by a single
// case-insensitive unit letter and returns the scaled value:
// K = 2^10, M = 2^20, G = 2^30, T = 2^40. Any other character right after the
// number is reported through LOG(FATAL).
uint64 atoi_kmgt(const char* s) {
    char* suffix = nullptr;
    const uint64 magnitude = strtou64(s, &suffix, 10);

    char unit = *suffix;
    if (unit == '\0') {
        // Bare number, no unit to apply.
        return magnitude;
    }

    unit = ascii_toupper(unit);
    uint64 multiplier = 1;
    if (unit == 'K') {
        multiplier = GG_ULONGLONG(1) << 10;
    } else if (unit == 'M') {
        multiplier = GG_ULONGLONG(1) << 20;
    } else if (unit == 'G') {
        multiplier = GG_ULONGLONG(1) << 30;
    } else if (unit == 'T') {
        multiplier = GG_ULONGLONG(1) << 40;
    } else {
        LOG(FATAL) << "Invalid mnemonic: `" << unit << "';"
                   << " should be one of `K', `M', `G', and `T'.";
    }
    return magnitude * multiplier;
}
791
792
// ----------------------------------------------------------------------
793
// FastIntToBuffer()
794
// FastInt64ToBuffer()
795
// FastHexToBuffer()
796
// FastHex64ToBuffer()
797
// FastHex32ToBuffer()
798
// FastTimeToBuffer()
799
//    These are intended for speed.  FastHexToBuffer() assumes the
800
//    integer is non-negative.  FastHexToBuffer() puts output in
801
//    hex rather than decimal.  FastTimeToBuffer() puts the output
802
//    into RFC822 format.  If time is 0, uses the current time.
803
//
804
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
805
//    padded to exactly 16 bytes (plus one byte for '\0')
806
//
807
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
808
//    padded to exactly 8 bytes (plus one byte for '\0')
809
//
810
//       All functions take the output buffer as an arg.  FastInt()
811
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
812
//    They all return a pointer to the beginning of the output,
813
//    which may not be the beginning of the input buffer.  (Though
814
//    for FastTimeToBuffer(), we guarantee that it is.)
815
// ----------------------------------------------------------------------
816
817
0
// Formats i in decimal at the start of buffer and returns the start of the
// output (which is buffer itself, since the text is left-aligned).
char* FastInt64ToBuffer(int64 i, char* buffer) {
    char* const start = buffer;
    FastInt64ToBufferLeft(i, start);
    return start;
}
821
822
0
// Formats i in decimal at the start of buffer and returns the start of the
// output (which is buffer itself, since the text is left-aligned).
char* FastInt32ToBuffer(int32 i, char* buffer) {
    char* const start = buffer;
    FastInt32ToBufferLeft(i, start);
    return start;
}
826
827
0
char* FastHexToBuffer(int i, char* buffer) {
828
0
    CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
829
830
0
    static const char* hexdigits = "0123456789abcdef";
831
0
    char* p = buffer + 21;
832
0
    *p-- = '\0';
833
0
    do {
834
0
        *p-- = hexdigits[i & 15]; // mod by 16
835
0
        i >>= 4;                  // divide by 16
836
0
    } while (i > 0);
837
0
    return p + 1;
838
0
}
839
840
0
// Fills buffer[0 .. num_byte-1] with the low num_byte hex digits of value
// (lowercase, zero-padded on the left), writes a NUL at buffer[num_byte], and
// returns buffer.
char* InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
    static const char* kHexDigits = "0123456789abcdef";
    int pos = num_byte;
    buffer[pos] = '\0';
    // Least significant nibble goes into the right-most slot.
    while (pos > 0) {
        --pos;
        buffer[pos] = kHexDigits[value & 0xf];
        value >>= 4;
    }
    return buffer;
}
849
850
0
// Writes value as exactly 16 zero-padded hex digits plus a NUL terminator.
char* FastHex64ToBuffer(uint64 value, char* buffer) {
    const int kHexDigitCount = 16;
    return InternalFastHexToBuffer(value, buffer, kHexDigitCount);
}
853
854
0
// Writes value as exactly 8 zero-padded hex digits plus a NUL terminator.
char* FastHex32ToBuffer(uint32 value, char* buffer) {
    const int kHexDigitCount = 8;
    return InternalFastHexToBuffer(value, buffer, kHexDigitCount);
}
857
858
// TODO(user): revisit the two_ASCII_digits optimization.
859
//
860
// Several converters use this table to reduce
861
// division and modulo operations.
862
extern const char two_ASCII_digits[100][2]; // from strutil.cc
863
864
// ----------------------------------------------------------------------
865
// FastInt32ToBufferLeft()
866
// FastUInt32ToBufferLeft()
867
// FastInt64ToBufferLeft()
868
// FastUInt64ToBufferLeft()
869
//
870
// Like the Fast*ToBuffer() functions above, these are intended for speed.
871
// Unlike the Fast*ToBuffer() functions, however, these functions write
872
// their output to the beginning of the buffer (hence the name, as the
873
// output is left-aligned).  The caller is responsible for ensuring that
874
// the buffer has enough space to hold the output.
875
//
876
// Returns a pointer to the end of the string (i.e. the null character
877
// terminating the string).
878
// ----------------------------------------------------------------------
879
880
346k
// Writes the decimal representation of u, without leading zeros, at the start
// of buffer, NUL-terminates it, and returns a pointer to that terminator.
// Digits are emitted two at a time through the two_ASCII_digits lookup table.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
881
346k
    uint digits;
882
346k
    const char* ASCII_digits = nullptr;
883
    // The idea of this implementation is to trim the number of divides to as few
884
    // as possible by using multiplication and subtraction rather than mod (%),
885
    // and by outputting two digits at a time rather than one.
886
    // The huge-number case is first, in the hopes that the compiler will output
887
    // that case in one branch-free block of code, and only output conditional
888
    // branches into it from below.
889
346k
    if (u >= 1000000000) {      // >= 1,000,000,000
890
0
        digits = u / 100000000; // 100,000,000
891
0
        ASCII_digits = two_ASCII_digits[digits];
892
0
        buffer[0] = ASCII_digits[0];
893
0
        buffer[1] = ASCII_digits[1];
894
0
        buffer += 2;
895
0
    sublt100_000_000:
896
0
        u -= digits * 100000000; // 100,000,000
897
0
    lt100_000_000:
898
0
        digits = u / 1000000; // 1,000,000
899
0
        ASCII_digits = two_ASCII_digits[digits];
900
0
        buffer[0] = ASCII_digits[0];
901
0
        buffer[1] = ASCII_digits[1];
902
0
        buffer += 2;
903
0
    sublt1_000_000:
904
0
        u -= digits * 1000000; // 1,000,000
905
0
    lt1_000_000:
906
0
        digits = u / 10000; // 10,000
907
0
        ASCII_digits = two_ASCII_digits[digits];
908
0
        buffer[0] = ASCII_digits[0];
909
0
        buffer[1] = ASCII_digits[1];
910
0
        buffer += 2;
911
4.08k
    sublt10_000:
912
4.08k
        u -= digits * 10000; // 10,000
913
4.21k
    lt10_000:
914
4.21k
        digits = u / 100;
915
4.21k
        ASCII_digits = two_ASCII_digits[digits];
916
4.21k
        buffer[0] = ASCII_digits[0];
917
4.21k
        buffer[1] = ASCII_digits[1];
918
4.21k
        buffer += 2;
919
16.6k
    sublt100:
920
16.6k
        u -= digits * 100;
921
324k
    lt100:
922
324k
        digits = u;
923
324k
        ASCII_digits = two_ASCII_digits[digits];
924
324k
        buffer[0] = ASCII_digits[0];
925
324k
        buffer[1] = ASCII_digits[1];
926
324k
        buffer += 2;
927
346k
    done:
928
346k
        // Terminate the string and hand back the position of the terminator.
        *buffer = 0;
929
346k
        return buffer;
930
324k
    }
931
932
346k
    // Fewer than ten digits: each branch below picks an entry point into the
    // label chain above. An even digit count jumps straight to the matching
    // ltN label; an odd digit count writes the single leading digit here and
    // jumps to the matching subltN label, which first subtracts that digit's
    // contribution from u.
    if (u < 100) {
933
329k
        digits = u;
934
329k
        if (u >= 10) goto lt100;
935
21.6k
        *buffer++ = '0' + digits;
936
21.6k
        goto done;
937
329k
    }
938
16.6k
    if (u < 10000) { // 10,000
939
12.5k
        if (u >= 1000) goto lt10_000;
940
12.4k
        digits = u / 100;
941
12.4k
        *buffer++ = '0' + digits;
942
12.4k
        goto sublt100;
943
12.5k
    }
944
4.08k
    if (u < 1000000) { // 1,000,000
945
4.08k
        if (u >= 100000) goto lt1_000_000;
946
4.08k
        digits = u / 10000; //    10,000
947
4.08k
        *buffer++ = '0' + digits;
948
4.08k
        goto sublt10_000;
949
4.08k
    }
950
1
    if (u < 100000000) { // 100,000,000
951
0
        if (u >= 10000000) goto lt100_000_000;
952
0
        digits = u / 1000000; //   1,000,000
953
0
        *buffer++ = '0' + digits;
954
0
        goto sublt1_000_000;
955
0
    }
956
    // we already know that u < 1,000,000,000
957
1
    digits = u / 100000000; // 100,000,000
958
1
    *buffer++ = '0' + digits;
959
1
    goto sublt100_000_000;
960
1
}
961
962
342k
// Signed 32-bit front end for FastUInt32ToBufferLeft(): emits a leading '-'
// for negative values and then formats the magnitude. Returns whatever the
// unsigned formatter returns.
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
    if (i >= 0) {
        return FastUInt32ToBufferLeft(static_cast<uint32>(i), buffer);
    }
    *buffer = '-';
    // Negate in modular ("unsigned") arithmetic so the most negative value is
    // handled; MSVC++ apparently warns for plain "-u", hence "0 - u".
    const uint32 magnitude = 0 - static_cast<uint32>(i);
    return FastUInt32ToBufferLeft(magnitude, buffer + 1);
}
973
974
4.22k
// Writes the decimal form of u64 at the start of buffer, NUL-terminates it and
// returns a pointer to the terminator. Values that fit in 32 bits are handed
// to FastUInt32ToBufferLeft(); larger values are split into the leading digits
// (formatted recursively) and the trailing nine digits (formatted here).
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
    const uint32 low = static_cast<uint32>(u64);
    if (low == u64) return FastUInt32ToBufferLeft(low, buffer);

    const uint64 leading = u64 / 1000000000;
    char* out = FastUInt64ToBufferLeft(leading, buffer);
    uint32 rest = u64 - (leading * 1000000000);

    // rest < 1,000,000,000, so its top digit pair must index the 100-entry table.
    DCHECK_LT(rest / 10000000, 100);

    // Emit four zero-padded digit pairs, most significant first ...
    static const uint32 kPairScale[] = {10000000, 100000, 1000, 10};
    for (const uint32 scale : kPairScale) {
        const uint32 pair = rest / scale;
        const char* ascii = two_ASCII_digits[pair];
        out[0] = ascii[0];
        out[1] = ascii[1];
        out += 2;
        rest -= pair * scale;
    }
    // ... followed by the ninth, final digit.
    *out++ = '0' + rest;
    *out = 0;
    return out;
}
1015
1016
4.22k
// Signed 64-bit front end for FastUInt64ToBufferLeft(): emits a leading '-'
// for negative values and then formats the magnitude. Returns whatever the
// unsigned formatter returns.
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
    if (i >= 0) {
        return FastUInt64ToBufferLeft(static_cast<uint64>(i), buffer);
    }
    *buffer = '-';
    // Negate in unsigned arithmetic so the most negative value is handled.
    const uint64 magnitude = 0 - static_cast<uint64>(i);
    return FastUInt64ToBufferLeft(magnitude, buffer + 1);
}
1024
1025
0
int HexDigitsPrefix(const char* buf, int num_digits) {
1026
0
    for (int i = 0; i < num_digits; i++)
1027
0
        if (!ascii_isxdigit(buf[i]))
1028
0
            return 0; // This also detects end of string as '\0' is not xdigit.
1029
0
    return 1;
1030
0
}
1031
1032
// ----------------------------------------------------------------------
1033
// AutoDigitStrCmp
1034
// AutoDigitLessThan
1035
// StrictAutoDigitLessThan
1036
// autodigit_less
1037
// autodigit_greater
1038
// strict_autodigit_less
1039
// strict_autodigit_greater
1040
//    These are like less<string> and greater<string>, except when a
1041
//    run of digits is encountered at corresponding points in the two
1042
//    arguments.  Such digit strings are compared numerically instead
1043
//    of lexicographically.  Therefore if you sort by
1044
//    "autodigit_less", some machine names might get sorted as:
1045
//        exaf1
1046
//        exaf2
1047
//        exaf10
1048
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
1049
//    set to true, or the strict version of the other functions),
1050
//    strings that represent equal numbers will not be considered equal if
1051
//    the string representations are not identical.  That is, "01" < "1" in
1052
//    strict mode, but "01" == "1" otherwise.
1053
// ----------------------------------------------------------------------
1054
1055
0
// Returns true iff c is a decimal digit. The character is converted to
// unsigned char first: calling isdigit() with a negative plain char (any byte
// >= 0x80 where char is signed) is undefined behaviour.
static inline bool AutoDigitIsDigit(char c) {
    const unsigned char uc = c;
    return isdigit(uc) != 0;
}

// Three-way comparison of a[0..alen) and b[0..blen) in which runs of digits
// found at corresponding positions are compared by numeric value (of arbitrary
// length) rather than character by character.
//
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if they compare
// equal. When strict is true, digit runs with equal value but a different
// number of leading zeroes are not equal: the run with more leading zeroes
// sorts first ("01" < "1").
int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) {
    int aindex = 0;
    int bindex = 0;
    while ((aindex < alen) && (bindex < blen)) {
        if (AutoDigitIsDigit(a[aindex]) && AutoDigitIsDigit(b[bindex])) {
            // Compare runs of digits.  Instead of extracting numbers, we
            // just skip leading zeroes, and then get the run-lengths.  This
            // allows us to handle arbitrary precision numbers.  We remember
            // how many zeroes we found so that we can differentiate between
            // "1" and "01" in strict mode.

            // Skip leading zeroes, but remember how many we found
            int azeroes = aindex;
            int bzeroes = bindex;
            while ((aindex < alen) && (a[aindex] == '0')) aindex++;
            while ((bindex < blen) && (b[bindex] == '0')) bindex++;
            azeroes = aindex - azeroes;
            bzeroes = bindex - bzeroes;

            // Count digit lengths
            int astart = aindex;
            int bstart = bindex;
            while ((aindex < alen) && AutoDigitIsDigit(a[aindex])) aindex++;
            while ((bindex < blen) && AutoDigitIsDigit(b[bindex])) bindex++;
            if (aindex - astart < bindex - bstart) {
                // a has shorter run of digits: so smaller
                return -1;
            } else if (aindex - astart > bindex - bstart) {
                // a has longer run of digits: so larger
                return 1;
            } else {
                // Same lengths, so compare digit by digit
                for (int i = 0; i < aindex - astart; i++) {
                    if (a[astart + i] < b[bstart + i]) {
                        return -1;
                    } else if (a[astart + i] > b[bstart + i]) {
                        return 1;
                    }
                }
                // Equal: did one have more leading zeroes?
                if (strict && azeroes != bzeroes) {
                    if (azeroes > bzeroes) {
                        // a has more leading zeroes: a < b
                        return -1;
                    } else {
                        // b has more leading zeroes: a > b
                        return 1;
                    }
                }
                // Equal: so continue scanning
            }
        } else if (a[aindex] < b[bindex]) {
            return -1;
        } else if (a[aindex] > b[bindex]) {
            return 1;
        } else {
            aindex++;
            bindex++;
        }
    }

    if (aindex < alen) {
        // b is prefix of a
        return 1;
    } else if (bindex < blen) {
        // a is prefix of b
        return -1;
    } else {
        // a is equal to b
        return 0;
    }
}
1127
1128
0
bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1129
0
    return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
1130
0
}
1131
1132
0
bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1133
0
    return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
1134
0
}
1135
1136
// ----------------------------------------------------------------------
1137
// SimpleDtoa()
1138
// SimpleFtoa()
1139
// DoubleToBuffer()
1140
// FloatToBuffer()
1141
//    We want to print the value without losing precision, but we also do
1142
//    not want to print more digits than necessary.  This turns out to be
1143
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
1144
//    exactly in binary.  If we print 0.2 with a very large precision,
1145
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1146
//    On the other hand, if we set the precision too low, we lose
1147
//    significant digits when printing numbers that actually need them.
1148
//    It turns out there is no precision value that does the right thing
1149
//    for all numbers.
1150
//
1151
//    Our strategy is to first try printing with a precision that is never
1152
//    over-precise, then parse the result with strtod() to see if it
1153
//    matches.  If not, we print again with a precision that will always
1154
//    give a precise result, but may use more digits than necessary.
1155
//
1156
//    An arguably better strategy would be to use the algorithm described
1157
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
1158
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
1159
//    however, that the following implementation is about as fast as
1160
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
1161
//    will not scale well on multi-core machines.  DMG's code is slightly
1162
//    more accurate (in that it will never use more digits than
1163
//    necessary), but this is probably irrelevant for most users.
1164
//
1165
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
1166
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
1167
//    one in that it makes guesses and then uses strtod() to check them.
1168
//    Their implementation is faster because they use their own code to
1169
//    generate the digits in the first place rather than use snprintf(),
1170
//    thus avoiding format string parsing overhead.  However, this makes
1171
//    it considerably more complicated than the following implementation,
1172
//    and it is embedded in a larger library.  If speed turns out to be
1173
//    an issue, we could re-implement this in terms of their
1174
//    implementation.
1175
// ----------------------------------------------------------------------
1176
1177
0
// Formats value via DoubleToBuffer() into a stack buffer and returns the
// result as a string.
string SimpleDtoa(double value) {
    char buffer[kDoubleToBufferSize];
    const char* const text = DoubleToBuffer(value, buffer);
    return string(text);
}
1181
1182
0
// Formats value via FloatToBuffer() into a stack buffer and returns the
// result as a string.
string SimpleFtoa(float value) {
    char buffer[kFloatToBufferSize];
    const char* const text = FloatToBuffer(value, buffer);
    return string(text);
}
1186
1187
0
char* DoubleToBuffer(double value, char* buffer) {
1188
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1189
    // platforms these days.  Just in case some system exists where DBL_DIG
1190
    // is significantly larger -- and risks overflowing our buffer -- we have
1191
    // this assert.
1192
0
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1193
1194
0
    int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1195
1196
    // The snprintf should never overflow because the buffer is significantly
1197
    // larger than the precision we asked for.
1198
0
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1199
1200
0
    if (strtod(buffer, nullptr) != value) {
1201
0
        snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value);
1202
1203
        // Should never overflow; see above.
1204
0
        DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1205
0
    }
1206
0
    return buffer;
1207
0
}
1208
1209
0
char* FloatToBuffer(float value, char* buffer) {
1210
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1211
    // platforms these days.  Just in case some system exists where FLT_DIG
1212
    // is significantly larger -- and risks overflowing our buffer -- we have
1213
    // this assert.
1214
0
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1215
1216
0
    int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1217
1218
    // The snprintf should never overflow because the buffer is significantly
1219
    // larger than the precision we asked for.
1220
0
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1221
1222
0
    float parsed_value;
1223
0
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1224
0
        snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value);
1225
1226
        // Should never overflow; see above.
1227
0
        DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1228
0
    }
1229
0
    return buffer;
1230
0
}
1231
1232
11
int DoubleToBuffer(double value, int width, char* buffer) {
1233
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1234
    // platforms these days.  Just in case some system exists where DBL_DIG
1235
    // is significantly larger -- and risks overflowing our buffer -- we have
1236
    // this assert.
1237
11
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1238
1239
11
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
1240
1241
    // The snprintf should never overflow because the buffer is significantly
1242
    // larger than the precision we asked for.
1243
11
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1244
1245
11
    if (strtod(buffer, nullptr) != value) {
1246
3
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
1247
1248
        // Should never overflow; see above.
1249
3
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1250
3
    }
1251
1252
11
    return snprintf_result;
1253
11
}
1254
1255
42
int FloatToBuffer(float value, int width, char* buffer) {
1256
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1257
    // platforms these days.  Just in case some system exists where FLT_DIG
1258
    // is significantly larger -- and risks overflowing our buffer -- we have
1259
    // this assert.
1260
42
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1261
1262
42
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
1263
1264
    // The snprintf should never overflow because the buffer is significantly
1265
    // larger than the precision we asked for.
1266
42
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1267
1268
42
    float parsed_value;
1269
42
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1270
8
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
1271
1272
        // Should never overflow; see above.
1273
8
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1274
8
    }
1275
1276
42
    return snprintf_result;
1277
42
}
1278
1279
10
int FastDoubleToBuffer(double value, char* buffer) {
1280
10
    auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1281
10
    *end = '\0';
1282
10
    return end - buffer;
1283
10
}
1284
1285
10
int FastFloatToBuffer(float value, char* buffer) {
1286
10
    auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1287
10
    *end = '\0';
1288
10
    return end - buffer;
1289
10
}
1290
1291
// ----------------------------------------------------------------------
1292
// SimpleItoaWithCommas()
1293
//    Description: converts an integer to a string.
1294
//    Puts commas every 3 spaces.
1295
//    Faster than printf("%d")?
1296
//
1297
//    Return value: string
1298
// ----------------------------------------------------------------------
1299
0
// Returns i in decimal with a comma between every group of three digits,
// prefixed with '-' when negative.
string SimpleItoaWithCommas(int32 i) {
    // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
    // Longest is -2,147,483,648.
    char local[14];
    char* const end = local + sizeof(local);
    char* p = end;

    // Work on the unsigned magnitude so that -2,147,483,648 is handled; the
    // negation is done in unsigned arithmetic to avoid signed overflow.
    uint32 magnitude = i;
    if (i < 0) magnitude = 0 - magnitude;

    // Fill the buffer right to left, inserting a comma before every fourth
    // digit. The do/while guarantees at least one digit, so 0 prints as "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--p = ',';
            group_len = 0;
        }
        *--p = '0' + magnitude % 10;
        magnitude /= 10;
        ++group_len;
    } while (magnitude != 0);

    if (i < 0) *--p = '-';
    return string(p, end);
}
1327
1328
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
1329
// compile.
1330
0
// Returns i in decimal with a comma between every group of three digits.
string SimpleItoaWithCommas(uint32 i) {
    // 10 digits and 3 commas are good for 32-bit or smaller ints.
    // Longest is 4,294,967,295.
    char local[13];
    char* const end = local + sizeof(local);
    char* p = end;

    // Fill the buffer right to left, inserting a comma before every fourth
    // digit. The do/while guarantees at least one digit, so 0 prints as "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--p = ',';
            group_len = 0;
        }
        *--p = '0' + i % 10;
        i /= 10;
        ++group_len;
    } while (i != 0);

    return string(p, end);
}
1353
1354
0
// Returns i in decimal with a comma between every group of three digits,
// prefixed with '-' when negative. Delegates to the buffer-based overload,
// which writes right-aligned into the local array.
string SimpleItoaWithCommas(int64 i) {
    // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
    char local[26];
    char* const end = local + sizeof(local);
    char* const first = SimpleItoaWithCommas(i, local, sizeof(local));
    return string(first, end);
}
1360
1361
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
1362
// compile.
1363
0
// Returns i in decimal with a comma between every group of three digits.
string SimpleItoaWithCommas(uint64 i) {
    // 20 digits and 6 commas are good for 64-bit or smaller ints.
    // Longest is 18,446,744,073,709,551,615.
    char local[26];
    char* const end = local + sizeof(local);
    char* p = end;

    // Fill the buffer right to left, inserting a comma before every fourth
    // digit. The do/while guarantees at least one digit, so 0 prints as "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--p = ',';
            group_len = 0;
        }
        *--p = '0' + i % 10;
        i /= 10;
        ++group_len;
    } while (i != 0);

    return string(p, end);
}
1386
1387
3
// Writes i in decimal, with a comma between every group of three digits and a
// leading '-' when negative, right-aligned so that the text ends at
// buffer + buffer_size. Returns a pointer to the first character written.
// No NUL terminator is written and buffer_size is not checked against the
// length of the output: 19 digits, 6 commas, and sign are good for 64-bit or
// smaller ints.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the unsigned magnitude so that -9,223,372,036,854,775,808 is
    // handled; the negation is done in unsigned arithmetic.
    uint64_t magnitude = i;
    if (i < 0) magnitude = 0 - magnitude;

    // Fill right to left, inserting a comma before every fourth digit. The
    // do/while guarantees at least one digit, so 0 prints as "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--p = ',';
            group_len = 0;
        }
        *--p = '0' + magnitude % 10;
        magnitude /= 10;
        ++group_len;
    } while (magnitude != 0);

    if (i < 0) *--p = '-';
    return p;
}
1413
1414
17
// Writes i in decimal, with a comma between every group of three digits and a
// leading '-' when negative, right-aligned so that the text ends at
// buffer + buffer_size. Returns a pointer to the first character written.
// No NUL terminator is written and buffer_size is not checked against the
// length of the output: 39 digits, 12 commas, and sign are good for 128-bit or
// smaller ints.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the unsigned magnitude so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled; the
    // negation is done in unsigned arithmetic.
    __uint128_t magnitude = i;
    if (i < 0) magnitude = 0 - magnitude;

    // Fill right to left, inserting a comma before every fourth digit. The
    // do/while guarantees at least one digit, so 0 prints as "0".
    int group_len = 0;
    do {
        if (group_len == 3) {
            *--p = ',';
            group_len = 0;
        }
        *--p = '0' + magnitude % 10;
        magnitude /= 10;
        ++group_len;
    } while (magnitude != 0);

    if (i < 0) *--p = '-';
    return p;
}
1440
1441
// ----------------------------------------------------------------------
1442
// ItoaKMGT()
1443
//    Description: converts an integer to a string
1444
//    Truncates values to a readable unit: K, M, G or T
1445
//    Opposite of atoi_kmgt()
1446
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   57185920 -> "45M"
1447
//
1448
//    Return value: string
1449
// ----------------------------------------------------------------------
1450
0
// Renders i with the largest binary unit (T = 2^40, G = 2^30, M = 2^20,
// K = 2^10) for which the truncated quotient is greater than 1; if no unit
// qualifies, the plain number is printed. Negative inputs get a leading '-'.
string ItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
        // that's OK as this function is only for human readability
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    static const struct {
        int shift;
        const char* suffix;
    } kUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}, {10, "K"}};

    // Default: no unit applies, print the magnitude as-is.
    int64 val = i;
    const char* suffix = "";
    for (const auto& unit : kUnits) {
        const int64 scaled = i >> unit.shift;
        if (scaled > 1) {
            val = scaled;
            suffix = unit.suffix;
            break;
        }
    }

    return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
1476
1477
0
// Breaks i down into T / G / M / K components (binary units) and concatenates
// them, e.g. "<t>T,<g>G,<m>M,<k>K". A component is emitted only when its
// truncated count is greater than 1; the T, G and M components are followed by
// a comma. Negative inputs get a leading '-'.
string AccurateItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
        // that's OK as this function is only for human readability
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    string ret = StringPrintf("%s", sign);

    static const struct {
        int shift;
        const char* unit;
    } kLargeUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}};

    // Peel off the T, G and M components, largest first.
    for (const auto& u : kLargeUnits) {
        const int64 count = i >> u.shift;
        if (count > 1) {
            ret += StringPrintf("%" PRId64
                                "%s"
                                ",",
                                count, u.unit);
            i -= count << u.shift;
        }
    }

    // NOTE(review): when fewer than 2K remain, the raw remainder (not a count
    // of K) is printed with a "K" suffix. This mirrors the existing behaviour;
    // confirm whether it is intended.
    const int64 kilo = i >> 10;
    ret += StringPrintf("%" PRId64 "%s", (kilo > 1) ? kilo : i, "K");

    return ret;
}
1519
1520
// DEPRECATED(wadetregaskis).
1521
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1522
1523
0
// Formats f with the caller-supplied printf-style format string.
string FloatToString(float f, const char* format) {
    string formatted = StringPrintf(format, f);
    return formatted;
}
1526
1527
0
// Formats i with the caller-supplied printf-style format string.
string IntToString(int i, const char* format) {
    string formatted = StringPrintf(format, i);
    return formatted;
}
1530
1531
0
// Formats i64 with the caller-supplied printf-style format string.
string Int64ToString(int64 i64, const char* format) {
    string formatted = StringPrintf(format, i64);
    return formatted;
}
1534
1535
0
// Formats ui64 with the caller-supplied printf-style format string.
string UInt64ToString(uint64 ui64, const char* format) {
    string formatted = StringPrintf(format, ui64);
    return formatted;
}