Coverage Report

Created: 2024-11-21 15:53

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <assert.h>
10
#include <ctype.h>
11
#include <errno.h>
12
#include <float.h> // for DBL_DIG and FLT_DIG
13
#include <math.h>  // for HUGE_VAL
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <string.h>
17
#include <inttypes.h>
18
#include <sys/types.h>
19
#include <limits>
20
#include <ostream>
21
22
using std::numeric_limits;
23
#include <string>
24
25
using std::string;
26
27
#include <fmt/compile.h>
28
#include <fmt/format.h>
29
30
#include "common/logging.h"
31
32
#include "gutil/gscoped_ptr.h"
33
#include "gutil/integral_types.h"
34
#include "gutil/stringprintf.h"
35
#include "gutil/strings/ascii_ctype.h"
36
#include "gutil/strtoint.h"
37
38
// Reads a <double> in *text, which may not be whitespace-initiated.
39
// *len is the length, or -1 if text is '\0'-terminated, which is more
40
// efficient.  Sets *text to the end of the double, and val to the
41
// converted value, and the length of the double is subtracted from
42
// *len. <double> may also be a '?', in which case val will be
43
// unchanged. Returns true upon success.  If initial_minus is
44
// non-NULL, then *initial_minus will indicate whether the first
45
// symbol seen was a '-', which will be ignored. Similarly, if
46
// final_period is non-NULL, then *final_period will indicate whether
47
// the last symbol seen was a '.', which will be ignored. This is
48
// useful in case that an initial '-' or final '.' would have another
49
// meaning (as a separator, e.g.).
50
static inline bool EatADouble(const char** text, int* len, bool allow_question, double* val,
51
0
                              bool* initial_minus, bool* final_period) {
52
0
    const char* pos = *text;
53
0
    int rem = *len; // remaining length, or -1 if null-terminated
54
55
0
    if (pos == nullptr || rem == 0) return false;
56
57
0
    if (allow_question && (*pos == '?')) {
58
0
        *text = pos + 1;
59
0
        if (rem != -1) *len = rem - 1;
60
0
        return true;
61
0
    }
62
63
0
    if (initial_minus) {
64
0
        if ((*initial_minus = (*pos == '-'))) { // Yes, we want assignment.
65
0
            if (rem == 1) return false;
66
0
            ++pos;
67
0
            if (rem != -1) --rem;
68
0
        }
69
0
    }
70
71
    // a double has to begin one of these (we don't allow 'inf' or whitespace)
72
    // this also serves as an optimization.
73
0
    if (!strchr("-+.0123456789", *pos)) return false;
74
75
    // strtod is evil in that the second param is a non-const char**
76
0
    char* end_nonconst;
77
0
    double retval;
78
0
    if (rem == -1) {
79
0
        retval = strtod(pos, &end_nonconst);
80
0
    } else {
81
        // not '\0'-terminated & no obvious terminator found. must copy.
82
0
        gscoped_array<char> buf(new char[rem + 1]);
83
0
        memcpy(buf.get(), pos, rem);
84
0
        buf[rem] = '\0';
85
0
        retval = strtod(buf.get(), &end_nonconst);
86
0
        end_nonconst = const_cast<char*>(pos) + (end_nonconst - buf.get());
87
0
    }
88
89
0
    if (pos == end_nonconst) return false;
90
91
0
    if (final_period) {
92
0
        *final_period = (end_nonconst[-1] == '.');
93
0
        if (*final_period) {
94
0
            --end_nonconst;
95
0
        }
96
0
    }
97
98
0
    *text = end_nonconst;
99
0
    *val = retval;
100
0
    if (rem != -1) *len = rem - (end_nonconst - pos);
101
0
    return true;
102
0
}
103
104
// If update, consume one of acceptable_chars from string *text of
105
// length len and return that char, or '\0' otherwise. If len is -1,
106
// *text is null-terminated. If update is false, don't alter *text and
107
// *len. If null_ok, then update must be false, and, if text has no
108
// more chars, then return '\1' (arbitrary nonzero).
109
static inline char EatAChar(const char** text, int* len, const char* acceptable_chars, bool update,
110
0
                            bool null_ok) {
111
0
    assert(!(update && null_ok));
112
0
    if ((*len == 0) || (**text == '\0'))
113
0
        return (null_ok ? '\1' : '\0'); // if null_ok, we're in predicate mode.
114
115
0
    if (strchr(acceptable_chars, **text)) {
116
0
        char result = **text;
117
0
        if (update) {
118
0
            ++(*text);
119
0
            if (*len != -1) --(*len);
120
0
        }
121
0
        return result;
122
0
    }
123
124
0
    return '\0'; // no match; no update
125
0
}
126
127
// Parse an expression in 'text' of the form: <comparator><double> or
128
// <double><sep><double> See full comments in header file.
129
bool ParseDoubleRange(const char* text, int len, const char** end, double* from, double* to,
130
0
                      bool* is_currency, const DoubleRangeOptions& opts) {
131
0
    const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
132
133
0
    if (!opts.dont_modify_unbounded) {
134
0
        *from = -HUGE_VAL;
135
0
        *to = HUGE_VAL;
136
0
    }
137
0
    if (opts.allow_currency && (is_currency != nullptr)) *is_currency = false;
138
139
0
    assert(len >= -1);
140
0
    assert(opts.separators && (*opts.separators != '\0'));
141
    // these aren't valid separators
142
0
    assert(strlen(opts.separators) == strcspn(opts.separators, "+0123456789eE$"));
143
0
    assert(opts.num_required_bounds <= 2);
144
145
    // Handle easier cases of comparators (<, >) first
146
0
    if (opts.allow_comparators) {
147
0
        char comparator = EatAChar(&text, &len, "<>", true, false);
148
0
        if (comparator) {
149
0
            double* dest = (comparator == '>') ? from : to;
150
0
            EatAChar(&text, &len, "=", true, false);
151
0
            if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
152
0
                if (is_currency != nullptr) *is_currency = true;
153
0
            if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr, nullptr))
154
0
                return false;
155
0
            *end = text;
156
0
            return EatAChar(&text, &len, opts.acceptable_terminators, false,
157
0
                            opts.null_terminator_ok);
158
0
        }
159
0
    }
160
161
0
    bool seen_dollar = (opts.allow_currency && EatAChar(&text, &len, "$", true, false));
162
163
    // If we see a '-', two things could be happening: -<to> or
164
    // <from>... where <from> is negative. Treat initial minus sign as a
165
    // separator if '-' is a valid separator.
166
    // Similarly, we prepare for the possibility of seeing a '.' at the
167
    // end of the number, in case '.' (which really means '..') is a
168
    // separator.
169
0
    bool initial_minus_sign = false;
170
0
    bool final_period = false;
171
0
    bool* check_initial_minus =
172
0
            (strchr(opts.separators, '-') && !seen_dollar && (opts.num_required_bounds < 2))
173
0
                    ? (&initial_minus_sign)
174
0
                    : nullptr;
175
0
    bool* check_final_period = strchr(opts.separators, '.') ? (&final_period) : nullptr;
176
0
    bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers, from,
177
0
                                  check_initial_minus, check_final_period);
178
179
    // if 2 bounds required, must see a double (or '?' if allowed)
180
0
    if ((opts.num_required_bounds == 2) && !double_seen) return false;
181
182
0
    if (seen_dollar && !double_seen) {
183
0
        --text;
184
0
        if (len != -1) ++len;
185
0
        seen_dollar = false;
186
0
    }
187
    // If we're here, we've read the first double and now expect a
188
    // separator and another <double>.
189
0
    char separator = EatAChar(&text, &len, opts.separators, true, false);
190
0
    if (separator == '.') {
191
        // seen one '.' as separator; must check for another; perhaps set seplen=2
192
0
        if (EatAChar(&text, &len, ".", true, false)) {
193
0
            if (final_period) {
194
                // We may have three periods in a row. The first is part of the
195
                // first number, the others are a separator. Policy: 234...567
196
                // is "234." to "567", not "234" to ".567".
197
0
                EatAChar(&text, &len, ".", true, false);
198
0
            }
199
0
        } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
200
            // just one '.' and no other separator; uneat the first '.' we saw
201
0
            --text;
202
0
            if (len != -1) ++len;
203
0
            separator = '\0';
204
0
        }
205
0
    }
206
    // By now, we've consumed whatever separator there may have been,
207
    // and separator is true iff there was one.
208
0
    if (!separator) {
209
0
        if (final_period) // final period now considered part of first double
210
0
            EatAChar(&text, &len, ".", true, false);
211
0
        if (initial_minus_sign && double_seen) {
212
0
            *to = *from;
213
0
            *from = from_default;
214
0
        } else if (opts.require_separator || (opts.num_required_bounds > 0 && !double_seen) ||
215
0
                   (opts.num_required_bounds > 1)) {
216
0
            return false;
217
0
        }
218
0
    } else {
219
0
        if (initial_minus_sign && double_seen) *from = -(*from);
220
        // read second <double>
221
0
        bool second_dollar_seen = (seen_dollar || (opts.allow_currency && !double_seen)) &&
222
0
                                  EatAChar(&text, &len, "$", true, false);
223
0
        bool second_double_seen =
224
0
                EatADouble(&text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
225
0
        if (opts.num_required_bounds > double_seen + second_double_seen) return false;
226
0
        if (second_dollar_seen && !second_double_seen) {
227
0
            --text;
228
0
            if (len != -1) ++len;
229
0
            second_dollar_seen = false;
230
0
        }
231
0
        seen_dollar = seen_dollar || second_dollar_seen;
232
0
    }
233
234
0
    if (seen_dollar && (is_currency != nullptr)) *is_currency = true;
235
    // We're done. But we have to check that the next char is a proper
236
    // terminator.
237
0
    *end = text;
238
0
    char terminator =
239
0
            EatAChar(&text, &len, opts.acceptable_terminators, false, opts.null_terminator_ok);
240
0
    if (terminator == '.') --(*end);
241
0
    return terminator;
242
0
}
243
244
// ----------------------------------------------------------------------
245
// ConsumeStrayLeadingZeroes
246
//    Eliminates all leading zeroes (unless the string itself is composed
247
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
248
// --------------------------------------------------------------------
249
250
0
void ConsumeStrayLeadingZeroes(string* const str) {
251
0
    const string::size_type len(str->size());
252
0
    if (len > 1 && (*str)[0] == '0') {
253
0
        const char *const begin(str->c_str()), *const end(begin + len), *ptr(begin + 1);
254
0
        while (ptr != end && *ptr == '0') {
255
0
            ++ptr;
256
0
        }
257
0
        string::size_type remove(ptr - begin);
258
0
        DCHECK_GT(ptr, begin);
259
0
        if (remove == len) {
260
0
            --remove; // if they are all zero, leave one...
261
0
        }
262
0
        str->erase(0, remove);
263
0
    }
264
0
}
265
266
// ----------------------------------------------------------------------
267
// ParseLeadingInt32Value()
268
// ParseLeadingUInt32Value()
269
//    A simple parser for [u]int32 values. Returns the parsed value
270
//    if a valid value is found; else returns deflt
271
//    This cannot handle decimal numbers with leading 0s.
272
// --------------------------------------------------------------------
273
274
0
int32 ParseLeadingInt32Value(const char* str, int32 deflt) {
275
0
    char* error = nullptr;
276
0
    long value = strtol(str, &error, 0);
277
    // Limit long values to int32 min/max.  Needed for lp64; no-op on 32 bits.
278
0
    if (value > numeric_limits<int32>::max()) {
279
0
        value = numeric_limits<int32>::max();
280
0
    } else if (value < numeric_limits<int32>::min()) {
281
0
        value = numeric_limits<int32>::min();
282
0
    }
283
0
    return (error == str) ? deflt : value;
284
0
}
285
286
0
uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) {
287
0
    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
288
        // When long is 32 bits, we can use strtoul.
289
0
        char* error = nullptr;
290
0
        const uint32 value = strtoul(str, &error, 0);
291
0
        return (error == str) ? deflt : value;
292
0
    } else {
293
        // When long is 64 bits, we must use strto64 and handle limits
294
        // by hand.  The reason we cannot use a 64-bit strtoul is that
295
        // it would be impossible to differentiate "-2" (that should wrap
296
        // around to the value UINT_MAX-1) from a string with ULONG_MAX-1
297
        // (that should be pegged to UINT_MAX due to overflow).
298
0
        char* error = nullptr;
299
0
        int64 value = strto64(str, &error, 0);
300
0
        if (value > numeric_limits<uint32>::max() ||
301
0
            value < -static_cast<int64>(numeric_limits<uint32>::max())) {
302
0
            value = numeric_limits<uint32>::max();
303
0
        }
304
        // Within these limits, truncation to 32 bits handles negatives correctly.
305
0
        return (error == str) ? deflt : value;
306
0
    }
307
0
}
308
309
// ----------------------------------------------------------------------
310
// ParseLeadingDec32Value
311
// ParseLeadingUDec32Value
312
//    A simple parser for [u]int32 values. Returns the parsed value
313
//    if a valid value is found; else returns deflt
314
//    The string passed in is treated as *10 based*.
315
//    This can handle strings with leading 0s.
316
// --------------------------------------------------------------------
317
318
0
int32 ParseLeadingDec32Value(const char* str, int32 deflt) {
319
0
    char* error = nullptr;
320
0
    long value = strtol(str, &error, 10);
321
    // Limit long values to int32 min/max.  Needed for lp64; no-op on 32 bits.
322
0
    if (value > numeric_limits<int32>::max()) {
323
0
        value = numeric_limits<int32>::max();
324
0
    } else if (value < numeric_limits<int32>::min()) {
325
0
        value = numeric_limits<int32>::min();
326
0
    }
327
0
    return (error == str) ? deflt : value;
328
0
}
329
330
0
uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) {
331
0
    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
332
        // When long is 32 bits, we can use strtoul.
333
0
        char* error = nullptr;
334
0
        const uint32 value = strtoul(str, &error, 10);
335
0
        return (error == str) ? deflt : value;
336
0
    } else {
337
        // When long is 64 bits, we must use strto64 and handle limits
338
        // by hand.  The reason we cannot use a 64-bit strtoul is that
339
        // it would be impossible to differentiate "-2" (that should wrap
340
        // around to the value UINT_MAX-1) from a string with ULONG_MAX-1
341
        // (that should be pegged to UINT_MAX due to overflow).
342
0
        char* error = nullptr;
343
0
        int64 value = strto64(str, &error, 10);
344
0
        if (value > numeric_limits<uint32>::max() ||
345
0
            value < -static_cast<int64>(numeric_limits<uint32>::max())) {
346
0
            value = numeric_limits<uint32>::max();
347
0
        }
348
        // Within these limits, truncation to 32 bits handles negatives correctly.
349
0
        return (error == str) ? deflt : value;
350
0
    }
351
0
}
352
353
// ----------------------------------------------------------------------
354
// ParseLeadingUInt64Value
355
// ParseLeadingInt64Value
356
// ParseLeadingHex64Value
357
//    A simple parser for 64-bit values. Returns the parsed value if a
358
//    valid integer is found; else returns deflt
359
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
360
// --------------------------------------------------------------------
361
0
uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) {
362
0
    char* error = nullptr;
363
0
    const uint64 value = strtou64(str, &error, 0);
364
0
    return (error == str) ? deflt : value;
365
0
}
366
367
0
int64 ParseLeadingInt64Value(const char* str, int64 deflt) {
368
0
    char* error = nullptr;
369
0
    const int64 value = strto64(str, &error, 0);
370
0
    return (error == str) ? deflt : value;
371
0
}
372
373
0
uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) {
374
0
    char* error = nullptr;
375
0
    const uint64 value = strtou64(str, &error, 16);
376
0
    return (error == str) ? deflt : value;
377
0
}
378
379
// ----------------------------------------------------------------------
380
// ParseLeadingDec64Value
381
// ParseLeadingUDec64Value
382
//    A simple parser for [u]int64 values. Returns the parsed value
383
//    if a valid value is found; else returns deflt
384
//    The string passed in is treated as *10 based*.
385
//    This can handle strings with leading 0s.
386
// --------------------------------------------------------------------
387
388
0
int64 ParseLeadingDec64Value(const char* str, int64 deflt) {
389
0
    char* error = nullptr;
390
0
    const int64 value = strto64(str, &error, 10);
391
0
    return (error == str) ? deflt : value;
392
0
}
393
394
0
uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) {
395
0
    char* error = nullptr;
396
0
    const uint64 value = strtou64(str, &error, 10);
397
0
    return (error == str) ? deflt : value;
398
0
}
399
400
// ----------------------------------------------------------------------
401
// ParseLeadingDoubleValue()
402
//    A simple parser for double values. Returns the parsed value
403
//    if a valid value is found; else returns deflt
404
// --------------------------------------------------------------------
405
406
0
double ParseLeadingDoubleValue(const char* str, double deflt) {
407
0
    char* error = nullptr;
408
0
    errno = 0;
409
0
    const double value = strtod(str, &error);
410
0
    if (errno != 0 ||   // overflow/underflow happened
411
0
        error == str) { // no valid parse
412
0
        return deflt;
413
0
    } else {
414
0
        return value;
415
0
    }
416
0
}
417
418
// ----------------------------------------------------------------------
419
// ParseLeadingBoolValue()
420
//    A recognizer of boolean string values. Returns the parsed value
421
//    if a valid value is found; else returns deflt.  This skips leading
422
//    whitespace, is case insensitive, and recognizes these forms:
423
//    0/1, false/true, no/yes, n/y
424
// --------------------------------------------------------------------
425
0
bool ParseLeadingBoolValue(const char* str, bool deflt) {
426
0
    static const int kMaxLen = 5;
427
0
    char value[kMaxLen + 1];
428
    // Skip whitespace
429
0
    while (ascii_isspace(*str)) {
430
0
        ++str;
431
0
    }
432
0
    int len = 0;
433
0
    for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str);
434
0
    if (len == 0 || len > kMaxLen) return deflt;
435
0
    value[len] = '\0';
436
0
    switch (len) {
437
0
    case 1:
438
0
        if (value[0] == '0' || value[0] == 'n') return false;
439
0
        if (value[0] == '1' || value[0] == 'y') return true;
440
0
        break;
441
0
    case 2:
442
0
        if (!strcmp(value, "no")) return false;
443
0
        break;
444
0
    case 3:
445
0
        if (!strcmp(value, "yes")) return true;
446
0
        break;
447
0
    case 4:
448
0
        if (!strcmp(value, "true")) return true;
449
0
        break;
450
0
    case 5:
451
0
        if (!strcmp(value, "false")) return false;
452
0
        break;
453
0
    }
454
0
    return deflt;
455
0
}
456
457
// ----------------------------------------------------------------------
458
// Uint64ToString()
459
// FloatToString()
460
// IntToString()
461
//    Convert various types to their string representation, possibly padded
462
//    with spaces, using snprintf format specifiers.
463
// ----------------------------------------------------------------------
464
465
0
string Uint64ToString(uint64 fp) {
466
0
    char buf[17];
467
0
    snprintf(buf, sizeof(buf), "%016" PRIx64, fp);
468
0
    return string(buf);
469
0
}
470
namespace {
471
472
// Represents integer values of digits.
473
// Uses 36 to indicate an invalid character since we support
474
// bases up to 36.
475
static const int8 kAsciiToInt[256] = {
476
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
477
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
478
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  36, 36,
479
        36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
480
        27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16,
481
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36,
482
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
483
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
484
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
485
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
486
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
487
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
488
489
// Input format based on POSIX.1-2008 strtol
490
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
491
template <typename IntType>
492
0
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
493
    // Consume whitespace.
494
0
    while (start < end && ascii_isspace(start[0])) {
495
0
        ++start;
496
0
    }
497
0
    while (start < end && ascii_isspace(end[-1])) {
498
0
        --end;
499
0
    }
500
0
    if (start >= end) {
501
0
        return false;
502
0
    }
503
504
    // Consume sign.
505
0
    const bool negative = (start[0] == '-');
506
0
    if (negative || start[0] == '+') {
507
0
        ++start;
508
0
        if (start >= end) {
509
0
            return false;
510
0
        }
511
0
    }
512
513
    // Consume base-dependent prefix.
514
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
515
    //  base 16: "0x" -> base 16
516
    // Also validate the base.
517
0
    if (base == 0) {
518
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
519
0
            base = 16;
520
0
            start += 2;
521
0
        } else if (end - start >= 1 && start[0] == '0') {
522
0
            base = 8;
523
0
            start += 1;
524
0
        } else {
525
0
            base = 10;
526
0
        }
527
0
    } else if (base == 16) {
528
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
529
0
            start += 2;
530
0
        }
531
0
    } else if (base >= 2 && base <= 36) {
532
        // okay
533
0
    } else {
534
0
        return false;
535
0
    }
536
537
    // Consume digits.
538
    //
539
    // The classic loop:
540
    //
541
    //   for each digit
542
    //     value = value * base + digit
543
    //   value *= sign
544
    //
545
    // The classic loop needs overflow checking.  It also fails on the most
546
    // negative integer, -2147483648 in 32-bit two's complement representation.
547
    //
548
    // My improved loop:
549
    //
550
    //  if (!negative)
551
    //    for each digit
552
    //      value = value * base
553
    //      value = value + digit
554
    //  else
555
    //    for each digit
556
    //      value = value * base
557
    //      value = value - digit
558
    //
559
    // Overflow checking becomes simple.
560
    //
561
    // I present the positive code first for easier reading.
562
0
    IntType value = 0;
563
0
    if (!negative) {
564
0
        const IntType vmax = std::numeric_limits<IntType>::max();
565
0
        assert(vmax > 0);
566
0
        assert(vmax >= base);
567
0
        const IntType vmax_over_base = vmax / base;
568
        // loop over digits
569
        // loop body is interleaved for perf, not readability
570
0
        for (; start < end; ++start) {
571
0
            unsigned char c = static_cast<unsigned char>(start[0]);
572
0
            int digit = kAsciiToInt[c];
573
0
            if (value > vmax_over_base) return false;
574
0
            value *= base;
575
0
            if (digit >= base) return false;
576
0
            if (value > vmax - digit) return false;
577
0
            value += digit;
578
0
        }
579
0
    } else {
580
0
        const IntType vmin = std::numeric_limits<IntType>::min();
581
0
        assert(vmin < 0);
582
0
        assert(vmin <= 0 - base);
583
0
        IntType vmin_over_base = vmin / base;
584
        // 2003 c++ standard [expr.mul]
585
        // "... the sign of the remainder is implementation-defined."
586
        // Although (vmin/base)*base + vmin%base is always vmin.
587
        // 2011 c++ standard tightens the spec but we cannot rely on it.
588
0
        if (vmin % base > 0) {
589
0
            vmin_over_base += 1;
590
0
        }
591
        // loop over digits
592
        // loop body is interleaved for perf, not readability
593
0
        for (; start < end; ++start) {
594
0
            unsigned char c = static_cast<unsigned char>(start[0]);
595
0
            int digit = kAsciiToInt[c];
596
0
            if (value < vmin_over_base) return false;
597
0
            value *= base;
598
0
            if (digit >= base) return false;
599
0
            if (value < vmin + digit) return false;
600
0
            value -= digit;
601
0
        }
602
0
    }
603
604
    // Store output.
605
0
    *value_p = value;
606
0
    return true;
607
0
}
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_
608
609
} // anonymous namespace
610
611
0
bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) {
612
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
613
0
}
614
615
0
bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) {
616
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
617
0
}
618
619
0
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
620
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
621
0
}
622
623
0
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
624
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
625
0
}
626
627
0
bool safe_strto32_base(const char* str, int32* value, int base) {
628
0
    char* endptr;
629
0
    errno = 0; // errno only gets set on errors
630
0
    *value = strto32(str, &endptr, base);
631
0
    if (endptr != str) {
632
0
        while (ascii_isspace(*endptr)) ++endptr;
633
0
    }
634
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
635
0
}
636
637
0
bool safe_strto64_base(const char* str, int64* value, int base) {
638
0
    char* endptr;
639
0
    errno = 0; // errno only gets set on errors
640
0
    *value = strto64(str, &endptr, base);
641
0
    if (endptr != str) {
642
0
        while (ascii_isspace(*endptr)) ++endptr;
643
0
    }
644
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
645
0
}
646
647
0
bool safe_strtou32_base(const char* str, uint32* value, int base) {
648
    // strtoul does not give any errors on negative numbers, so we have to
649
    // search the string for '-' manually.
650
0
    while (ascii_isspace(*str)) ++str;
651
0
    if (*str == '-') return false;
652
653
0
    char* endptr;
654
0
    errno = 0; // errno only gets set on errors
655
0
    *value = strtou32(str, &endptr, base);
656
0
    if (endptr != str) {
657
0
        while (ascii_isspace(*endptr)) ++endptr;
658
0
    }
659
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
660
0
}
661
662
0
bool safe_strtou64_base(const char* str, uint64* value, int base) {
663
    // strtou64 does not give any errors on negative numbers, so we have to
664
    // search the string for '-' manually.
665
0
    while (ascii_isspace(*str)) ++str;
666
0
    if (*str == '-') return false;
667
668
0
    char* endptr;
669
0
    errno = 0; // errno only gets set on errors
670
0
    *value = strtou64(str, &endptr, base);
671
0
    if (endptr != str) {
672
0
        while (ascii_isspace(*endptr)) ++endptr;
673
0
    }
674
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
675
0
}
676
677
// ----------------------------------------------------------------------
678
// u64tostr_base36()
679
//    Converts unsigned number to string representation in base-36.
680
// --------------------------------------------------------------------
681
0
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
682
0
    CHECK_GT(buf_size, 0);
683
0
    CHECK(buffer);
684
0
    static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";
685
686
0
    buffer[buf_size - 1] = '\0';
687
0
    size_t result_size = 1;
688
689
0
    do {
690
0
        if (buf_size == result_size) { // Ran out of space.
691
0
            return 0;
692
0
        }
693
0
        int remainder = number % 36;
694
0
        number /= 36;
695
0
        buffer[buf_size - result_size - 1] = kAlphabet[remainder];
696
0
        result_size++;
697
0
    } while (number);
698
699
0
    memmove(buffer, buffer + buf_size - result_size, result_size);
700
701
0
    return result_size - 1;
702
0
}
703
704
// Generate functions that wrap safe_strtoXXX_base.
705
#define GEN_SAFE_STRTO(name, type)                                                  \
706
0
    bool name##_base(const string& str, type* value, int base) {                    \
707
0
        return name##_base(str.c_str(), value, base);                               \
708
0
    }                                                                               \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi
709
0
    bool name(const char* str, type* value) { return name##_base(str, value, 10); } \
Unexecuted instantiation: _Z12safe_strto32PKcPi
Unexecuted instantiation: _Z13safe_strtou32PKcPj
Unexecuted instantiation: _Z12safe_strto64PKcPl
Unexecuted instantiation: _Z13safe_strtou64PKcPm
710
0
    bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); }
Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj
Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl
Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm
711
GEN_SAFE_STRTO(safe_strto32, int32);
712
GEN_SAFE_STRTO(safe_strtou32, uint32);
713
GEN_SAFE_STRTO(safe_strto64, int64);
714
GEN_SAFE_STRTO(safe_strtou64, uint64);
715
#undef GEN_SAFE_STRTO
716
717
42
bool safe_strtof(const char* str, float* value) {
718
42
    char* endptr;
719
#ifdef _MSC_VER // has no strtof()
720
    *value = strtod(str, &endptr);
721
#else
722
42
    *value = strtof(str, &endptr);
723
42
#endif
724
42
    if (endptr != str) {
725
42
        while (ascii_isspace(*endptr)) ++endptr;
726
42
    }
727
    // Ignore range errors from strtod/strtof.
728
    // The values it returns on underflow and
729
    // overflow are the right fallback in a
730
    // robust setting.
731
42
    return *str != '\0' && *endptr == '\0';
732
42
}
733
734
0
bool safe_strtod(const char* str, double* value) {
735
0
    char* endptr;
736
0
    *value = strtod(str, &endptr);
737
0
    if (endptr != str) {
738
0
        while (ascii_isspace(*endptr)) ++endptr;
739
0
    }
740
    // Ignore range errors from strtod.  The values it
741
    // returns on underflow and overflow are the right
742
    // fallback in a robust setting.
743
0
    return *str != '\0' && *endptr == '\0';
744
0
}
745
746
0
bool safe_strtof(const string& str, float* value) {
747
0
    return safe_strtof(str.c_str(), value);
748
0
}
749
750
0
bool safe_strtod(const string& str, double* value) {
751
0
    return safe_strtod(str.c_str(), value);
752
0
}
753
754
0
uint64 atoi_kmgt(const char* s) {
755
0
    char* endptr;
756
0
    uint64 n = strtou64(s, &endptr, 10);
757
0
    uint64 scale = 1;
758
0
    char c = *endptr;
759
0
    if (c != '\0') {
760
0
        c = ascii_toupper(c);
761
0
        switch (c) {
762
0
        case 'K':
763
0
            scale = GG_ULONGLONG(1) << 10;
764
0
            break;
765
0
        case 'M':
766
0
            scale = GG_ULONGLONG(1) << 20;
767
0
            break;
768
0
        case 'G':
769
0
            scale = GG_ULONGLONG(1) << 30;
770
0
            break;
771
0
        case 'T':
772
0
            scale = GG_ULONGLONG(1) << 40;
773
0
            break;
774
0
        default:
775
0
            LOG(FATAL) << "Invalid mnemonic: `" << c << "';"
776
0
                       << " should be one of `K', `M', `G', and `T'.";
777
0
        }
778
0
    }
779
0
    return n * scale;
780
0
}
781
782
// ----------------------------------------------------------------------
783
// FastIntToBuffer()
784
// FastInt64ToBuffer()
785
// FastHexToBuffer()
786
// FastHex64ToBuffer()
787
// FastHex32ToBuffer()
788
// FastTimeToBuffer()
789
//    These are intended for speed.  FastHexToBuffer() assumes the
790
//    integer is non-negative.  FastHexToBuffer() puts output in
791
//    hex rather than decimal.  FastTimeToBuffer() puts the output
792
//    into RFC822 format.  If time is 0, uses the current time.
793
//
794
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
795
//    padded to exactly 16 bytes (plus one byte for '\0')
796
//
797
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
798
//    padded to exactly 8 bytes (plus one byte for '\0')
799
//
800
//       All functions take the output buffer as an arg.  FastInt()
801
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
802
//    They all return a pointer to the beginning of the output,
803
//    which may not be the beginning of the input buffer.  (Though
804
//    for FastTimeToBuffer(), we guarantee that it is.)
805
// ----------------------------------------------------------------------
806
807
0
char* FastInt64ToBuffer(int64 i, char* buffer) {
808
0
    FastInt64ToBufferLeft(i, buffer);
809
0
    return buffer;
810
0
}
811
812
0
char* FastInt32ToBuffer(int32 i, char* buffer) {
813
0
    FastInt32ToBufferLeft(i, buffer);
814
0
    return buffer;
815
0
}
816
817
0
char* FastHexToBuffer(int i, char* buffer) {
818
0
    CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
819
820
0
    static const char* hexdigits = "0123456789abcdef";
821
0
    char* p = buffer + 21;
822
0
    *p-- = '\0';
823
0
    do {
824
0
        *p-- = hexdigits[i & 15]; // mod by 16
825
0
        i >>= 4;                  // divide by 16
826
0
    } while (i > 0);
827
0
    return p + 1;
828
0
}
829
830
0
char* InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
831
0
    static const char* hexdigits = "0123456789abcdef";
832
0
    buffer[num_byte] = '\0';
833
0
    for (int i = num_byte - 1; i >= 0; i--) {
834
0
        buffer[i] = hexdigits[value & 0xf];
835
0
        value >>= 4;
836
0
    }
837
0
    return buffer;
838
0
}
839
840
0
char* FastHex64ToBuffer(uint64 value, char* buffer) {
841
0
    return InternalFastHexToBuffer(value, buffer, 16);
842
0
}
843
844
0
char* FastHex32ToBuffer(uint32 value, char* buffer) {
845
0
    return InternalFastHexToBuffer(value, buffer, 8);
846
0
}
847
848
// TODO(user): revisit the two_ASCII_digits optimization.
849
//
850
// Several converters use this table to reduce
851
// division and modulo operations.
852
extern const char two_ASCII_digits[100][2]; // from strutil.cc
853
854
// ----------------------------------------------------------------------
855
// FastInt32ToBufferLeft()
856
// FastUInt32ToBufferLeft()
857
// FastInt64ToBufferLeft()
858
// FastUInt64ToBufferLeft()
859
//
860
// Like the Fast*ToBuffer() functions above, these are intended for speed.
861
// Unlike the Fast*ToBuffer() functions, however, these functions write
862
// their output to the beginning of the buffer (hence the name, as the
863
// output is left-aligned).  The caller is responsible for ensuring that
864
// the buffer has enough space to hold the output.
865
//
866
// Returns a pointer to the end of the string (i.e. the null character
867
// terminating the string).
868
// ----------------------------------------------------------------------
869
870
341k
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
871
341k
    uint digits;
872
341k
    const char* ASCII_digits = nullptr;
873
    // The idea of this implementation is to trim the number of divides to as few
874
    // as possible by using multiplication and subtraction rather than mod (%),
875
    // and by outputting two digits at a time rather than one.
876
    // The huge-number case is first, in the hopes that the compiler will output
877
    // that case in one branch-free block of code, and only output conditional
878
    // branches into it from below.
879
341k
    if (u >= 1000000000) {      // >= 1,000,000,000
880
0
        digits = u / 100000000; // 100,000,000
881
0
        ASCII_digits = two_ASCII_digits[digits];
882
0
        buffer[0] = ASCII_digits[0];
883
0
        buffer[1] = ASCII_digits[1];
884
0
        buffer += 2;
885
0
    sublt100_000_000:
886
0
        u -= digits * 100000000; // 100,000,000
887
0
    lt100_000_000:
888
0
        digits = u / 1000000; // 1,000,000
889
0
        ASCII_digits = two_ASCII_digits[digits];
890
0
        buffer[0] = ASCII_digits[0];
891
0
        buffer[1] = ASCII_digits[1];
892
0
        buffer += 2;
893
0
    sublt1_000_000:
894
0
        u -= digits * 1000000; // 1,000,000
895
0
    lt1_000_000:
896
0
        digits = u / 10000; // 10,000
897
0
        ASCII_digits = two_ASCII_digits[digits];
898
0
        buffer[0] = ASCII_digits[0];
899
0
        buffer[1] = ASCII_digits[1];
900
0
        buffer += 2;
901
121
    sublt10_000:
902
121
        u -= digits * 10000; // 10,000
903
2.51k
    lt10_000:
904
2.51k
        digits = u / 100;
905
2.51k
        ASCII_digits = two_ASCII_digits[digits];
906
2.51k
        buffer[0] = ASCII_digits[0];
907
2.51k
        buffer[1] = ASCII_digits[1];
908
2.51k
        buffer += 2;
909
14.8k
    sublt100:
910
14.8k
        u -= digits * 100;
911
316k
    lt100:
912
316k
        digits = u;
913
316k
        ASCII_digits = two_ASCII_digits[digits];
914
316k
        buffer[0] = ASCII_digits[0];
915
316k
        buffer[1] = ASCII_digits[1];
916
316k
        buffer += 2;
917
341k
    done:
918
341k
        *buffer = 0;
919
341k
        return buffer;
920
316k
    }
921
922
341k
    if (u < 100) {
923
326k
        digits = u;
924
326k
        if (u >= 10) goto lt100;
925
24.9k
        *buffer++ = '0' + digits;
926
24.9k
        goto done;
927
326k
    }
928
14.8k
    if (u < 10000) { // 10,000
929
14.7k
        if (u >= 1000) goto lt10_000;
930
12.3k
        digits = u / 100;
931
12.3k
        *buffer++ = '0' + digits;
932
12.3k
        goto sublt100;
933
14.7k
    }
934
121
    if (u < 1000000) { // 1,000,000
935
121
        if (u >= 100000) goto lt1_000_000;
936
121
        digits = u / 10000; //    10,000
937
121
        *buffer++ = '0' + digits;
938
121
        goto sublt10_000;
939
121
    }
940
0
    if (u < 100000000) { // 100,000,000
941
0
        if (u >= 10000000) goto lt100_000_000;
942
0
        digits = u / 1000000; //   1,000,000
943
0
        *buffer++ = '0' + digits;
944
0
        goto sublt1_000_000;
945
0
    }
946
    // we already know that u < 1,000,000,000
947
0
    digits = u / 100000000; // 100,000,000
948
0
    *buffer++ = '0' + digits;
949
0
    goto sublt100_000_000;
950
0
}
951
952
338k
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
953
338k
    uint32 u = i;
954
338k
    if (i < 0) {
955
0
        *buffer++ = '-';
956
        // We need to do the negation in modular (i.e., "unsigned")
957
        // arithmetic; MSVC++ apprently warns for plain "-u", so
958
        // we write the equivalent expression "0 - u" instead.
959
0
        u = 0 - u;
960
0
    }
961
338k
    return FastUInt32ToBufferLeft(u, buffer);
962
338k
}
963
964
2.54k
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
965
2.54k
    uint digits;
966
2.54k
    const char* ASCII_digits = nullptr;
967
968
2.54k
    uint32 u = static_cast<uint32>(u64);
969
2.54k
    if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
970
971
1
    uint64 top_11_digits = u64 / 1000000000;
972
1
    buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
973
1
    u = u64 - (top_11_digits * 1000000000);
974
975
1
    digits = u / 10000000; // 10,000,000
976
1
    DCHECK_LT(digits, 100);
977
1
    ASCII_digits = two_ASCII_digits[digits];
978
1
    buffer[0] = ASCII_digits[0];
979
1
    buffer[1] = ASCII_digits[1];
980
1
    buffer += 2;
981
1
    u -= digits * 10000000; // 10,000,000
982
1
    digits = u / 100000;    // 100,000
983
1
    ASCII_digits = two_ASCII_digits[digits];
984
1
    buffer[0] = ASCII_digits[0];
985
1
    buffer[1] = ASCII_digits[1];
986
1
    buffer += 2;
987
1
    u -= digits * 100000; // 100,000
988
1
    digits = u / 1000;    // 1,000
989
1
    ASCII_digits = two_ASCII_digits[digits];
990
1
    buffer[0] = ASCII_digits[0];
991
1
    buffer[1] = ASCII_digits[1];
992
1
    buffer += 2;
993
1
    u -= digits * 1000; // 1,000
994
1
    digits = u / 10;
995
1
    ASCII_digits = two_ASCII_digits[digits];
996
1
    buffer[0] = ASCII_digits[0];
997
1
    buffer[1] = ASCII_digits[1];
998
1
    buffer += 2;
999
1
    u -= digits * 10;
1000
1
    digits = u;
1001
1
    *buffer++ = '0' + digits;
1002
1
    *buffer = 0;
1003
1
    return buffer;
1004
2.54k
}
1005
1006
2.54k
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
1007
2.54k
    uint64 u = i;
1008
2.54k
    if (i < 0) {
1009
0
        *buffer++ = '-';
1010
0
        u = 0 - u;
1011
0
    }
1012
2.54k
    return FastUInt64ToBufferLeft(u, buffer);
1013
2.54k
}
1014
1015
0
int HexDigitsPrefix(const char* buf, int num_digits) {
1016
0
    for (int i = 0; i < num_digits; i++)
1017
0
        if (!ascii_isxdigit(buf[i]))
1018
0
            return 0; // This also detects end of string as '\0' is not xdigit.
1019
0
    return 1;
1020
0
}
1021
1022
// ----------------------------------------------------------------------
1023
// AutoDigitStrCmp
1024
// AutoDigitLessThan
1025
// StrictAutoDigitLessThan
1026
// autodigit_less
1027
// autodigit_greater
1028
// strict_autodigit_less
1029
// strict_autodigit_greater
1030
//    These are like less<string> and greater<string>, except when a
1031
//    run of digits is encountered at corresponding points in the two
1032
//    arguments.  Such digit strings are compared numerically instead
1033
//    of lexicographically.  Therefore if you sort by
1034
//    "autodigit_less", some machine names might get sorted as:
1035
//        exaf1
1036
//        exaf2
1037
//        exaf10
1038
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
1039
//    set to true, or the strict version of the other functions),
1040
//    strings that represent equal numbers will not be considered equal if
1041
//    the string representations are not identical.  That is, "01" < "1" in
1042
//    strict mode, but "01" == "1" otherwise.
1043
// ----------------------------------------------------------------------
1044
1045
0
int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) {
1046
0
    int aindex = 0;
1047
0
    int bindex = 0;
1048
0
    while ((aindex < alen) && (bindex < blen)) {
1049
0
        if (isdigit(a[aindex]) && isdigit(b[bindex])) {
1050
            // Compare runs of digits.  Instead of extracting numbers, we
1051
            // just skip leading zeroes, and then get the run-lengths.  This
1052
            // allows us to handle arbitrary precision numbers.  We remember
1053
            // how many zeroes we found so that we can differentiate between
1054
            // "1" and "01" in strict mode.
1055
1056
            // Skip leading zeroes, but remember how many we found
1057
0
            int azeroes = aindex;
1058
0
            int bzeroes = bindex;
1059
0
            while ((aindex < alen) && (a[aindex] == '0')) aindex++;
1060
0
            while ((bindex < blen) && (b[bindex] == '0')) bindex++;
1061
0
            azeroes = aindex - azeroes;
1062
0
            bzeroes = bindex - bzeroes;
1063
1064
            // Count digit lengths
1065
0
            int astart = aindex;
1066
0
            int bstart = bindex;
1067
0
            while ((aindex < alen) && isdigit(a[aindex])) aindex++;
1068
0
            while ((bindex < blen) && isdigit(b[bindex])) bindex++;
1069
0
            if (aindex - astart < bindex - bstart) {
1070
                // a has shorter run of digits: so smaller
1071
0
                return -1;
1072
0
            } else if (aindex - astart > bindex - bstart) {
1073
                // a has longer run of digits: so larger
1074
0
                return 1;
1075
0
            } else {
1076
                // Same lengths, so compare digit by digit
1077
0
                for (int i = 0; i < aindex - astart; i++) {
1078
0
                    if (a[astart + i] < b[bstart + i]) {
1079
0
                        return -1;
1080
0
                    } else if (a[astart + i] > b[bstart + i]) {
1081
0
                        return 1;
1082
0
                    }
1083
0
                }
1084
                // Equal: did one have more leading zeroes?
1085
0
                if (strict && azeroes != bzeroes) {
1086
0
                    if (azeroes > bzeroes) {
1087
                        // a has more leading zeroes: a < b
1088
0
                        return -1;
1089
0
                    } else {
1090
                        // b has more leading zeroes: a > b
1091
0
                        return 1;
1092
0
                    }
1093
0
                }
1094
                // Equal: so continue scanning
1095
0
            }
1096
0
        } else if (a[aindex] < b[bindex]) {
1097
0
            return -1;
1098
0
        } else if (a[aindex] > b[bindex]) {
1099
0
            return 1;
1100
0
        } else {
1101
0
            aindex++;
1102
0
            bindex++;
1103
0
        }
1104
0
    }
1105
1106
0
    if (aindex < alen) {
1107
        // b is prefix of a
1108
0
        return 1;
1109
0
    } else if (bindex < blen) {
1110
        // a is prefix of b
1111
0
        return -1;
1112
0
    } else {
1113
        // a is equal to b
1114
0
        return 0;
1115
0
    }
1116
0
}
1117
1118
0
bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1119
0
    return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
1120
0
}
1121
1122
0
bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1123
0
    return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
1124
0
}
1125
1126
// ----------------------------------------------------------------------
1127
// SimpleDtoa()
1128
// SimpleFtoa()
1129
// DoubleToBuffer()
1130
// FloatToBuffer()
1131
//    We want to print the value without losing precision, but we also do
1132
//    not want to print more digits than necessary.  This turns out to be
1133
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
1134
//    exactly in binary.  If we print 0.2 with a very large precision,
1135
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1136
//    On the other hand, if we set the precision too low, we lose
1137
//    significant digits when printing numbers that actually need them.
1138
//    It turns out there is no precision value that does the right thing
1139
//    for all numbers.
1140
//
1141
//    Our strategy is to first try printing with a precision that is never
1142
//    over-precise, then parse the result with strtod() to see if it
1143
//    matches.  If not, we print again with a precision that will always
1144
//    give a precise result, but may use more digits than necessary.
1145
//
1146
//    An arguably better strategy would be to use the algorithm described
1147
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
1148
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
1149
//    however, that the following implementation is about as fast as
1150
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
1151
//    will not scale well on multi-core machines.  DMG's code is slightly
1152
//    more accurate (in that it will never use more digits than
1153
//    necessary), but this is probably irrelevant for most users.
1154
//
1155
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
1156
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
1157
//    one in that it makes guesses and then uses strtod() to check them.
1158
//    Their implementation is faster because they use their own code to
1159
//    generate the digits in the first place rather than use snprintf(),
1160
//    thus avoiding format string parsing overhead.  However, this makes
1161
//    it considerably more complicated than the following implementation,
1162
//    and it is embedded in a larger library.  If speed turns out to be
1163
//    an issue, we could re-implement this in terms of their
1164
//    implementation.
1165
// ----------------------------------------------------------------------
1166
1167
0
string SimpleDtoa(double value) {
1168
0
    char buffer[kDoubleToBufferSize];
1169
0
    return DoubleToBuffer(value, buffer);
1170
0
}
1171
1172
0
string SimpleFtoa(float value) {
1173
0
    char buffer[kFloatToBufferSize];
1174
0
    return FloatToBuffer(value, buffer);
1175
0
}
1176
1177
0
char* DoubleToBuffer(double value, char* buffer) {
1178
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1179
    // platforms these days.  Just in case some system exists where DBL_DIG
1180
    // is significantly larger -- and risks overflowing our buffer -- we have
1181
    // this assert.
1182
0
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1183
1184
0
    int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1185
1186
    // The snprintf should never overflow because the buffer is significantly
1187
    // larger than the precision we asked for.
1188
0
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1189
1190
0
    if (strtod(buffer, nullptr) != value) {
1191
0
        snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value);
1192
1193
        // Should never overflow; see above.
1194
0
        DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1195
0
    }
1196
0
    return buffer;
1197
0
}
1198
1199
0
char* FloatToBuffer(float value, char* buffer) {
1200
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1201
    // platforms these days.  Just in case some system exists where FLT_DIG
1202
    // is significantly larger -- and risks overflowing our buffer -- we have
1203
    // this assert.
1204
0
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1205
1206
0
    int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1207
1208
    // The snprintf should never overflow because the buffer is significantly
1209
    // larger than the precision we asked for.
1210
0
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1211
1212
0
    float parsed_value;
1213
0
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1214
0
        snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value);
1215
1216
        // Should never overflow; see above.
1217
0
        DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1218
0
    }
1219
0
    return buffer;
1220
0
}
1221
1222
11
int DoubleToBuffer(double value, int width, char* buffer) {
1223
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1224
    // platforms these days.  Just in case some system exists where DBL_DIG
1225
    // is significantly larger -- and risks overflowing our buffer -- we have
1226
    // this assert.
1227
11
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1228
1229
11
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
1230
1231
    // The snprintf should never overflow because the buffer is significantly
1232
    // larger than the precision we asked for.
1233
11
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1234
1235
11
    if (strtod(buffer, nullptr) != value) {
1236
3
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
1237
1238
        // Should never overflow; see above.
1239
3
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1240
3
    }
1241
1242
11
    return snprintf_result;
1243
11
}
1244
1245
42
int FloatToBuffer(float value, int width, char* buffer) {
1246
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1247
    // platforms these days.  Just in case some system exists where FLT_DIG
1248
    // is significantly larger -- and risks overflowing our buffer -- we have
1249
    // this assert.
1250
42
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1251
1252
42
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
1253
1254
    // The snprintf should never overflow because the buffer is significantly
1255
    // larger than the precision we asked for.
1256
42
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1257
1258
42
    float parsed_value;
1259
42
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1260
8
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
1261
1262
        // Should never overflow; see above.
1263
8
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1264
8
    }
1265
1266
42
    return snprintf_result;
1267
42
}
1268
1269
10
int FastDoubleToBuffer(double value, char* buffer) {
1270
10
    auto end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1271
10
    *end = '\0';
1272
10
    return end - buffer;
1273
10
}
1274
1275
10
int FastFloatToBuffer(float value, char* buffer) {
1276
10
    auto* end = fmt::format_to(buffer, FMT_COMPILE("{}"), value);
1277
10
    *end = '\0';
1278
10
    return end - buffer;
1279
10
}
1280
1281
// ----------------------------------------------------------------------
1282
// SimpleItoaWithCommas()
1283
//    Description: converts an integer to a string.
1284
//    Puts commas every 3 spaces.
1285
//    Faster than printf("%d")?
1286
//
1287
//    Return value: string
1288
// ----------------------------------------------------------------------
1289
0
string SimpleItoaWithCommas(int32 i) {
1290
    // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
1291
    // Longest is -2,147,483,648.
1292
0
    char local[14];
1293
0
    char* p = local + sizeof(local);
1294
    // Need to use uint32 instead of int32 to correctly handle
1295
    // -2,147,483,648.
1296
0
    uint32 n = i;
1297
0
    if (i < 0) n = 0 - n; // negate the unsigned value to avoid overflow
1298
0
    *--p = '0' + n % 10;  // this case deals with the number "0"
1299
0
    n /= 10;
1300
0
    while (n) {
1301
0
        *--p = '0' + n % 10;
1302
0
        n /= 10;
1303
0
        if (n == 0) break;
1304
1305
0
        *--p = '0' + n % 10;
1306
0
        n /= 10;
1307
0
        if (n == 0) break;
1308
1309
0
        *--p = ',';
1310
0
        *--p = '0' + n % 10;
1311
0
        n /= 10;
1312
        // For this unrolling, we check if n == 0 in the main while loop
1313
0
    }
1314
0
    if (i < 0) *--p = '-';
1315
0
    return string(p, local + sizeof(local));
1316
0
}
1317
1318
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
1319
// compile.
1320
0
string SimpleItoaWithCommas(uint32 i) {
1321
    // 10 digits and 3 commas are good for 32-bit or smaller ints.
1322
    // Longest is 4,294,967,295.
1323
0
    char local[13];
1324
0
    char* p = local + sizeof(local);
1325
0
    *--p = '0' + i % 10; // this case deals with the number "0"
1326
0
    i /= 10;
1327
0
    while (i) {
1328
0
        *--p = '0' + i % 10;
1329
0
        i /= 10;
1330
0
        if (i == 0) break;
1331
1332
0
        *--p = '0' + i % 10;
1333
0
        i /= 10;
1334
0
        if (i == 0) break;
1335
1336
0
        *--p = ',';
1337
0
        *--p = '0' + i % 10;
1338
0
        i /= 10;
1339
        // For this unrolling, we check if i == 0 in the main while loop
1340
0
    }
1341
0
    return string(p, local + sizeof(local));
1342
0
}
1343
1344
0
string SimpleItoaWithCommas(int64 i) {
1345
    // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
1346
0
    char local[26];
1347
0
    char* p = SimpleItoaWithCommas(i, local, sizeof(local));
1348
0
    return string(p, local + sizeof(local));
1349
0
}
1350
1351
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
1352
// compile.
1353
0
string SimpleItoaWithCommas(uint64 i) {
1354
    // 20 digits and 6 commas are good for 64-bit or smaller ints.
1355
    // Longest is 18,446,744,073,709,551,615.
1356
0
    char local[26];
1357
0
    char* p = local + sizeof(local);
1358
0
    *--p = '0' + i % 10; // this case deals with the number "0"
1359
0
    i /= 10;
1360
0
    while (i) {
1361
0
        *--p = '0' + i % 10;
1362
0
        i /= 10;
1363
0
        if (i == 0) break;
1364
1365
0
        *--p = '0' + i % 10;
1366
0
        i /= 10;
1367
0
        if (i == 0) break;
1368
1369
0
        *--p = ',';
1370
0
        *--p = '0' + i % 10;
1371
0
        i /= 10;
1372
        // For this unrolling, we check if i == 0 in the main while loop
1373
0
    }
1374
0
    return string(p, local + sizeof(local));
1375
0
}
1376
1377
8
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
1378
    // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
1379
8
    char* p = buffer + buffer_size;
1380
    // Need to use uint64 instead of int64 to correctly handle
1381
    // -9,223,372,036,854,775,808.
1382
8
    uint64 n = i;
1383
8
    if (i < 0) n = 0 - n;
1384
8
    *--p = '0' + n % 10; // this case deals with the number "0"
1385
8
    n /= 10;
1386
20
    while (n) {
1387
18
        *--p = '0' + n % 10;
1388
18
        n /= 10;
1389
18
        if (n == 0) break;
1390
1391
12
        *--p = '0' + n % 10;
1392
12
        n /= 10;
1393
12
        if (n == 0) break;
1394
1395
12
        *--p = ',';
1396
12
        *--p = '0' + n % 10;
1397
12
        n /= 10;
1398
        // For this unrolling, we check if n == 0 in the main while loop
1399
12
    }
1400
8
    if (i < 0) *--p = '-';
1401
8
    return p;
1402
8
}
1403
1404
17
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
1405
    // 39 digits, 12 commas, and sign are good for 128-bit or smaller ints.
1406
17
    char* p = buffer + buffer_size;
1407
    // Need to use uint128 instead of int128 to correctly handle
1408
    // -170,141,183,460,469,231,731,687,303,715,884,105,728.
1409
17
    __uint128_t n = i;
1410
17
    if (i < 0) n = 0 - n;
1411
17
    *--p = '0' + n % 10; // this case deals with the number "0"
1412
17
    n /= 10;
1413
45
    while (n) {
1414
38
        *--p = '0' + n % 10;
1415
38
        n /= 10;
1416
38
        if (n == 0) break;
1417
1418
34
        *--p = '0' + n % 10;
1419
34
        n /= 10;
1420
34
        if (n == 0) break;
1421
1422
28
        *--p = ',';
1423
28
        *--p = '0' + n % 10;
1424
28
        n /= 10;
1425
        // For this unrolling, we check if n == 0 in the main while loop
1426
28
    }
1427
17
    if (i < 0) *--p = '-';
1428
17
    return p;
1429
17
}
1430
1431
// ----------------------------------------------------------------------
1432
// ItoaKMGT()
1433
//    Description: converts an integer to a string
1434
//    Truncates values to a readable unit: K, G, M or T
1435
//    Opposite of atoi_kmgt()
1436
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   57185920 -> "45M"
1437
//
1438
//    Return value: string
1439
// ----------------------------------------------------------------------
1440
0
string ItoaKMGT(int64 i) {
1441
0
    const char *sign = "", *suffix = "";
1442
0
    if (i < 0) {
1443
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
1444
        // that's OK as this function is only for human readability
1445
0
        if (i == numeric_limits<int64>::min()) i++;
1446
0
        sign = "-";
1447
0
        i = -i;
1448
0
    }
1449
1450
0
    int64 val;
1451
1452
0
    if ((val = (i >> 40)) > 1) {
1453
0
        suffix = "T";
1454
0
    } else if ((val = (i >> 30)) > 1) {
1455
0
        suffix = "G";
1456
0
    } else if ((val = (i >> 20)) > 1) {
1457
0
        suffix = "M";
1458
0
    } else if ((val = (i >> 10)) > 1) {
1459
0
        suffix = "K";
1460
0
    } else {
1461
0
        val = i;
1462
0
    }
1463
1464
0
    return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
1465
0
}
1466
1467
0
string AccurateItoaKMGT(int64 i) {
1468
0
    const char* sign = "";
1469
0
    if (i < 0) {
1470
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
1471
        // that's OK as this function is only for human readability
1472
0
        if (i == numeric_limits<int64>::min()) i++;
1473
0
        sign = "-";
1474
0
        i = -i;
1475
0
    }
1476
1477
0
    string ret = StringPrintf("%s", sign);
1478
0
    int64 val;
1479
0
    if ((val = (i >> 40)) > 1) {
1480
0
        ret += StringPrintf("%" PRId64
1481
0
                            "%s"
1482
0
                            ",",
1483
0
                            val, "T");
1484
0
        i = i - (val << 40);
1485
0
    }
1486
0
    if ((val = (i >> 30)) > 1) {
1487
0
        ret += StringPrintf("%" PRId64
1488
0
                            "%s"
1489
0
                            ",",
1490
0
                            val, "G");
1491
0
        i = i - (val << 30);
1492
0
    }
1493
0
    if ((val = (i >> 20)) > 1) {
1494
0
        ret += StringPrintf("%" PRId64
1495
0
                            "%s"
1496
0
                            ",",
1497
0
                            val, "M");
1498
0
        i = i - (val << 20);
1499
0
    }
1500
0
    if ((val = (i >> 10)) > 1) {
1501
0
        ret += StringPrintf("%" PRId64 "%s", val, "K");
1502
0
        i = i - (val << 10);
1503
0
    } else {
1504
0
        ret += StringPrintf("%" PRId64 "%s", i, "K");
1505
0
    }
1506
1507
0
    return ret;
1508
0
}
1509
1510
// DEPRECATED(wadetregaskis).
1511
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1512
1513
0
string FloatToString(float f, const char* format) {
1514
0
    return StringPrintf(format, f);
1515
0
}
1516
1517
0
string IntToString(int i, const char* format) {
1518
0
    return StringPrintf(format, i);
1519
0
}
1520
1521
0
string Int64ToString(int64 i64, const char* format) {
1522
0
    return StringPrintf(format, i64);
1523
0
}
1524
1525
0
string UInt64ToString(uint64 ui64, const char* format) {
1526
0
    return StringPrintf(format, ui64);
1527
0
}