Coverage Report

Created: 2024-11-18 10:37

/root/doris/be/src/gutil/strings/numbers.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2010 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.cc
3
//
4
// This file contains string processing functions related to
5
// numeric values.
6
7
#include "gutil/strings/numbers.h"
8
9
#include <assert.h>
10
#include <ctype.h>
11
#include <errno.h>
12
#include <float.h> // for DBL_DIG and FLT_DIG
13
#include <math.h>  // for HUGE_VAL
14
#include <stdio.h>
15
#include <stdlib.h>
16
#include <string.h>
17
#include <inttypes.h>
18
#include <sys/types.h>
19
#include <limits>
20
#include <ostream>
21
22
using std::numeric_limits;
23
#include <string>
24
25
using std::string;
26
27
#include "common/logging.h"
28
#include <fmt/format.h>
29
30
#include "dragonbox/dragonbox_to_chars.h"
31
#include "gutil/gscoped_ptr.h"
32
#include "gutil/int128.h"
33
#include "gutil/integral_types.h"
34
#include "gutil/stringprintf.h"
35
#include "gutil/strings/ascii_ctype.h"
36
#include "gutil/strtoint.h"
37
38
// Reads a <double> in *text, which may not be whitespace-initiated.
39
// *len is the length, or -1 if text is '\0'-terminated, which is more
40
// efficient.  Sets *text to the end of the double, and val to the
41
// converted value, and the length of the double is subtracted from
42
// *len. <double> may also be a '?', in which case val will be
43
// unchanged. Returns true upon success.  If initial_minus is
44
// non-NULL, then *initial_minus will indicate whether the first
45
// symbol seen was a '-', which will be ignored. Similarly, if
46
// final_period is non-NULL, then *final_period will indicate whether
47
// the last symbol seen was a '.', which will be ignored. This is
48
// useful in case that an initial '-' or final '.' would have another
49
// meaning (as a separator, e.g.).
50
// Consumes one <double> from the front of *text.
//
//   text            in/out cursor; advanced past the number on success.
//   len             in/out remaining length, or -1 when *text is
//                   '\0'-terminated (left untouched in that case).
//   allow_question  when true, a single '?' is accepted in place of a number;
//                   it is consumed and *val is left as it was.
//   val             receives the parsed value.
//   initial_minus   if non-null, a leading '-' is skipped (not applied to the
//                   value) and its presence is reported here.
//   final_period    if non-null, a '.' that ends the number is given back to
//                   the input and its presence is reported here.
//
// Returns true on success. On failure *text, *len and *val are not modified
// (*initial_minus may already have been written).
static inline bool EatADouble(const char** text, int* len, bool allow_question, double* val,
                              bool* initial_minus, bool* final_period) {
    const char* cursor = *text;
    int remaining = *len; // -1 means "'\0'-terminated"

    if (cursor == nullptr || remaining == 0) {
        return false;
    }

    // '?' stands in for a number: consume it and leave *val alone.
    if (allow_question && *cursor == '?') {
        *text = cursor + 1;
        if (remaining != -1) {
            *len = remaining - 1;
        }
        return true;
    }

    // Peel off a leading '-' so the caller can decide what it means.
    if (initial_minus != nullptr) {
        *initial_minus = (*cursor == '-');
        if (*initial_minus) {
            if (remaining == 1) {
                return false; // the '-' was the last character
            }
            ++cursor;
            if (remaining != -1) {
                --remaining;
            }
        }
    }

    // Cheap pre-filter: rejects 'inf', whitespace, etc. before calling strtod.
    if (strchr("-+.0123456789", *cursor) == nullptr) {
        return false;
    }

    const char* stop = nullptr;
    double parsed = 0.0;
    if (remaining == -1) {
        char* raw_end = nullptr;
        parsed = strtod(cursor, &raw_end);
        stop = raw_end;
    } else {
        // The input is length-bounded, so strtod needs a terminated copy.
        const std::string bounded(cursor, remaining);
        char* raw_end = nullptr;
        parsed = strtod(bounded.c_str(), &raw_end);
        // Translate the end offset in the copy back onto the caller's buffer.
        stop = cursor + (raw_end - bounded.c_str());
    }

    if (stop == cursor) {
        return false; // strtod consumed nothing
    }

    if (final_period != nullptr) {
        *final_period = (stop[-1] == '.');
        if (*final_period) {
            --stop; // hand the trailing '.' back to the caller
        }
    }

    *text = stop;
    *val = parsed;
    if (remaining != -1) {
        *len = remaining - (stop - cursor);
    }
    return true;
}
103
104
// If update, consume one of acceptable_chars from string *text of
105
// length len and return that char, or '\0' otherwise. If len is -1,
106
// *text is null-terminated. If update is false, don't alter *text and
107
// *len. If null_ok, then update must be false, and, if text has no
108
// more chars, then return '\1' (arbitrary nonzero).
109
// Tests whether the next character of *text is one of acceptable_chars.
//
//   text, len         cursor and remaining length (-1 = '\0'-terminated).
//   acceptable_chars  '\0'-terminated set of characters to match.
//   update            when true and a match is found, the character is
//                     consumed (*text advanced, *len decremented unless -1).
//   null_ok           predicate mode: if the input is exhausted, return '\1'
//                     instead of '\0'. Must not be combined with update.
//
// Returns the matched character, or '\0' when nothing matched.
static inline char EatAChar(const char** text, int* len, const char* acceptable_chars, bool update,
                            bool null_ok) {
    assert(!(update && null_ok));

    const bool exhausted = (*len == 0) || (**text == '\0');
    if (exhausted) {
        return null_ok ? '\1' : '\0';
    }

    const char candidate = **text;
    if (strchr(acceptable_chars, candidate) == nullptr) {
        return '\0'; // no match; nothing consumed
    }

    if (update) {
        ++(*text);
        if (*len != -1) {
            --(*len);
        }
    }
    return candidate;
}
126
127
// Parse an expression in 'text' of the form: <comparator><double> or
128
// <double><sep><double> See full comments in header file.
129
bool ParseDoubleRange(const char* text, int len, const char** end, double* from, double* to,
130
0
                      bool* is_currency, const DoubleRangeOptions& opts) {
131
0
    const double from_default = opts.dont_modify_unbounded ? *from : -HUGE_VAL;
132
133
0
    if (!opts.dont_modify_unbounded) {
134
0
        *from = -HUGE_VAL;
135
0
        *to = HUGE_VAL;
136
0
    }
137
0
    if (opts.allow_currency && (is_currency != nullptr)) *is_currency = false;
138
139
0
    assert(len >= -1);
140
0
    assert(opts.separators && (*opts.separators != '\0'));
141
    // these aren't valid separators
142
0
    assert(strlen(opts.separators) == strcspn(opts.separators, "+0123456789eE$"));
143
0
    assert(opts.num_required_bounds <= 2);
144
145
    // Handle easier cases of comparators (<, >) first
146
0
    if (opts.allow_comparators) {
147
0
        char comparator = EatAChar(&text, &len, "<>", true, false);
148
0
        if (comparator) {
149
0
            double* dest = (comparator == '>') ? from : to;
150
0
            EatAChar(&text, &len, "=", true, false);
151
0
            if (opts.allow_currency && EatAChar(&text, &len, "$", true, false))
152
0
                if (is_currency != nullptr) *is_currency = true;
153
0
            if (!EatADouble(&text, &len, opts.allow_unbounded_markers, dest, nullptr, nullptr))
154
0
                return false;
155
0
            *end = text;
156
0
            return EatAChar(&text, &len, opts.acceptable_terminators, false,
157
0
                            opts.null_terminator_ok);
158
0
        }
159
0
    }
160
161
0
    bool seen_dollar = (opts.allow_currency && EatAChar(&text, &len, "$", true, false));
162
163
    // If we see a '-', two things could be happening: -<to> or
164
    // <from>... where <from> is negative. Treat initial minus sign as a
165
    // separator if '-' is a valid separator.
166
    // Similarly, we prepare for the possibility of seeing a '.' at the
167
    // end of the number, in case '.' (which really means '..') is a
168
    // separator.
169
0
    bool initial_minus_sign = false;
170
0
    bool final_period = false;
171
0
    bool* check_initial_minus =
172
0
            (strchr(opts.separators, '-') && !seen_dollar && (opts.num_required_bounds < 2))
173
0
                    ? (&initial_minus_sign)
174
0
                    : nullptr;
175
0
    bool* check_final_period = strchr(opts.separators, '.') ? (&final_period) : nullptr;
176
0
    bool double_seen = EatADouble(&text, &len, opts.allow_unbounded_markers, from,
177
0
                                  check_initial_minus, check_final_period);
178
179
    // if 2 bounds required, must see a double (or '?' if allowed)
180
0
    if ((opts.num_required_bounds == 2) && !double_seen) return false;
181
182
0
    if (seen_dollar && !double_seen) {
183
0
        --text;
184
0
        if (len != -1) ++len;
185
0
        seen_dollar = false;
186
0
    }
187
    // If we're here, we've read the first double and now expect a
188
    // separator and another <double>.
189
0
    char separator = EatAChar(&text, &len, opts.separators, true, false);
190
0
    if (separator == '.') {
191
        // seen one '.' as separator; must check for another; perhaps set seplen=2
192
0
        if (EatAChar(&text, &len, ".", true, false)) {
193
0
            if (final_period) {
194
                // We may have three periods in a row. The first is part of the
195
                // first number, the others are a separator. Policy: 234...567
196
                // is "234." to "567", not "234" to ".567".
197
0
                EatAChar(&text, &len, ".", true, false);
198
0
            }
199
0
        } else if (!EatAChar(&text, &len, opts.separators, true, false)) {
200
            // just one '.' and no other separator; uneat the first '.' we saw
201
0
            --text;
202
0
            if (len != -1) ++len;
203
0
            separator = '\0';
204
0
        }
205
0
    }
206
    // By now, we've consumed whatever separator there may have been,
207
    // and separator is true iff there was one.
208
0
    if (!separator) {
209
0
        if (final_period) // final period now considered part of first double
210
0
            EatAChar(&text, &len, ".", true, false);
211
0
        if (initial_minus_sign && double_seen) {
212
0
            *to = *from;
213
0
            *from = from_default;
214
0
        } else if (opts.require_separator || (opts.num_required_bounds > 0 && !double_seen) ||
215
0
                   (opts.num_required_bounds > 1)) {
216
0
            return false;
217
0
        }
218
0
    } else {
219
0
        if (initial_minus_sign && double_seen) *from = -(*from);
220
        // read second <double>
221
0
        bool second_dollar_seen = (seen_dollar || (opts.allow_currency && !double_seen)) &&
222
0
                                  EatAChar(&text, &len, "$", true, false);
223
0
        bool second_double_seen =
224
0
                EatADouble(&text, &len, opts.allow_unbounded_markers, to, nullptr, nullptr);
225
0
        if (opts.num_required_bounds > double_seen + second_double_seen) return false;
226
0
        if (second_dollar_seen && !second_double_seen) {
227
0
            --text;
228
0
            if (len != -1) ++len;
229
0
            second_dollar_seen = false;
230
0
        }
231
0
        seen_dollar = seen_dollar || second_dollar_seen;
232
0
    }
233
234
0
    if (seen_dollar && (is_currency != nullptr)) *is_currency = true;
235
    // We're done. But we have to check that the next char is a proper
236
    // terminator.
237
0
    *end = text;
238
0
    char terminator =
239
0
            EatAChar(&text, &len, opts.acceptable_terminators, false, opts.null_terminator_ok);
240
0
    if (terminator == '.') --(*end);
241
0
    return terminator;
242
0
}
243
244
// ----------------------------------------------------------------------
245
// ConsumeStrayLeadingZeroes
246
//    Eliminates all leading zeroes (unless the string itself is composed
247
//    of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
248
// --------------------------------------------------------------------
249
250
0
// Strips the run of '0' characters at the front of *str in place. A string
// made up entirely of zeroes keeps exactly one. Strings shorter than two
// characters, or not starting with '0', are left untouched.
void ConsumeStrayLeadingZeroes(string* const str) {
    const string::size_type total = str->size();
    if (total <= 1 || (*str)[0] != '0') {
        return;
    }

    // Count how many consecutive '0' characters lead the string.
    string::size_type zeros = 1;
    while (zeros != total && (*str)[zeros] == '0') {
        ++zeros;
    }
    DCHECK_GT(zeros, string::size_type(0));

    if (zeros == total) {
        --zeros; // all zeroes: keep the last one
    }
    str->erase(0, zeros);
}
265
266
// ----------------------------------------------------------------------
267
// ParseLeadingInt32Value()
268
// ParseLeadingUInt32Value()
269
//    A simple parser for [u]int32 values. Returns the parsed value
270
//    if a valid value is found; else returns deflt
271
//    This cannot handle decimal numbers with leading 0s.
272
// --------------------------------------------------------------------
273
274
0
// Parses the integer at the front of `str` with strtol in base 0 (so "0x"
// and a leading "0" select hex and octal). Returns `deflt` if no characters
// could be converted; otherwise the value clamped to the int32 range.
int32 ParseLeadingInt32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    const long raw = strtol(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    // Clamp for platforms where long is wider than 32 bits.
    if (raw > numeric_limits<int32>::max()) {
        return numeric_limits<int32>::max();
    }
    if (raw < numeric_limits<int32>::min()) {
        return numeric_limits<int32>::min();
    }
    return raw;
}
285
286
0
// Parses the unsigned integer at the front of `str` in base 0. Returns
// `deflt` if no characters could be converted. Small negative inputs wrap
// ("-2" becomes UINT_MAX-1); magnitudes beyond the uint32 range are pegged
// to the uint32 maximum.
uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
        // long is 32 bits wide: strtoul already has the desired semantics.
        const uint32 narrow = strtoul(str, &parse_end, 0);
        return (parse_end == str) ? deflt : narrow;
    }

    // long is wider: parse as a signed 64-bit value and apply the limits by
    // hand. A 64-bit strtoul could not tell "-2" (which should wrap) apart
    // from ULONG_MAX-1 (which should be pegged).
    int64 wide = strto64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt;
    }
    const int64 limit = numeric_limits<uint32>::max();
    if (wide > limit || wide < -limit) {
        wide = limit;
    }
    // Inside these limits, truncation to 32 bits handles negatives correctly.
    return static_cast<uint32>(wide);
}
308
309
// ----------------------------------------------------------------------
310
// ParseLeadingDec32Value
311
// ParseLeadingUDec32Value
312
//    A simple parser for [u]int32 values. Returns the parsed value
313
//    if a valid value is found; else returns deflt
314
//    The string passed in is treated as *10 based*.
315
//    This can handle strings with leading 0s.
316
// --------------------------------------------------------------------
317
318
0
// Parses the base-10 integer at the front of `str` with strtol (leading
// zeroes are fine). Returns `deflt` if no characters could be converted;
// otherwise the value clamped to the int32 range.
int32 ParseLeadingDec32Value(const char* str, int32 deflt) {
    char* parse_end = nullptr;
    const long raw = strtol(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    // Clamp for platforms where long is wider than 32 bits.
    if (raw > numeric_limits<int32>::max()) {
        return numeric_limits<int32>::max();
    }
    if (raw < numeric_limits<int32>::min()) {
        return numeric_limits<int32>::min();
    }
    return raw;
}
329
330
0
// Parses the base-10 unsigned integer at the front of `str` (leading zeroes
// are fine). Returns `deflt` if no characters could be converted. Small
// negative inputs wrap ("-2" becomes UINT_MAX-1); magnitudes beyond the
// uint32 range are pegged to the uint32 maximum.
uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt) {
    char* parse_end = nullptr;

    if (numeric_limits<unsigned long>::max() == numeric_limits<uint32>::max()) {
        // long is 32 bits wide: strtoul already has the desired semantics.
        const uint32 narrow = strtoul(str, &parse_end, 10);
        return (parse_end == str) ? deflt : narrow;
    }

    // long is wider: parse as a signed 64-bit value and apply the limits by
    // hand. A 64-bit strtoul could not tell "-2" (which should wrap) apart
    // from ULONG_MAX-1 (which should be pegged).
    int64 wide = strto64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt;
    }
    const int64 limit = numeric_limits<uint32>::max();
    if (wide > limit || wide < -limit) {
        wide = limit;
    }
    // Inside these limits, truncation to 32 bits handles negatives correctly.
    return static_cast<uint32>(wide);
}
352
353
// ----------------------------------------------------------------------
354
// ParseLeadingUInt64Value
355
// ParseLeadingInt64Value
356
// ParseLeadingHex64Value
357
//    A simple parser for 64-bit values. Returns the parsed value if a
358
//    valid integer is found; else returns deflt
359
//    UInt64 and Int64 cannot handle decimal numbers with leading 0s.
360
// --------------------------------------------------------------------
361
0
// Parses the unsigned 64-bit integer at the front of `str` via strtou64 in
// base 0. Returns `deflt` when no characters could be converted.
uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt; // nothing was consumed
    }
    return parsed;
}
366
367
0
// Parses the signed 64-bit integer at the front of `str` via strto64 in
// base 0. Returns `deflt` when no characters could be converted.
int64 ParseLeadingInt64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 0);
    if (parse_end == str) {
        return deflt; // nothing was consumed
    }
    return parsed;
}
372
373
0
// Parses the hexadecimal unsigned 64-bit integer at the front of `str` via
// strtou64 in base 16. Returns `deflt` when no characters could be converted.
uint64 ParseLeadingHex64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 16);
    if (parse_end == str) {
        return deflt; // nothing was consumed
    }
    return parsed;
}
378
379
// ----------------------------------------------------------------------
380
// ParseLeadingDec64Value
381
// ParseLeadingUDec64Value
382
//    A simple parser for [u]int64 values. Returns the parsed value
383
//    if a valid value is found; else returns deflt
384
//    The string passed in is treated as *10 based*.
385
//    This can handle strings with leading 0s.
386
// --------------------------------------------------------------------
387
388
0
// Parses the base-10 signed 64-bit integer at the front of `str` via strto64
// (leading zeroes are fine). Returns `deflt` when no characters could be
// converted.
int64 ParseLeadingDec64Value(const char* str, int64 deflt) {
    char* parse_end = nullptr;
    const int64 parsed = strto64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt; // nothing was consumed
    }
    return parsed;
}
393
394
0
// Parses the base-10 unsigned 64-bit integer at the front of `str` via
// strtou64 (leading zeroes are fine). Returns `deflt` when no characters
// could be converted.
uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt) {
    char* parse_end = nullptr;
    const uint64 parsed = strtou64(str, &parse_end, 10);
    if (parse_end == str) {
        return deflt; // nothing was consumed
    }
    return parsed;
}
399
400
// ----------------------------------------------------------------------
401
// ParseLeadingDoubleValue()
402
//    A simple parser for double values. Returns the parsed value
403
//    if a valid value is found; else returns deflt
404
// --------------------------------------------------------------------
405
406
0
// Parses the double at the front of `str` with strtod. Returns `deflt` when
// nothing could be converted or when strtod reported an error through errno
// (overflow / underflow); otherwise returns the parsed value.
double ParseLeadingDoubleValue(const char* str, double deflt) {
    char* parse_end = nullptr;
    errno = 0; // strtod only sets errno on failure, so clear it first
    const double parsed = strtod(str, &parse_end);

    const bool range_error = (errno != 0);
    const bool nothing_parsed = (parse_end == str);
    return (range_error || nothing_parsed) ? deflt : parsed;
}
417
418
// ----------------------------------------------------------------------
419
// ParseLeadingBoolValue()
420
//    A recognizer of boolean string values. Returns the parsed value
421
//    if a valid value is found; else returns deflt.  This skips leading
422
//    whitespace, is case insensitive, and recognizes these forms:
423
//    0/1, false/true, no/yes, n/y
424
// --------------------------------------------------------------------
425
0
bool ParseLeadingBoolValue(const char* str, bool deflt) {
426
0
    static const int kMaxLen = 5;
427
0
    char value[kMaxLen + 1];
428
    // Skip whitespace
429
0
    while (ascii_isspace(*str)) {
430
0
        ++str;
431
0
    }
432
0
    int len = 0;
433
0
    for (; len <= kMaxLen && ascii_isalnum(*str); ++str) value[len++] = ascii_tolower(*str);
434
0
    if (len == 0 || len > kMaxLen) return deflt;
435
0
    value[len] = '\0';
436
0
    switch (len) {
437
0
    case 1:
438
0
        if (value[0] == '0' || value[0] == 'n') return false;
439
0
        if (value[0] == '1' || value[0] == 'y') return true;
440
0
        break;
441
0
    case 2:
442
0
        if (!strcmp(value, "no")) return false;
443
0
        break;
444
0
    case 3:
445
0
        if (!strcmp(value, "yes")) return true;
446
0
        break;
447
0
    case 4:
448
0
        if (!strcmp(value, "true")) return true;
449
0
        break;
450
0
    case 5:
451
0
        if (!strcmp(value, "false")) return false;
452
0
        break;
453
0
    }
454
0
    return deflt;
455
0
}
456
457
// ----------------------------------------------------------------------
458
// Uint64ToString()
459
// FloatToString()
460
// IntToString()
461
//    Convert various types to their string representation, possibly padded
462
//    with spaces, using snprintf format specifiers.
463
// ----------------------------------------------------------------------
464
465
0
// Formats `fp` as exactly 16 lowercase hexadecimal digits, zero-padded on
// the left.
string Uint64ToString(uint64 fp) {
    char hex[16 + 1]; // 16 digits plus the terminating '\0'
    snprintf(hex, sizeof(hex), "%016" PRIx64, fp);
    string formatted(hex);
    return formatted;
}
470
471
// Default arguments
472
0
// Formats `ui128` as exactly 32 lowercase hexadecimal digits: the high 64
// bits first, then the low 64 bits, each zero-padded to 16 digits.
string Uint128ToHexString(uint128 ui128) {
    char hex[32 + 1]; // 2 x 16 digits plus the terminating '\0'
    snprintf(hex, sizeof(hex), "%016" PRIx64 "%016" PRIx64, Uint128High64(ui128),
             Uint128Low64(ui128));
    string formatted(hex);
    return formatted;
}
478
479
namespace {
480
481
// Represents integer values of digits.
482
// Uses 36 to indicate an invalid character since we support
483
// bases up to 36.
484
static const int8 kAsciiToInt[256] = {
485
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
486
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
487
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  36, 36,
488
        36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
489
        27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16,
490
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36,
491
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
492
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
493
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
494
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
495
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
496
        36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
497
498
// Input format based on POSIX.1-2008 strtol
499
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
500
template <typename IntType>
501
0
bool safe_int_internal(const char* start, const char* end, int base, IntType* value_p) {
502
    // Consume whitespace.
503
0
    while (start < end && ascii_isspace(start[0])) {
504
0
        ++start;
505
0
    }
506
0
    while (start < end && ascii_isspace(end[-1])) {
507
0
        --end;
508
0
    }
509
0
    if (start >= end) {
510
0
        return false;
511
0
    }
512
513
    // Consume sign.
514
0
    const bool negative = (start[0] == '-');
515
0
    if (negative || start[0] == '+') {
516
0
        ++start;
517
0
        if (start >= end) {
518
0
            return false;
519
0
        }
520
0
    }
521
522
    // Consume base-dependent prefix.
523
    //  base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
524
    //  base 16: "0x" -> base 16
525
    // Also validate the base.
526
0
    if (base == 0) {
527
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
528
0
            base = 16;
529
0
            start += 2;
530
0
        } else if (end - start >= 1 && start[0] == '0') {
531
0
            base = 8;
532
0
            start += 1;
533
0
        } else {
534
0
            base = 10;
535
0
        }
536
0
    } else if (base == 16) {
537
0
        if (end - start >= 2 && start[0] == '0' && (start[1] == 'x' || start[1] == 'X')) {
538
0
            start += 2;
539
0
        }
540
0
    } else if (base >= 2 && base <= 36) {
541
        // okay
542
0
    } else {
543
0
        return false;
544
0
    }
545
546
    // Consume digits.
547
    //
548
    // The classic loop:
549
    //
550
    //   for each digit
551
    //     value = value * base + digit
552
    //   value *= sign
553
    //
554
    // The classic loop needs overflow checking.  It also fails on the most
555
    // negative integer, -2147483648 in 32-bit two's complement representation.
556
    //
557
    // My improved loop:
558
    //
559
    //  if (!negative)
560
    //    for each digit
561
    //      value = value * base
562
    //      value = value + digit
563
    //  else
564
    //    for each digit
565
    //      value = value * base
566
    //      value = value - digit
567
    //
568
    // Overflow checking becomes simple.
569
    //
570
    // I present the positive code first for easier reading.
571
0
    IntType value = 0;
572
0
    if (!negative) {
573
0
        const IntType vmax = std::numeric_limits<IntType>::max();
574
0
        assert(vmax > 0);
575
0
        assert(vmax >= base);
576
0
        const IntType vmax_over_base = vmax / base;
577
        // loop over digits
578
        // loop body is interleaved for perf, not readability
579
0
        for (; start < end; ++start) {
580
0
            unsigned char c = static_cast<unsigned char>(start[0]);
581
0
            int digit = kAsciiToInt[c];
582
0
            if (value > vmax_over_base) return false;
583
0
            value *= base;
584
0
            if (digit >= base) return false;
585
0
            if (value > vmax - digit) return false;
586
0
            value += digit;
587
0
        }
588
0
    } else {
589
0
        const IntType vmin = std::numeric_limits<IntType>::min();
590
0
        assert(vmin < 0);
591
0
        assert(vmin <= 0 - base);
592
0
        IntType vmin_over_base = vmin / base;
593
        // 2003 c++ standard [expr.mul]
594
        // "... the sign of the remainder is implementation-defined."
595
        // Although (vmin/base)*base + vmin%base is always vmin.
596
        // 2011 c++ standard tightens the spec but we cannot rely on it.
597
0
        if (vmin % base > 0) {
598
0
            vmin_over_base += 1;
599
0
        }
600
        // loop over digits
601
        // loop body is interleaved for perf, not readability
602
0
        for (; start < end; ++start) {
603
0
            unsigned char c = static_cast<unsigned char>(start[0]);
604
0
            int digit = kAsciiToInt[c];
605
0
            if (value < vmin_over_base) return false;
606
0
            value *= base;
607
0
            if (digit >= base) return false;
608
0
            if (value < vmin + digit) return false;
609
0
            value -= digit;
610
0
        }
611
0
    }
612
613
    // Store output.
614
0
    *value_p = value;
615
0
    return true;
616
0
}
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIiEEbPKcS2_iPT_
Unexecuted instantiation: numbers.cc:_ZN12_GLOBAL__N_117safe_int_internalIlEEbPKcS2_iPT_
617
618
} // anonymous namespace
619
620
0
bool safe_strto32_base(const char* startptr, const int buffer_size, int32* v, int base) {
621
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, base, v);
622
0
}
623
624
0
bool safe_strto64_base(const char* startptr, const int buffer_size, int64* v, int base) {
625
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, base, v);
626
0
}
627
628
0
bool safe_strto32(const char* startptr, const int buffer_size, int32* value) {
629
0
    return safe_int_internal<int32>(startptr, startptr + buffer_size, 10, value);
630
0
}
631
632
0
bool safe_strto64(const char* startptr, const int buffer_size, int64* value) {
633
0
    return safe_int_internal<int64>(startptr, startptr + buffer_size, 10, value);
634
0
}
635
636
0
bool safe_strto32_base(const char* str, int32* value, int base) {
637
0
    char* endptr;
638
0
    errno = 0; // errno only gets set on errors
639
0
    *value = strto32(str, &endptr, base);
640
0
    if (endptr != str) {
641
0
        while (ascii_isspace(*endptr)) ++endptr;
642
0
    }
643
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
644
0
}
645
646
0
bool safe_strto64_base(const char* str, int64* value, int base) {
647
0
    char* endptr;
648
0
    errno = 0; // errno only gets set on errors
649
0
    *value = strto64(str, &endptr, base);
650
0
    if (endptr != str) {
651
0
        while (ascii_isspace(*endptr)) ++endptr;
652
0
    }
653
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
654
0
}
655
656
0
bool safe_strtou32_base(const char* str, uint32* value, int base) {
657
    // strtoul does not give any errors on negative numbers, so we have to
658
    // search the string for '-' manually.
659
0
    while (ascii_isspace(*str)) ++str;
660
0
    if (*str == '-') return false;
661
662
0
    char* endptr;
663
0
    errno = 0; // errno only gets set on errors
664
0
    *value = strtou32(str, &endptr, base);
665
0
    if (endptr != str) {
666
0
        while (ascii_isspace(*endptr)) ++endptr;
667
0
    }
668
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
669
0
}
670
671
0
bool safe_strtou64_base(const char* str, uint64* value, int base) {
672
    // strtou64 does not give any errors on negative numbers, so we have to
673
    // search the string for '-' manually.
674
0
    while (ascii_isspace(*str)) ++str;
675
0
    if (*str == '-') return false;
676
677
0
    char* endptr;
678
0
    errno = 0; // errno only gets set on errors
679
0
    *value = strtou64(str, &endptr, base);
680
0
    if (endptr != str) {
681
0
        while (ascii_isspace(*endptr)) ++endptr;
682
0
    }
683
0
    return *str != '\0' && *endptr == '\0' && errno == 0;
684
0
}
685
686
// ----------------------------------------------------------------------
687
// u64tostr_base36()
688
//    Converts unsigned number to string representation in base-36.
689
// --------------------------------------------------------------------
690
0
// Writes `number` in base 36 (digits 0-9 then a-z) into `buffer` as a
// '\0'-terminated string.
//
//   buf_size  capacity of `buffer` in bytes, terminator included; must be > 0.
//   buffer    destination; must not be null.
//
// Returns the number of digits written (terminator excluded), or 0 when the
// buffer is too small to hold the result.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer) {
    CHECK_GT(buf_size, 0);
    CHECK(buffer);
    static const char kAlphabet[] = "0123456789abcdefghijklmnopqrstuvwxyz";

    // Digits come out least-significant first, so build the string backwards
    // from the end of the buffer; `write_pos` is the lowest index used so far.
    size_t write_pos = buf_size - 1;
    buffer[write_pos] = '\0';

    do {
        if (write_pos == 0) { // Ran out of space.
            return 0;
        }
        --write_pos;
        buffer[write_pos] = kAlphabet[number % 36];
        number /= 36;
    } while (number);

    // Slide the finished string (terminator included) to the buffer's start.
    const size_t bytes_used = buf_size - write_pos;
    memmove(buffer, buffer + write_pos, bytes_used);

    return bytes_used - 1;
}
712
713
// Generate functions that wrap safe_strtoXXX_base.
714
#define GEN_SAFE_STRTO(name, type)                                                  \
715
0
    bool name##_base(const string& str, type* value, int base) {                    \
716
0
        return name##_base(str.c_str(), value, base);                               \
717
0
    }                                                                               \
Unexecuted instantiation: _Z17safe_strto32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPii
Unexecuted instantiation: _Z18safe_strtou32_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPji
Unexecuted instantiation: _Z17safe_strto64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPli
Unexecuted instantiation: _Z18safe_strtou64_baseRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPmi
718
0
    bool name(const char* str, type* value) { return name##_base(str, value, 10); } \
Unexecuted instantiation: _Z12safe_strto32PKcPi
Unexecuted instantiation: _Z13safe_strtou32PKcPj
Unexecuted instantiation: _Z12safe_strto64PKcPl
Unexecuted instantiation: _Z13safe_strtou64PKcPm
719
0
    bool name(const string& str, type* value) { return name##_base(str.c_str(), value, 10); }
Unexecuted instantiation: _Z12safe_strto32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPi
Unexecuted instantiation: _Z13safe_strtou32RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPj
Unexecuted instantiation: _Z12safe_strto64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPl
Unexecuted instantiation: _Z13safe_strtou64RKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPm
720
GEN_SAFE_STRTO(safe_strto32, int32);
721
GEN_SAFE_STRTO(safe_strtou32, uint32);
722
GEN_SAFE_STRTO(safe_strto64, int64);
723
GEN_SAFE_STRTO(safe_strtou64, uint64);
724
#undef GEN_SAFE_STRTO
725
726
42
bool safe_strtof(const char* str, float* value) {
727
42
    char* endptr;
728
#ifdef _MSC_VER // has no strtof()
729
    *value = strtod(str, &endptr);
730
#else
731
42
    *value = strtof(str, &endptr);
732
42
#endif
733
42
    if (endptr != str) {
734
42
        while (ascii_isspace(*endptr)) ++endptr;
735
42
    }
736
    // Ignore range errors from strtod/strtof.
737
    // The values it returns on underflow and
738
    // overflow are the right fallback in a
739
    // robust setting.
740
42
    return *str != '\0' && *endptr == '\0';
741
42
}
742
743
0
bool safe_strtod(const char* str, double* value) {
744
0
    char* endptr;
745
0
    *value = strtod(str, &endptr);
746
0
    if (endptr != str) {
747
0
        while (ascii_isspace(*endptr)) ++endptr;
748
0
    }
749
    // Ignore range errors from strtod.  The values it
750
    // returns on underflow and overflow are the right
751
    // fallback in a robust setting.
752
0
    return *str != '\0' && *endptr == '\0';
753
0
}
754
755
0
bool safe_strtof(const string& str, float* value) {
756
0
    return safe_strtof(str.c_str(), value);
757
0
}
758
759
0
bool safe_strtod(const string& str, double* value) {
760
0
    return safe_strtod(str.c_str(), value);
761
0
}
762
763
0
// Parses a base-10 unsigned integer from `s`, optionally followed by a single
// case-insensitive K/M/G/T suffix, and returns the number multiplied by
// 2^10 / 2^20 / 2^30 / 2^40 respectively. Any other character directly after
// the digits is reported through LOG(FATAL). Characters after the suffix are
// not examined, and the multiplication is plain unsigned arithmetic.
uint64 atoi_kmgt(const char* s) {
    char* suffix_pos;
    const uint64 n = strtou64(s, &suffix_pos, 10);
    if (*suffix_pos == '\0') {
        return n; // no suffix: the scale is 1
    }

    const char c = ascii_toupper(*suffix_pos);
    int shift = 0; // stays 0 (scale 1) if the suffix is not recognised
    if (c == 'K') {
        shift = 10;
    } else if (c == 'M') {
        shift = 20;
    } else if (c == 'G') {
        shift = 30;
    } else if (c == 'T') {
        shift = 40;
    } else {
        LOG(FATAL) << "Invalid mnemonic: `" << c << "';"
                   << " should be one of `K', `M', `G', and `T'.";
    }
    return n * (GG_ULONGLONG(1) << shift);
}
790
791
// ----------------------------------------------------------------------
792
// FastIntToBuffer()
793
// FastInt64ToBuffer()
794
// FastHexToBuffer()
795
// FastHex64ToBuffer()
796
// FastHex32ToBuffer()
797
// FastTimeToBuffer()
798
//    These are intended for speed.  FastHexToBuffer() assumes the
799
//    integer is non-negative.  FastHexToBuffer() puts output in
800
//    hex rather than decimal.  FastTimeToBuffer() puts the output
801
//    into RFC822 format.  If time is 0, uses the current time.
802
//
803
//    FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
804
//    padded to exactly 16 bytes (plus one byte for '\0')
805
//
806
//    FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
807
//    padded to exactly 8 bytes (plus one byte for '\0')
808
//
809
//       All functions take the output buffer as an arg.  FastInt()
810
//    uses at most 22 bytes, FastTime() uses exactly 30 bytes.
811
//    They all return a pointer to the beginning of the output,
812
//    which may not be the beginning of the input buffer.  (Though
813
//    for FastTimeToBuffer(), we guarantee that it is.)
814
// ----------------------------------------------------------------------
815
816
0
// Writes the decimal text of `i` at the start of `buffer` (via
// FastInt64ToBufferLeft) and returns `buffer` itself, i.e. the beginning of
// the text rather than its end.
char* FastInt64ToBuffer(int64 i, char* buffer) {
    char* const start = buffer;
    FastInt64ToBufferLeft(i, start);
    return start;
}
820
821
0
// Writes the decimal text of `i` at the start of `buffer` (via
// FastInt32ToBufferLeft) and returns `buffer` itself, i.e. the beginning of
// the text rather than its end.
char* FastInt32ToBuffer(int32 i, char* buffer) {
    char* const start = buffer;
    FastInt32ToBufferLeft(i, start);
    return start;
}
825
826
0
char* FastHexToBuffer(int i, char* buffer) {
827
0
    CHECK_GE(i, 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
828
829
0
    static const char* hexdigits = "0123456789abcdef";
830
0
    char* p = buffer + 21;
831
0
    *p-- = '\0';
832
0
    do {
833
0
        *p-- = hexdigits[i & 15]; // mod by 16
834
0
        i >>= 4;                  // divide by 16
835
0
    } while (i > 0);
836
0
    return p + 1;
837
0
}
838
839
0
// Writes exactly `num_byte` lowercase hex digits of `value` (zero padded on
// the left, higher nibbles beyond num_byte discarded) into `buffer`, followed
// by a NUL at buffer[num_byte]. Returns `buffer`.
char* InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
    static const char* kHexChars = "0123456789abcdef";
    buffer[num_byte] = '\0';
    // Fill from the last digit slot backwards, consuming one nibble per slot.
    for (int remaining = num_byte; remaining > 0; --remaining) {
        buffer[remaining - 1] = kHexChars[value & 0xf];
        value >>= 4;
    }
    return buffer;
}
848
849
0
// Formats `value` as exactly 16 zero-padded hex digits plus a NUL
// (17 bytes written) and returns `buffer`.
char* FastHex64ToBuffer(uint64 value, char* buffer) {
    const int hex_digit_count = 16; // 64 bits / 4 bits per digit
    return InternalFastHexToBuffer(value, buffer, hex_digit_count);
}
852
853
0
// Formats `value` as exactly 8 zero-padded hex digits plus a NUL
// (9 bytes written) and returns `buffer`.
char* FastHex32ToBuffer(uint32 value, char* buffer) {
    const int hex_digit_count = 8; // 32 bits / 4 bits per digit
    return InternalFastHexToBuffer(value, buffer, hex_digit_count);
}
856
857
// TODO(user): revisit the two_ASCII_digits optimization.
858
//
859
// Several converters use this table to reduce
860
// division and modulo operations.
861
extern const char two_ASCII_digits[100][2]; // from strutil.cc
862
863
// ----------------------------------------------------------------------
864
// FastInt32ToBufferLeft()
865
// FastUInt32ToBufferLeft()
866
// FastInt64ToBufferLeft()
867
// FastUInt64ToBufferLeft()
868
//
869
// Like the Fast*ToBuffer() functions above, these are intended for speed.
870
// Unlike the Fast*ToBuffer() functions, however, these functions write
871
// their output to the beginning of the buffer (hence the name, as the
872
// output is left-aligned).  The caller is responsible for ensuring that
873
// the buffer has enough space to hold the output.
874
//
875
// Returns a pointer to the end of the string (i.e. the null character
876
// terminating the string).
877
// ----------------------------------------------------------------------
878
879
339k
// Writes the decimal text of `u` left-aligned into `buffer`, NUL-terminates
// it, and returns a pointer to that NUL.
//
// Digits are emitted two at a time through the two_ASCII_digits table, using
// multiply-and-subtract instead of modulo to keep the number of divisions
// low. The ten-digit case comes first and forms one straight-line run; the
// dispatch code at the bottom handles shorter numbers by emitting an odd
// leading digit (if any) and then jumping into the run at the right depth:
//   strip_*  labels: remove the pair/digit just emitted from `u`, then fall
//                    through to the next pair;
//   emit_*   labels: `u` is already reduced, emit the next pair directly.
char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
    uint chunk;
    const char* pair = nullptr;

    if (u >= 1000000000) {     // >= 1,000,000,000: ten digits
        chunk = u / 100000000; // 100,000,000
        pair = two_ASCII_digits[chunk];
        *buffer++ = pair[0];
        *buffer++ = pair[1];
    strip_above_1e8:
        u -= chunk * 100000000; // 100,000,000
    emit_below_1e8:
        chunk = u / 1000000; // 1,000,000
        pair = two_ASCII_digits[chunk];
        *buffer++ = pair[0];
        *buffer++ = pair[1];
    strip_above_1e6:
        u -= chunk * 1000000; // 1,000,000
    emit_below_1e6:
        chunk = u / 10000; // 10,000
        pair = two_ASCII_digits[chunk];
        *buffer++ = pair[0];
        *buffer++ = pair[1];
    strip_above_1e4:
        u -= chunk * 10000; // 10,000
    emit_below_1e4:
        chunk = u / 100;
        pair = two_ASCII_digits[chunk];
        *buffer++ = pair[0];
        *buffer++ = pair[1];
    strip_above_1e2:
        u -= chunk * 100;
    emit_below_1e2:
        chunk = u;
        pair = two_ASCII_digits[chunk];
        *buffer++ = pair[0];
        *buffer++ = pair[1];
    finish:
        *buffer = 0;
        return buffer;
    }

    // One or two digits.
    if (u < 100) {
        chunk = u;
        if (u >= 10) goto emit_below_1e2;
        *buffer++ = '0' + chunk;
        goto finish;
    }
    // Three or four digits.
    if (u < 10000) { // 10,000
        if (u >= 1000) goto emit_below_1e4;
        chunk = u / 100;
        *buffer++ = '0' + chunk;
        goto strip_above_1e2;
    }
    // Five or six digits.
    if (u < 1000000) { // 1,000,000
        if (u >= 100000) goto emit_below_1e6;
        chunk = u / 10000; //    10,000
        *buffer++ = '0' + chunk;
        goto strip_above_1e4;
    }
    // Seven or eight digits.
    if (u < 100000000) { // 100,000,000
        if (u >= 10000000) goto emit_below_1e8;
        chunk = u / 1000000; //   1,000,000
        *buffer++ = '0' + chunk;
        goto strip_above_1e6;
    }
    // Nine digits: we already know that u < 1,000,000,000.
    chunk = u / 100000000; // 100,000,000
    *buffer++ = '0' + chunk;
    goto strip_above_1e8;
}
960
961
336k
// Signed counterpart of FastUInt32ToBufferLeft(): writes an optional '-'
// followed by the decimal magnitude of `i`, and returns the pointer that
// FastUInt32ToBufferLeft() returns (the terminating NUL).
char* FastInt32ToBufferLeft(int32 i, char* buffer) {
    if (i >= 0) {
        return FastUInt32ToBufferLeft(static_cast<uint32>(i), buffer);
    }
    *buffer = '-';
    // The negation must happen in modular (i.e. "unsigned") arithmetic so the
    // most negative value is handled; MSVC++ apparently warns for plain "-u",
    // hence the equivalent "0 - u".
    const uint32 magnitude = 0 - static_cast<uint32>(i);
    return FastUInt32ToBufferLeft(magnitude, buffer + 1);
}
972
973
3.66k
// Writes the decimal text of `u64` left-aligned into `buffer`, NUL-terminates
// it, and returns a pointer to that NUL.
//
// Values that fit in 32 bits are delegated to FastUInt32ToBufferLeft().
// Otherwise the number is split into its leading digits (printed by a
// recursive call) and its low nine digits, which are always printed in full
// as four two-digit pairs plus one final digit.
char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
    const uint32 low32 = static_cast<uint32>(u64);
    if (low32 == u64) return FastUInt32ToBufferLeft(low32, buffer);

    const uint64 top_11_digits = u64 / 1000000000;
    buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
    // Remainder is below 1,000,000,000, so it fits in 32 bits.
    uint32 rest = u64 - (top_11_digits * 1000000000);

    uint chunk = rest / 10000000; // 10,000,000
    DCHECK_LT(chunk, 100);
    const char* pair = two_ASCII_digits[chunk];
    *buffer++ = pair[0];
    *buffer++ = pair[1];

    rest -= chunk * 10000000; // 10,000,000
    chunk = rest / 100000;    // 100,000
    pair = two_ASCII_digits[chunk];
    *buffer++ = pair[0];
    *buffer++ = pair[1];

    rest -= chunk * 100000; // 100,000
    chunk = rest / 1000;    // 1,000
    pair = two_ASCII_digits[chunk];
    *buffer++ = pair[0];
    *buffer++ = pair[1];

    rest -= chunk * 1000; // 1,000
    chunk = rest / 10;
    pair = two_ASCII_digits[chunk];
    *buffer++ = pair[0];
    *buffer++ = pair[1];

    // Ninth and last digit of the low group.
    rest -= chunk * 10;
    chunk = rest;
    *buffer++ = '0' + chunk;
    *buffer = 0;
    return buffer;
}
1014
1015
3.65k
// Signed counterpart of FastUInt64ToBufferLeft(): writes an optional '-'
// followed by the decimal magnitude of `i`, and returns the pointer that
// FastUInt64ToBufferLeft() returns (the terminating NUL).
char* FastInt64ToBufferLeft(int64 i, char* buffer) {
    if (i >= 0) {
        return FastUInt64ToBufferLeft(static_cast<uint64>(i), buffer);
    }
    *buffer = '-';
    // Negate in unsigned arithmetic so the most negative value is handled.
    const uint64 magnitude = 0 - static_cast<uint64>(i);
    return FastUInt64ToBufferLeft(magnitude, buffer + 1);
}
1023
1024
0
int HexDigitsPrefix(const char* buf, int num_digits) {
1025
0
    for (int i = 0; i < num_digits; i++)
1026
0
        if (!ascii_isxdigit(buf[i]))
1027
0
            return 0; // This also detects end of string as '\0' is not xdigit.
1028
0
    return 1;
1029
0
}
1030
1031
// ----------------------------------------------------------------------
1032
// AutoDigitStrCmp
1033
// AutoDigitLessThan
1034
// StrictAutoDigitLessThan
1035
// autodigit_less
1036
// autodigit_greater
1037
// strict_autodigit_less
1038
// strict_autodigit_greater
1039
//    These are like less<string> and greater<string>, except when a
1040
//    run of digits is encountered at corresponding points in the two
1041
//    arguments.  Such digit strings are compared numerically instead
1042
//    of lexicographically.  Therefore if you sort by
1043
//    "autodigit_less", some machine names might get sorted as:
1044
//        exaf1
1045
//        exaf2
1046
//        exaf10
1047
//    When using "strict" comparison (AutoDigitStrCmp with the strict flag
1048
//    set to true, or the strict version of the other functions),
1049
//    strings that represent equal numbers will not be considered equal if
1050
//    the string representations are not identical.  That is, "01" < "1" in
1051
//    strict mode, but "01" == "1" otherwise.
1052
// ----------------------------------------------------------------------
1053
1054
0
// Reports whether `c` is one of '0'..'9'. Unlike isdigit(), this is
// well-defined for every char value, including bytes >= 0x80 on platforms
// where plain char is signed (passing those to isdigit() is undefined).
static inline bool AutoDigitIsDigit(char c) {
    return c >= '0' && c <= '9';
}

// Three-way comparison of a[0, alen) and b[0, blen) in which runs of decimal
// digits found at corresponding positions are compared by numeric value
// rather than character by character.
//
// Returns -1 if a sorts before b, 1 if a sorts after b, and 0 if they are
// considered equal. With `strict` set, two digit runs with the same numeric
// value but a different number of leading zeroes are NOT equal: the run with
// more leading zeroes sorts first ("01" < "1"). Without `strict` such runs
// compare equal and scanning continues after them.
//
// Non-digit characters are compared as plain `char` values. If one input is
// exhausted first, the shorter one sorts first.
int AutoDigitStrCmp(const char* a, int alen, const char* b, int blen, bool strict) {
    int aindex = 0;
    int bindex = 0;
    while ((aindex < alen) && (bindex < blen)) {
        if (AutoDigitIsDigit(a[aindex]) && AutoDigitIsDigit(b[bindex])) {
            // Compare runs of digits.  Instead of extracting numbers, we
            // just skip leading zeroes, and then get the run-lengths.  This
            // allows us to handle arbitrary precision numbers.  We remember
            // how many zeroes we found so that we can differentiate between
            // "1" and "01" in strict mode.

            // Skip leading zeroes, but remember how many we found
            int azeroes = aindex;
            int bzeroes = bindex;
            while ((aindex < alen) && (a[aindex] == '0')) aindex++;
            while ((bindex < blen) && (b[bindex] == '0')) bindex++;
            azeroes = aindex - azeroes;
            bzeroes = bindex - bzeroes;

            // Count digit lengths
            int astart = aindex;
            int bstart = bindex;
            while ((aindex < alen) && AutoDigitIsDigit(a[aindex])) aindex++;
            while ((bindex < blen) && AutoDigitIsDigit(b[bindex])) bindex++;
            if (aindex - astart < bindex - bstart) {
                // a has shorter run of digits: so smaller
                return -1;
            } else if (aindex - astart > bindex - bstart) {
                // a has longer run of digits: so larger
                return 1;
            } else {
                // Same lengths, so compare digit by digit
                for (int i = 0; i < aindex - astart; i++) {
                    if (a[astart + i] < b[bstart + i]) {
                        return -1;
                    } else if (a[astart + i] > b[bstart + i]) {
                        return 1;
                    }
                }
                // Equal: did one have more leading zeroes?
                if (strict && azeroes != bzeroes) {
                    if (azeroes > bzeroes) {
                        // a has more leading zeroes: a < b
                        return -1;
                    } else {
                        // b has more leading zeroes: a > b
                        return 1;
                    }
                }
                // Equal: so continue scanning
            }
        } else if (a[aindex] < b[bindex]) {
            return -1;
        } else if (a[aindex] > b[bindex]) {
            return 1;
        } else {
            aindex++;
            bindex++;
        }
    }

    if (aindex < alen) {
        // b is prefix of a
        return 1;
    } else if (bindex < blen) {
        // a is prefix of b
        return -1;
    } else {
        // a is equal to b
        return 0;
    }
}
1126
1127
0
bool AutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1128
0
    return AutoDigitStrCmp(a, alen, b, blen, false) < 0;
1129
0
}
1130
1131
0
bool StrictAutoDigitLessThan(const char* a, int alen, const char* b, int blen) {
1132
0
    return AutoDigitStrCmp(a, alen, b, blen, true) < 0;
1133
0
}
1134
1135
// ----------------------------------------------------------------------
1136
// SimpleDtoa()
1137
// SimpleFtoa()
1138
// DoubleToBuffer()
1139
// FloatToBuffer()
1140
//    We want to print the value without losing precision, but we also do
1141
//    not want to print more digits than necessary.  This turns out to be
1142
//    trickier than it sounds.  Numbers like 0.2 cannot be represented
1143
//    exactly in binary.  If we print 0.2 with a very large precision,
1144
//    e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1145
//    On the other hand, if we set the precision too low, we lose
1146
//    significant digits when printing numbers that actually need them.
1147
//    It turns out there is no precision value that does the right thing
1148
//    for all numbers.
1149
//
1150
//    Our strategy is to first try printing with a precision that is never
1151
//    over-precise, then parse the result with strtod() to see if it
1152
//    matches.  If not, we print again with a precision that will always
1153
//    give a precise result, but may use more digits than necessary.
1154
//
1155
//    An arguably better strategy would be to use the algorithm described
1156
//    in "How to Print Floating-Point Numbers Accurately" by Steele &
1157
//    White, e.g. as implemented by David M. Gay's dtoa().  It turns out,
1158
//    however, that the following implementation is about as fast as
1159
//    DMG's code.  Furthermore, DMG's code locks mutexes, which means it
1160
//    will not scale well on multi-core machines.  DMG's code is slightly
1161
//    more accurate (in that it will never use more digits than
1162
//    necessary), but this is probably irrelevant for most users.
1163
//
1164
//    Rob Pike and Ken Thompson also have an implementation of dtoa() in
1165
//    third_party/fmt/fltfmt.cc.  Their implementation is similar to this
1166
//    one in that it makes guesses and then uses strtod() to check them.
1167
//    Their implementation is faster because they use their own code to
1168
//    generate the digits in the first place rather than use snprintf(),
1169
//    thus avoiding format string parsing overhead.  However, this makes
1170
//    it considerably more complicated than the following implementation,
1171
//    and it is embedded in a larger library.  If speed turns out to be
1172
//    an issue, we could re-implement this in terms of their
1173
//    implementation.
1174
// ----------------------------------------------------------------------
1175
1176
0
// Returns the text produced by DoubleToBuffer() for `value` as a string.
string SimpleDtoa(double value) {
    char scratch[kDoubleToBufferSize];
    const char* text = DoubleToBuffer(value, scratch);
    return text;
}
1180
1181
0
// Returns the text produced by FloatToBuffer() for `value` as a string.
string SimpleFtoa(float value) {
    char scratch[kFloatToBufferSize];
    const char* text = FloatToBuffer(value, scratch);
    return text;
}
1185
1186
0
char* DoubleToBuffer(double value, char* buffer) {
1187
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1188
    // platforms these days.  Just in case some system exists where DBL_DIG
1189
    // is significantly larger -- and risks overflowing our buffer -- we have
1190
    // this assert.
1191
0
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1192
1193
0
    int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
1194
1195
    // The snprintf should never overflow because the buffer is significantly
1196
    // larger than the precision we asked for.
1197
0
    DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1198
1199
0
    if (strtod(buffer, nullptr) != value) {
1200
0
        snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG + 2, value);
1201
1202
        // Should never overflow; see above.
1203
0
        DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
1204
0
    }
1205
0
    return buffer;
1206
0
}
1207
1208
0
char* FloatToBuffer(float value, char* buffer) {
1209
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1210
    // platforms these days.  Just in case some system exists where FLT_DIG
1211
    // is significantly larger -- and risks overflowing our buffer -- we have
1212
    // this assert.
1213
0
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1214
1215
0
    int snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
1216
1217
    // The snprintf should never overflow because the buffer is significantly
1218
    // larger than the precision we asked for.
1219
0
    DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1220
1221
0
    float parsed_value;
1222
0
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1223
0
        snprintf_result = snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG + 2, value);
1224
1225
        // Should never overflow; see above.
1226
0
        DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
1227
0
    }
1228
0
    return buffer;
1229
0
}
1230
1231
11
int DoubleToBuffer(double value, int width, char* buffer) {
1232
    // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1233
    // platforms these days.  Just in case some system exists where DBL_DIG
1234
    // is significantly larger -- and risks overflowing our buffer -- we have
1235
    // this assert.
1236
11
    COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
1237
1238
11
    int snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG, value);
1239
1240
    // The snprintf should never overflow because the buffer is significantly
1241
    // larger than the precision we asked for.
1242
11
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1243
1244
11
    if (strtod(buffer, nullptr) != value) {
1245
3
        snprintf_result = snprintf(buffer, width, "%.*g", DBL_DIG + 2, value);
1246
1247
        // Should never overflow; see above.
1248
3
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1249
3
    }
1250
1251
11
    return snprintf_result;
1252
11
}
1253
1254
42
int FloatToBuffer(float value, int width, char* buffer) {
1255
    // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1256
    // platforms these days.  Just in case some system exists where FLT_DIG
1257
    // is significantly larger -- and risks overflowing our buffer -- we have
1258
    // this assert.
1259
42
    COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
1260
1261
42
    int snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG, value);
1262
1263
    // The snprintf should never overflow because the buffer is significantly
1264
    // larger than the precision we asked for.
1265
42
    DCHECK(snprintf_result > 0 && snprintf_result < width);
1266
1267
42
    float parsed_value;
1268
42
    if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
1269
8
        snprintf_result = snprintf(buffer, width, "%.*g", FLT_DIG + 2, value);
1270
1271
        // Should never overflow; see above.
1272
8
        DCHECK(snprintf_result > 0 && snprintf_result < width);
1273
8
    }
1274
1275
42
    return snprintf_result;
1276
42
}
1277
1278
12
int FastDoubleToBuffer(double value, char* buffer, bool faster_float_convert) {
1279
12
    if (faster_float_convert) {
1280
2
        return jkj::dragonbox::to_chars_n(value, buffer) - buffer;
1281
2
    }
1282
 
1283
10
    auto end = fmt::format_to(buffer, "{:.15g}", value);
1284
10
    *end = '\0';
1285
10
    if (strtod(buffer, nullptr) != value) {
1286
3
        end = fmt::format_to(buffer, "{:.17g}", value);
1287
3
    }
1288
10
    return end - buffer;
1289
12
}
1290
1291
18
int FastFloatToBuffer(float value, char* buffer, bool faster_float_convert) {
1292
18
    if (faster_float_convert) {
1293
8
        return jkj::dragonbox::to_chars_n(value, buffer) - buffer;
1294
8
    }
1295
1296
10
    auto end = fmt::format_to(buffer, "{:.6g}", value);
1297
10
    *end = '\0';
1298
#ifdef _MSC_VER // has no strtof()
1299
    if (strtod(buffer, nullptr) != value) {
1300
#else
1301
10
    if (strtof(buffer, nullptr) != value) {
1302
6
#endif
1303
6
        end = fmt::format_to(buffer, "{:.8g}", value);
1304
6
    }
1305
10
    return end - buffer;
1306
18
}
1307
1308
// ----------------------------------------------------------------------
1309
// SimpleItoaWithCommas()
1310
//    Description: converts an integer to a string.
1311
//    Puts commas every 3 spaces.
1312
//    Faster than printf("%d")?
1313
//
1314
//    Return value: string
1315
// ----------------------------------------------------------------------
1316
0
string SimpleItoaWithCommas(int32 i) {
    // 10 digits, 3 commas, and sign are good for 32-bit or smaller ints.
    // Longest is -2,147,483,648.
    char local[14];
    char* const end = local + sizeof(local);
    char* p = end;

    // Work on the magnitude as uint32 so that -2,147,483,648 is handled:
    // negating the unsigned value cannot overflow.
    uint32 n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every complete
    // group of three once more digits follow. do/while covers the number "0".
    int emitted = 0;
    do {
        if (emitted > 0 && emitted % 3 == 0) *--p = ',';
        *--p = '0' + n % 10;
        n /= 10;
        ++emitted;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return string(p, end);
}
1344
1345
// We need this overload because otherwise SimpleItoaWithCommas(5U) wouldn't
1346
// compile.
1347
0
string SimpleItoaWithCommas(uint32 i) {
    // 10 digits and 3 commas are good for 32-bit or smaller ints.
    // Longest is 4,294,967,295.
    char local[13];
    char* const end = local + sizeof(local);
    char* p = end;

    // Emit digits right to left; a comma goes in front of every complete
    // group of three once more digits follow. do/while covers the number "0".
    int emitted = 0;
    do {
        if (emitted > 0 && emitted % 3 == 0) *--p = ',';
        *--p = '0' + i % 10;
        i /= 10;
        ++emitted;
    } while (i != 0);

    return string(p, end);
}
1370
1371
0
string SimpleItoaWithCommas(int64 i) {
    // 19 digits, 6 commas, and sign are good for 64-bit or smaller ints.
    char local[26];
    char* const end = local + sizeof(local);
    // The buffer overload fills `local` from its end and returns the start of
    // the text, so the result is the range [start, end).
    char* const start = SimpleItoaWithCommas(i, local, sizeof(local));
    return string(start, end);
}
1377
1378
// We need this overload because otherwise SimpleItoaWithCommas(5ULL) wouldn't
1379
// compile.
1380
0
string SimpleItoaWithCommas(uint64 i) {
    // 20 digits and 6 commas are good for 64-bit or smaller ints.
    // Longest is 18,446,744,073,709,551,615.
    char local[26];
    char* const end = local + sizeof(local);
    char* p = end;

    // Emit digits right to left; a comma goes in front of every complete
    // group of three once more digits follow. do/while covers the number "0".
    int emitted = 0;
    do {
        if (emitted > 0 && emitted % 3 == 0) *--p = ',';
        *--p = '0' + i % 10;
        i /= 10;
        ++emitted;
    } while (i != 0);

    return string(p, end);
}
1403
1404
3
// Writes `i` in decimal with a comma between every group of three digits,
// right-aligned so that the last digit lands at buffer[buffer_size - 1].
// Returns a pointer to the first character written; the text is the range
// [returned pointer, buffer + buffer_size) and is NOT NUL-terminated.
// The buffer size is not checked: 19 digits, 6 commas, and sign (26 bytes)
// are good for 64-bit or smaller ints.
char* SimpleItoaWithCommas(int64_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the magnitude as an unsigned 64-bit value so that
    // -9,223,372,036,854,775,808 is handled: negating the unsigned value
    // cannot overflow.
    uint64_t n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every complete
    // group of three once more digits follow. do/while covers the number "0".
    int emitted = 0;
    do {
        if (emitted > 0 && emitted % 3 == 0) *--p = ',';
        *--p = '0' + n % 10;
        n /= 10;
        ++emitted;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return p;
}
1430
1431
17
// Writes `i` in decimal with a comma between every group of three digits,
// right-aligned so that the last digit lands at buffer[buffer_size - 1].
// Returns a pointer to the first character written; the text is the range
// [returned pointer, buffer + buffer_size) and is NOT NUL-terminated.
// The buffer size is not checked: 39 digits, 12 commas, and sign (52 bytes)
// are good for 128-bit or smaller ints.
char* SimpleItoaWithCommas(__int128_t i, char* buffer, int32_t buffer_size) {
    char* p = buffer + buffer_size;

    // Work on the magnitude as an unsigned 128-bit value so that
    // -170,141,183,460,469,231,731,687,303,715,884,105,728 is handled:
    // negating the unsigned value cannot overflow.
    __uint128_t n = i;
    if (i < 0) n = 0 - n;

    // Emit digits right to left; a comma goes in front of every complete
    // group of three once more digits follow. do/while covers the number "0".
    int emitted = 0;
    do {
        if (emitted > 0 && emitted % 3 == 0) *--p = ',';
        *--p = '0' + n % 10;
        n /= 10;
        ++emitted;
    } while (n != 0);

    if (i < 0) *--p = '-';
    return p;
}
1457
1458
// ----------------------------------------------------------------------
1459
// ItoaKMGT()
1460
//    Description: converts an integer to a string
1461
//    Truncates values to a readable unit: K, G, M or T
1462
//    Opposite of atoi_kmgt()
1463
//    e.g. 100 -> "100" 1500 -> "1500"  4000 -> "3K"   47185920 -> "45M"
1464
//
1465
//    Return value: string
1466
// ----------------------------------------------------------------------
1467
0
string ItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
        // that's OK as this function is only for human readability
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    // Units from largest to smallest. The first unit whose truncated quotient
    // is greater than 1 wins; if none qualifies the number is printed as is
    // with no suffix.
    struct Unit {
        int shift;
        const char* suffix;
    };
    static const Unit kUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}, {10, "K"}};

    int64 val = i;
    const char* suffix = "";
    for (const Unit& unit : kUnits) {
        const int64 scaled = i >> unit.shift;
        if (scaled > 1) {
            val = scaled;
            suffix = unit.suffix;
            break;
        }
    }

    return StringPrintf("%s%" PRId64 "%s", sign, val, suffix);
}
1493
1494
0
// Breaks `i` down into a comma-separated list of T, G, M and K components,
// e.g. "<t>T,<g>G,<m>M,<k>K", prefixed with "-" for negative input.
// A component is only emitted (and subtracted from the running remainder)
// when its truncated quotient is greater than 1.
//
// NOTE(review): when the final K quotient is not greater than 1, the raw
// remainder is printed with a "K" suffix, and when it is, whatever is left
// below 1K is dropped. Both look surprising for an "accurate" formatter;
// confirm the intended output before relying on it.
string AccurateItoaKMGT(int64 i) {
    const char* sign = "";
    if (i < 0) {
        // We lose some accuracy if the caller passes LONG_LONG_MIN, but
        // that's OK as this function is only for human readability
        if (i == numeric_limits<int64>::min()) i++;
        sign = "-";
        i = -i;
    }

    string ret = StringPrintf("%s", sign);

    // T, G and M components are each followed by a comma.
    struct Unit {
        int shift;
        const char* suffix;
    };
    static const Unit kLargeUnits[] = {{40, "T"}, {30, "G"}, {20, "M"}};
    for (const Unit& unit : kLargeUnits) {
        const int64 val = i >> unit.shift;
        if (val > 1) {
            ret += StringPrintf("%" PRId64 "%s,", val, unit.suffix);
            i -= val << unit.shift;
        }
    }

    // The K component closes the list and has no trailing comma.
    const int64 kilo = i >> 10;
    if (kilo > 1) {
        ret += StringPrintf("%" PRId64 "%s", kilo, "K");
    } else {
        ret += StringPrintf("%" PRId64 "%s", i, "K");
    }

    return ret;
}
1536
1537
// DEPRECATED(wadetregaskis).
1538
// These are non-inline because some BUILD files turn on -Wformat-non-literal.
1539
1540
0
// Formats `f` with the caller-supplied StringPrintf() format string.
// The format is not validated here; it must contain a conversion that
// matches the argument.
string FloatToString(float f, const char* format) {
    string formatted = StringPrintf(format, f);
    return formatted;
}
1543
1544
0
// Formats `i` with the caller-supplied StringPrintf() format string.
// The format is not validated here; it must contain a conversion that
// matches the argument.
string IntToString(int i, const char* format) {
    string formatted = StringPrintf(format, i);
    return formatted;
}
1547
1548
0
// Formats `i64` with the caller-supplied StringPrintf() format string.
// The format is not validated here; it must contain a conversion that
// matches the argument.
string Int64ToString(int64 i64, const char* format) {
    string formatted = StringPrintf(format, i64);
    return formatted;
}
1551
1552
0
// Formats `ui64` with the caller-supplied StringPrintf() format string.
// The format is not validated here; it must contain a conversion that
// matches the argument.
string UInt64ToString(uint64 ui64, const char* format) {
    string formatted = StringPrintf(format, ui64);
    return formatted;
}