Coverage Report

Created: 2024-11-22 20:18

/root/doris/be/src/gutil/strings/util.cc
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (C) 1999-2005 Google, Inc.
3
//
4
5
// TODO(user): visit each const_cast.  Some of them are no longer necessary
6
// because last Single Unix Spec and grte v2 are more const-y.
7
8
#include "gutil/strings/util.h"
9
10
// IWYU pragma: no_include <pstl/glue_algorithm_defs.h>
11
12
#include <assert.h>
13
#include <stdarg.h>
14
#include <stdio.h>
15
#include <string.h>
16
#include <time.h> // for FastTimeToBuffer()
17
#include <algorithm>
18
#include <iterator>
19
#include <mutex>
20
#include <ostream>
21
22
using std::copy;
23
using std::max;
24
using std::min;
25
using std::reverse;
26
using std::sort;
27
using std::swap;
28
#include <string>
29
30
using std::string;
31
#include <vector>
32
33
using std::vector;
34
35
#include "common/logging.h"
36
37
#include "gutil/stl_util.h" // for string_as_array, STLAppendToString
38
#include "gutil/strings/ascii_ctype.h"
39
#include "gutil/strings/numbers.h"
40
#include "gutil/strings/stringpiece.h"
41
#include "gutil/utf/utf.h"
42
#include "gutil/stringprintf.h"
43
44
#ifdef OS_WINDOWS
45
#ifdef min // windows.h defines this to something silly
46
#undef min
47
#endif
48
#endif
49
50
// Use this instead of gmtime_r if you want to build for Windows.
51
// Windows doesn't have a 'gmtime_r', but it has the similar 'gmtime_s'.
52
// TODO(user): Probably belongs in //base:time_support.{cc|h}.
53
0
static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) {
54
#ifdef OS_WINDOWS
55
    return gmtime_s(result, timep) == 0 ? result : NULL;
56
#else
57
0
    return gmtime_r(timep, result);
58
0
#endif // OS_WINDOWS
59
0
}
60
61
0
char* strnstr(const char* haystack, const char* needle, size_t haystack_len) {
62
0
    if (*needle == '\0') {
63
0
        return const_cast<char*>(haystack);
64
0
    }
65
0
    size_t needle_len = strlen(needle);
66
0
    char* where;
67
0
    while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) {
68
0
        if (where - haystack + needle_len > haystack_len) {
69
0
            return nullptr;
70
0
        }
71
0
        if (strncmp(where, needle, needle_len) == 0) {
72
0
            return where;
73
0
        }
74
0
        haystack_len -= where + 1 - haystack;
75
0
        haystack = where + 1;
76
0
    }
77
0
    return nullptr;
78
0
}
79
80
const char* strnprefix(const char* haystack, int haystack_size, const char* needle,
81
0
                       int needle_size) {
82
0
    if (needle_size > haystack_size) {
83
0
        return nullptr;
84
0
    } else {
85
0
        if (strncmp(haystack, needle, needle_size) == 0) {
86
0
            return haystack + needle_size;
87
0
        } else {
88
0
            return nullptr;
89
0
        }
90
0
    }
91
0
}
92
93
const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle,
94
0
                           int needle_size) {
95
0
    if (needle_size > haystack_size) {
96
0
        return nullptr;
97
0
    } else {
98
0
        if (strncasecmp(haystack, needle, needle_size) == 0) {
99
0
            return haystack + needle_size;
100
0
        } else {
101
0
            return nullptr;
102
0
        }
103
0
    }
104
0
}
105
106
0
char* strcasesuffix(char* str, const char* suffix) {
107
0
    const int lenstr = strlen(str);
108
0
    const int lensuffix = strlen(suffix);
109
0
    char* strbeginningoftheend = str + lenstr - lensuffix;
110
111
0
    if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) {
112
0
        return (strbeginningoftheend);
113
0
    } else {
114
0
        return (nullptr);
115
0
    }
116
0
}
117
118
const char* strnsuffix(const char* haystack, int haystack_size, const char* needle,
119
0
                       int needle_size) {
120
0
    if (needle_size > haystack_size) {
121
0
        return nullptr;
122
0
    } else {
123
0
        const char* start = haystack + haystack_size - needle_size;
124
0
        if (strncmp(start, needle, needle_size) == 0) {
125
0
            return start;
126
0
        } else {
127
0
            return nullptr;
128
0
        }
129
0
    }
130
0
}
131
132
const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle,
133
0
                           int needle_size) {
134
0
    if (needle_size > haystack_size) {
135
0
        return nullptr;
136
0
    } else {
137
0
        const char* start = haystack + haystack_size - needle_size;
138
0
        if (strncasecmp(start, needle, needle_size) == 0) {
139
0
            return start;
140
0
        } else {
141
0
            return nullptr;
142
0
        }
143
0
    }
144
0
}
145
146
0
char* strchrnth(const char* str, const char& c, int n) {
147
0
    if (str == nullptr) return nullptr;
148
0
    if (n <= 0) return const_cast<char*>(str);
149
0
    const char* sp;
150
0
    int k = 0;
151
0
    for (sp = str; *sp != '\0'; sp++) {
152
0
        if (*sp == c) {
153
0
            ++k;
154
0
            if (k >= n) break;
155
0
        }
156
0
    }
157
0
    return (k < n) ? nullptr : const_cast<char*>(sp);
158
0
}
159
160
0
char* AdjustedLastPos(const char* str, char separator, int n) {
161
0
    if (str == nullptr) return nullptr;
162
0
    const char* pos = nullptr;
163
0
    if (n > 0) pos = strchrnth(str, separator, n);
164
165
    // if n <= 0 or separator appears fewer than n times, get the last occurrence
166
0
    if (pos == nullptr) pos = strrchr(str, separator);
167
0
    return const_cast<char*>(pos);
168
0
}
169
170
// ----------------------------------------------------------------------
171
// Misc. routines
172
// ----------------------------------------------------------------------
173
174
0
bool IsAscii(const char* str, int len) {
175
0
    const char* end = str + len;
176
0
    while (str < end) {
177
0
        if (!ascii_isascii(*str++)) {
178
0
            return false;
179
0
        }
180
0
    }
181
0
    return true;
182
0
}
183
184
// ----------------------------------------------------------------------
185
// StringReplace()
186
//    Give me a string and two patterns "old" and "new", and I replace
187
//    the first instance of "old" in the string with "new", if it
188
//    exists.  If "replace_all" is true then call this repeatedly until it
189
//    fails.  RETURN a new string, regardless of whether the replacement
190
//    happened or not.
191
// ----------------------------------------------------------------------
192
193
string StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub,
194
0
                     bool replace_all) {
195
0
    string ret;
196
0
    StringReplace(s, oldsub, newsub, replace_all, &ret);
197
0
    return ret;
198
0
}
199
200
// ----------------------------------------------------------------------
201
// StringReplace()
202
//    Replace the "old" pattern with the "new" pattern in a string,
203
//    and append the result to "res".  If replace_all is false,
204
//    it only replaces the first instance of "old."
205
// ----------------------------------------------------------------------
206
207
void StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub,
208
0
                   bool replace_all, string* res) {
209
0
    if (oldsub.empty()) {
210
0
        res->append(s.data(), s.length()); // If empty, append the given string.
211
0
        return;
212
0
    }
213
214
0
    StringPiece::size_type start_pos = 0;
215
0
    StringPiece::size_type pos;
216
0
    do {
217
0
        pos = s.find(oldsub, start_pos);
218
0
        if (pos == StringPiece::npos) {
219
0
            break;
220
0
        }
221
0
        res->append(s.data() + start_pos, pos - start_pos);
222
0
        res->append(newsub.data(), newsub.length());
223
        // Start searching again after the "old".
224
0
        start_pos = pos + oldsub.length();
225
0
    } while (replace_all);
226
0
    res->append(s.data() + start_pos, s.length() - start_pos);
227
0
}
228
229
// ----------------------------------------------------------------------
230
// GlobalReplaceSubstring()
231
//    Replaces all instances of a substring in a string.  Does nothing
232
//    if 'substring' is empty.  Returns the number of replacements.
233
//
234
//    NOTE: The string pieces must not overlap s.
235
// ----------------------------------------------------------------------
236
237
int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement,
238
0
                           string* s) {
239
0
    CHECK(s != nullptr);
240
0
    if (s->empty() || substring.empty()) return 0;
241
0
    string tmp;
242
0
    int num_replacements = 0;
243
0
    size_t pos = 0;
244
0
    for (size_t match_pos = s->find(substring.data(), pos, substring.length());
245
0
         match_pos != string::npos; pos = match_pos + substring.length(),
246
0
                match_pos = s->find(substring.data(), pos, substring.length())) {
247
0
        ++num_replacements;
248
        // Append the original content before the match.
249
0
        tmp.append(*s, pos, match_pos - pos);
250
        // Append the replacement for the match.
251
0
        tmp.append(replacement.begin(), replacement.end());
252
0
    }
253
    // Append the content after the last match. If no replacements were made, the
254
    // original string is left untouched.
255
0
    if (num_replacements > 0) {
256
0
        tmp.append(*s, pos, s->length() - pos);
257
0
        s->swap(tmp);
258
0
    }
259
0
    return num_replacements;
260
0
}
261
262
//---------------------------------------------------------------------------
263
// RemoveStrings()
264
//   Remove the strings from v given by the (sorted least -> greatest)
265
//   numbers in indices.
266
//   Order of v is *not* preserved.
267
//---------------------------------------------------------------------------
268
0
void RemoveStrings(vector<string>* v, const vector<int>& indices) {
269
0
    assert(v);
270
0
    assert(indices.size() <= v->size());
271
    // go from largest index to smallest so that smaller indices aren't
272
    // invalidated
273
0
    for (int lcv = indices.size() - 1; lcv >= 0; --lcv) {
274
0
#ifndef NDEBUG
275
        // verify that indices is sorted least->greatest
276
0
        if (indices.size() >= 2 && lcv > 0)
277
            // use LT and not LE because we should never see repeat indices
278
0
            CHECK_LT(indices[lcv - 1], indices[lcv]);
279
0
#endif
280
0
        assert(indices[lcv] >= 0);
281
0
        assert(indices[lcv] < v->size());
282
0
        swap((*v)[indices[lcv]], v->back());
283
0
        v->pop_back();
284
0
    }
285
0
}
286
287
// ----------------------------------------------------------------------
288
// gstrcasestr is a case-insensitive strstr. Eventually we should just
289
// use the GNU libc version of strcasestr, but it isn't compiled into
290
// RedHat Linux by default in version 6.1.
291
//
292
// This function uses ascii_tolower() instead of tolower(), for speed.
293
// ----------------------------------------------------------------------
294
295
0
char* gstrcasestr(const char* haystack, const char* needle) {
296
0
    char c, sc;
297
0
    size_t len;
298
299
0
    if ((c = *needle++) != 0) {
300
0
        c = ascii_tolower(c);
301
0
        len = strlen(needle);
302
0
        do {
303
0
            do {
304
0
                if ((sc = *haystack++) == 0) return nullptr;
305
0
            } while (ascii_tolower(sc) != c);
306
0
        } while (strncasecmp(haystack, needle, len) != 0);
307
0
        haystack--;
308
0
    }
309
    // This is a const violation but strstr() also returns a char*.
310
0
    return const_cast<char*>(haystack);
311
0
}
312
313
// ----------------------------------------------------------------------
314
// gstrncasestr is a case-insensitive strnstr.
315
//    Finds the occurence of the (null-terminated) needle in the
316
//    haystack, where no more than len bytes of haystack is searched.
317
//    Characters that appear after a '\0' in the haystack are not searched.
318
//
319
// This function uses ascii_tolower() instead of tolower(), for speed.
320
// ----------------------------------------------------------------------
321
0
const char* gstrncasestr(const char* haystack, const char* needle, size_t len) {
322
0
    char c, sc;
323
324
0
    if ((c = *needle++) != 0) {
325
0
        c = ascii_tolower(c);
326
0
        size_t needle_len = strlen(needle);
327
0
        do {
328
0
            do {
329
0
                if (len-- <= needle_len || 0 == (sc = *haystack++)) return nullptr;
330
0
            } while (ascii_tolower(sc) != c);
331
0
        } while (strncasecmp(haystack, needle, needle_len) != 0);
332
0
        haystack--;
333
0
    }
334
0
    return haystack;
335
0
}
336
337
// ----------------------------------------------------------------------
338
// gstrncasestr is a case-insensitive strnstr.
339
//    Finds the occurence of the (null-terminated) needle in the
340
//    haystack, where no more than len bytes of haystack is searched.
341
//    Characters that appear after a '\0' in the haystack are not searched.
342
//
343
//    This function uses ascii_tolower() instead of tolower(), for speed.
344
// ----------------------------------------------------------------------
345
0
char* gstrncasestr(char* haystack, const char* needle, size_t len) {
346
0
    return const_cast<char*>(gstrncasestr(static_cast<const char*>(haystack), needle, len));
347
0
}
348
// ----------------------------------------------------------------------
349
// gstrncasestr_split performs a case insensitive search
350
// on (prefix, non_alpha, suffix).
351
// ----------------------------------------------------------------------
352
char* gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix,
353
0
                         size_t n) {
354
0
    int prelen = prefix == nullptr ? 0 : strlen(prefix);
355
0
    int suflen = suffix == nullptr ? 0 : strlen(suffix);
356
357
    // adjust the string and its length to avoid unnessary searching.
358
    // an added benefit is to avoid unnecessary range checks in the if
359
    // statement in the inner loop.
360
0
    if (suflen + prelen >= n) return nullptr;
361
0
    str += prelen;
362
0
    n -= prelen;
363
0
    n -= suflen;
364
365
0
    const char* where = nullptr;
366
367
    // for every occurance of non_alpha in the string ...
368
0
    while ((where = static_cast<const char*>(memchr(str, non_alpha, n))) != nullptr) {
369
        // ... test whether it is followed by suffix and preceded by prefix
370
0
        if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) &&
371
0
            (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) {
372
0
            return const_cast<char*>(where - prelen);
373
0
        }
374
        // if not, advance the pointer, and adjust the length according
375
0
        n -= (where + 1) - str;
376
0
        str = where + 1;
377
0
    }
378
379
0
    return nullptr;
380
0
}
381
382
// ----------------------------------------------------------------------
383
// strcasestr_alnum is like a case-insensitive strstr, except that it
384
// ignores non-alphanumeric characters in both strings for the sake of
385
// comparison.
386
//
387
// This function uses ascii_isalnum() instead of isalnum() and
388
// ascii_tolower() instead of tolower(), for speed.
389
//
390
// E.g. strcasestr_alnum("i use google all the time", " !!Google!! ")
391
// returns pointer to "google all the time"
392
// ----------------------------------------------------------------------
393
0
char* strcasestr_alnum(const char* haystack, const char* needle) {
394
0
    const char* haystack_ptr;
395
0
    const char* needle_ptr;
396
397
    // Skip non-alnums at beginning
398
0
    while (!ascii_isalnum(*needle))
399
0
        if (*needle++ == '\0') return const_cast<char*>(haystack);
400
0
    needle_ptr = needle;
401
402
    // Skip non-alnums at beginning
403
0
    while (!ascii_isalnum(*haystack))
404
0
        if (*haystack++ == '\0') return nullptr;
405
0
    haystack_ptr = haystack;
406
407
0
    while (*needle_ptr != '\0') {
408
        // Non-alnums - advance
409
0
        while (!ascii_isalnum(*needle_ptr))
410
0
            if (*needle_ptr++ == '\0') return const_cast<char*>(haystack);
411
412
0
        while (!ascii_isalnum(*haystack_ptr))
413
0
            if (*haystack_ptr++ == '\0') return nullptr;
414
415
0
        if (ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr)) {
416
            // Case-insensitive match - advance
417
0
            needle_ptr++;
418
0
            haystack_ptr++;
419
0
        } else {
420
            // No match - rollback to next start point in haystack
421
0
            haystack++;
422
0
            while (!ascii_isalnum(*haystack))
423
0
                if (*haystack++ == '\0') return nullptr;
424
0
            haystack_ptr = haystack;
425
0
            needle_ptr = needle;
426
0
        }
427
0
    }
428
0
    return const_cast<char*>(haystack);
429
0
}
430
431
// ----------------------------------------------------------------------
432
// CountSubstring()
433
//    Return the number times a "substring" appears in the "text"
434
//    NOTE: this function's complexity is O(|text| * |substring|)
435
//          It is meant for short "text" (such as to ensure the
436
//          printf format string has the right number of arguments).
437
//          DO NOT pass in long "text".
438
// ----------------------------------------------------------------------
439
0
int CountSubstring(StringPiece text, StringPiece substring) {
440
0
    CHECK_GT(substring.length(), 0);
441
442
0
    int count = 0;
443
0
    StringPiece::size_type curr = 0;
444
0
    while (StringPiece::npos != (curr = text.find(substring, curr))) {
445
0
        ++count;
446
0
        ++curr;
447
0
    }
448
0
    return count;
449
0
}
450
451
// ----------------------------------------------------------------------
452
// strstr_delimited()
453
//    Just like strstr(), except it ensures that the needle appears as
454
//    a complete item (or consecutive series of items) in a delimited
455
//    list.
456
//
457
//    Like strstr(), returns haystack if needle is empty, or NULL if
458
//    either needle/haystack is NULL.
459
// ----------------------------------------------------------------------
460
0
const char* strstr_delimited(const char* haystack, const char* needle, char delim) {
461
0
    if (!needle || !haystack) return nullptr;
462
0
    if (*needle == '\0') return haystack;
463
464
0
    int needle_len = strlen(needle);
465
466
0
    while (true) {
467
        // Skip any leading delimiters.
468
0
        while (*haystack == delim) ++haystack;
469
470
        // Walk down the haystack, matching every character in the needle.
471
0
        const char* this_match = haystack;
472
0
        int i = 0;
473
0
        for (; i < needle_len; i++) {
474
0
            if (*haystack != needle[i]) {
475
                // We ran out of haystack or found a non-matching character.
476
0
                break;
477
0
            }
478
0
            ++haystack;
479
0
        }
480
481
        // If we matched the whole needle, ensure that it's properly delimited.
482
0
        if (i == needle_len && (*haystack == '\0' || *haystack == delim)) {
483
0
            return this_match;
484
0
        }
485
486
        // No match. Consume non-delimiter characters until we run out of them.
487
0
        while (*haystack != delim) {
488
0
            if (*haystack == '\0') return nullptr;
489
0
            ++haystack;
490
0
        }
491
0
    }
492
0
    LOG(FATAL) << "Unreachable statement";
493
0
    return nullptr;
494
0
}
495
496
// ----------------------------------------------------------------------
497
// Older versions of libc have a buggy strsep.
498
// ----------------------------------------------------------------------
499
500
0
char* gstrsep(char** stringp, const char* delim) {
501
0
    char* s;
502
0
    const char* spanp;
503
0
    int c, sc;
504
0
    char* tok;
505
506
0
    if ((s = *stringp) == nullptr) return nullptr;
507
508
0
    tok = s;
509
0
    while (true) {
510
0
        c = *s++;
511
0
        spanp = delim;
512
0
        do {
513
0
            if ((sc = *spanp++) == c) {
514
0
                if (c == 0)
515
0
                    s = nullptr;
516
0
                else
517
0
                    s[-1] = 0;
518
0
                *stringp = s;
519
0
                return tok;
520
0
            }
521
0
        } while (sc != 0);
522
0
    }
523
524
0
    return nullptr; /* should not happen */
525
0
}
526
527
0
void FastStringAppend(string* s, const char* data, int len) {
528
0
    STLAppendToString(s, data, len);
529
0
}
530
531
// TODO(user): add a microbenchmark and revisit
532
// the optimizations done here.
533
//
534
// Several converters use this table to reduce
535
// division and modulo operations.
536
extern const char two_ASCII_digits[100][2];
537
538
const char two_ASCII_digits[100][2] = {
539
        {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'},
540
        {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'},
541
        {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, {'2', '0'},
542
        {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'},
543
        {'2', '8'}, {'2', '9'}, {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
544
        {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
545
        {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'},
546
        {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'},
547
        {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, {'6', '0'}, {'6', '1'}, {'6', '2'},
548
        {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
549
        {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'},
550
        {'7', '7'}, {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
551
        {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, {'9', '0'},
552
        {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'},
553
        {'9', '8'}, {'9', '9'}};
554
555
0
static inline void PutTwoDigits(int i, char* p) {
556
0
    DCHECK_GE(i, 0);
557
0
    DCHECK_LT(i, 100);
558
0
    p[0] = two_ASCII_digits[i][0];
559
0
    p[1] = two_ASCII_digits[i][1];
560
0
}
561
562
0
char* FastTimeToBuffer(time_t s, char* buffer) {
563
0
    if (s == 0) {
564
0
        time(&s);
565
0
    }
566
567
0
    struct tm tm;
568
0
    if (PortableSafeGmtime(&s, &tm) == nullptr) {
569
        // Error message must fit in 30-char buffer.
570
0
        memcpy(buffer, "Invalid:", sizeof("Invalid:"));
571
0
        FastInt64ToBufferLeft(s, buffer + strlen(buffer));
572
0
        return buffer;
573
0
    }
574
575
    // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
576
    // but strftime does locale stuff which we do not want
577
    // plus strftime takes > 10x the time of hard code
578
579
0
    const char* weekday_name = "Xxx";
580
0
    switch (tm.tm_wday) {
581
0
    default: {
582
0
        DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday;
583
0
    } break;
584
0
    case 0:
585
0
        weekday_name = "Sun";
586
0
        break;
587
0
    case 1:
588
0
        weekday_name = "Mon";
589
0
        break;
590
0
    case 2:
591
0
        weekday_name = "Tue";
592
0
        break;
593
0
    case 3:
594
0
        weekday_name = "Wed";
595
0
        break;
596
0
    case 4:
597
0
        weekday_name = "Thu";
598
0
        break;
599
0
    case 5:
600
0
        weekday_name = "Fri";
601
0
        break;
602
0
    case 6:
603
0
        weekday_name = "Sat";
604
0
        break;
605
0
    }
606
607
0
    const char* month_name = "Xxx";
608
0
    switch (tm.tm_mon) {
609
0
    default: {
610
0
        DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon;
611
0
    } break;
612
0
    case 0:
613
0
        month_name = "Jan";
614
0
        break;
615
0
    case 1:
616
0
        month_name = "Feb";
617
0
        break;
618
0
    case 2:
619
0
        month_name = "Mar";
620
0
        break;
621
0
    case 3:
622
0
        month_name = "Apr";
623
0
        break;
624
0
    case 4:
625
0
        month_name = "May";
626
0
        break;
627
0
    case 5:
628
0
        month_name = "Jun";
629
0
        break;
630
0
    case 6:
631
0
        month_name = "Jul";
632
0
        break;
633
0
    case 7:
634
0
        month_name = "Aug";
635
0
        break;
636
0
    case 8:
637
0
        month_name = "Sep";
638
0
        break;
639
0
    case 9:
640
0
        month_name = "Oct";
641
0
        break;
642
0
    case 10:
643
0
        month_name = "Nov";
644
0
        break;
645
0
    case 11:
646
0
        month_name = "Dec";
647
0
        break;
648
0
    }
649
650
    // Write out the buffer.
651
652
0
    memcpy(buffer + 0, weekday_name, 3);
653
0
    buffer[3] = ',';
654
0
    buffer[4] = ' ';
655
656
0
    PutTwoDigits(tm.tm_mday, buffer + 5);
657
0
    buffer[7] = ' ';
658
659
0
    memcpy(buffer + 8, month_name, 3);
660
0
    buffer[11] = ' ';
661
662
0
    int32 year = tm.tm_year + 1900;
663
0
    PutTwoDigits(year / 100, buffer + 12);
664
0
    PutTwoDigits(year % 100, buffer + 14);
665
0
    buffer[16] = ' ';
666
667
0
    PutTwoDigits(tm.tm_hour, buffer + 17);
668
0
    buffer[19] = ':';
669
670
0
    PutTwoDigits(tm.tm_min, buffer + 20);
671
0
    buffer[22] = ':';
672
673
0
    PutTwoDigits(tm.tm_sec, buffer + 23);
674
675
    // includes ending NUL
676
0
    memcpy(buffer + 25, " GMT", 5);
677
678
0
    return buffer;
679
0
}
680
681
// ----------------------------------------------------------------------
682
// strdup_with_new()
683
// strndup_with_new()
684
//
685
//    strdup_with_new() is the same as strdup() except that the memory
686
//    is allocated by new[] and hence an exception will be generated
687
//    if out of memory.
688
//
689
//    strndup_with_new() is the same as strdup_with_new() except that it will
690
//    copy up to the specified number of characters.  This function
691
//    is useful when we want to copy a substring out of a string
692
//    and didn't want to (or cannot) modify the string
693
// ----------------------------------------------------------------------
694
0
char* strdup_with_new(const char* the_string) {
695
0
    if (the_string == nullptr)
696
0
        return nullptr;
697
0
    else
698
0
        return strndup_with_new(the_string, strlen(the_string));
699
0
}
700
701
0
char* strndup_with_new(const char* the_string, int max_length) {
702
0
    if (the_string == nullptr) return nullptr;
703
0
    size_t str_len = strlen(the_string);
704
0
    if (str_len > max_length) {
705
0
        str_len = max_length;
706
0
    }
707
0
    auto result = new char[str_len + 1];
708
0
    result[max_length] = '\0'; // truncated string might not have \0 at end
709
0
    memcpy(result, the_string, str_len);
710
0
    return result;
711
0
}
712
713
// ----------------------------------------------------------------------
714
// ScanForFirstWord()
715
//    This function finds the first word in the string "the_string" given.
716
//    A word is defined by consecutive !ascii_isspace() characters.
717
//    If no valid words are found,
718
//        return NULL and *end_ptr will contain junk
719
//    else
720
//        return the beginning of the first word and
721
//        *end_ptr will store the address of the first invalid character
722
//        (ascii_isspace() or '\0').
723
//
724
//    Precondition: (end_ptr != NULL)
725
// ----------------------------------------------------------------------
726
0
const char* ScanForFirstWord(const char* the_string, const char** end_ptr) {
727
0
    CHECK(end_ptr != nullptr) << ": precondition violated";
728
729
0
    if (the_string == nullptr) // empty string
730
0
        return nullptr;
731
732
0
    const char* curr = the_string;
733
0
    while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces
734
0
        ++curr;
735
736
0
    if (*curr == '\0') // no valid word found
737
0
        return nullptr;
738
739
    // else has a valid word
740
0
    const char* first_word = curr;
741
742
    // now locate the end of the word
743
0
    while ((*curr != '\0') && !ascii_isspace(*curr)) ++curr;
744
745
0
    *end_ptr = curr;
746
0
    return first_word;
747
0
}
748
749
// ----------------------------------------------------------------------
750
// AdvanceIdentifier()
751
//    This function returns a pointer past the end of the longest C-style
752
//    identifier that is a prefix of str or NULL if str does not start with
753
//    one.  A C-style identifier begins with an ASCII letter or underscore
754
//    and continues with ASCII letters, digits, or underscores.
755
// ----------------------------------------------------------------------
756
0
const char* AdvanceIdentifier(const char* str) {
757
    // Not using isalpha and isalnum so as not to rely on the locale.
758
    // We could have used ascii_isalpha and ascii_isalnum.
759
0
    char ch = *str++;
760
0
    if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) return nullptr;
761
0
    while (true) {
762
0
        ch = *str;
763
0
        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') ||
764
0
              ch == '_'))
765
0
            return str;
766
0
        str++;
767
0
    }
768
0
}
769
770
// ----------------------------------------------------------------------
771
// IsIdentifier()
772
//    This function returns true if str is a C-style identifier.
773
//    A C-style identifier begins with an ASCII letter or underscore
774
//    and continues with ASCII letters, digits, or underscores.
775
// ----------------------------------------------------------------------
776
0
bool IsIdentifier(const char* str) {
777
0
    const char* end = AdvanceIdentifier(str);
778
0
    return end && *end == '\0';
779
0
}
780
781
0
static bool IsWildcard(Rune character) {
782
0
    return character == '*' || character == '?';
783
0
}
784
785
// Move the strings pointers to the point where they start to differ.
786
template <typename CHAR, typename NEXT>
787
void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, const CHAR** string,
788
0
                  const CHAR* string_end, NEXT next) {
789
0
    const CHAR* escape = nullptr;
790
0
    while (*pattern != pattern_end && *string != string_end) {
791
0
        if (!escape && IsWildcard(**pattern)) {
792
            // We don't want to match wildcard here, except if it's escaped.
793
0
            return;
794
0
        }
795
796
        // Check if the escapement char is found. If so, skip it and move to the
797
        // next character.
798
0
        if (!escape && **pattern == '\\') {
799
0
            escape = *pattern;
800
0
            next(pattern, pattern_end);
801
0
            continue;
802
0
        }
803
804
        // Check if the chars match, if so, increment the ptrs.
805
0
        const CHAR* pattern_next = *pattern;
806
0
        const CHAR* string_next = *string;
807
0
        Rune pattern_char = next(&pattern_next, pattern_end);
808
0
        if (pattern_char == next(&string_next, string_end) && pattern_char != Runeerror &&
809
0
            pattern_char <= Runemax) {
810
0
            *pattern = pattern_next;
811
0
            *string = string_next;
812
0
        } else {
813
            // Uh ho, it did not match, we are done. If the last char was an
814
            // escapement, that means that it was an error to advance the ptr here,
815
            // let's put it back where it was. This also mean that the MatchPattern
816
            // function will return false because if we can't match an escape char
817
            // here, then no one will.
818
0
            if (escape) {
819
0
                *pattern = escape;
820
0
            }
821
0
            return;
822
0
        }
823
824
0
        escape = nullptr;
825
0
    }
826
0
}
827
828
template <typename CHAR, typename NEXT>
829
0
void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
830
0
    while (*pattern != end) {
831
0
        if (!IsWildcard(**pattern)) return;
832
0
        next(pattern, end);
833
0
    }
834
0
}
835
836
template <typename CHAR, typename NEXT>
837
bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, const CHAR* pattern,
838
0
                   const CHAR* pattern_end, int depth, NEXT next) {
839
0
    const int kMaxDepth = 16;
840
0
    if (depth > kMaxDepth) return false;
841
842
    // Eat all the matching chars.
843
0
    EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
844
845
    // If the string is empty, then the pattern must be empty too, or contains
846
    // only wildcards.
847
0
    if (eval == eval_end) {
848
0
        EatWildcard(&pattern, pattern_end, next);
849
0
        return pattern == pattern_end;
850
0
    }
851
852
    // Pattern is empty but not string, this is not a match.
853
0
    if (pattern == pattern_end) return false;
854
855
    // If this is a question mark, then we need to compare the rest with
856
    // the current string or the string with one character eaten.
857
0
    const CHAR* next_pattern = pattern;
858
0
    next(&next_pattern, pattern_end);
859
0
    if (pattern[0] == '?') {
860
0
        if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true;
861
0
        const CHAR* next_eval = eval;
862
0
        next(&next_eval, eval_end);
863
0
        if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, next))
864
0
            return true;
865
0
    }
866
867
    // This is a *, try to match all the possible substrings with the remainder
868
    // of the pattern.
869
0
    if (pattern[0] == '*') {
870
        // Collapse duplicate wild cards (********** into *) so that the
871
        // method does not recurse unnecessarily. http://crbug.com/52839
872
0
        EatWildcard(&next_pattern, pattern_end, next);
873
874
0
        while (eval != eval_end) {
875
0
            if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next))
876
0
                return true;
877
0
            eval++;
878
0
        }
879
880
        // We reached the end of the string, let see if the pattern contains only
881
        // wildcards.
882
0
        if (eval == eval_end) {
883
0
            EatWildcard(&pattern, pattern_end, next);
884
0
            if (pattern != pattern_end) return false;
885
0
            return true;
886
0
        }
887
0
    }
888
889
0
    return false;
890
0
}
891
892
struct NextCharUTF8 {
893
0
    Rune operator()(const char** p, const char* end) {
894
0
        Rune c;
895
0
        int offset = charntorune(&c, *p, static_cast<int>(end - *p));
896
0
        *p += offset;
897
0
        return c;
898
0
    }
899
};
900
901
0
bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
902
0
    return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
903
0
                         pattern.data() + pattern.size(), 0, NextCharUTF8());
904
0
}
905
906
// ----------------------------------------------------------------------
907
// FindTagValuePair
908
//    Given a string of the form
909
//    <something><attr_sep><tag><tag_value_sep><value><attr_sep>...<string_term>
910
//    where the part before the first attr_sep is optional,
911
//    this function extracts the first tag and value, if any.
912
//    The function returns true if successful, in which case "tag" and "value"
913
//    are set to point to the beginning of the tag and the value, respectively,
914
//    and "tag_len" and "value_len" are set to the respective lengths.
915
// ----------------------------------------------------------------------
916
917
bool FindTagValuePair(const char* arg_str, char tag_value_separator, char attribute_separator,
918
                      char string_terminal, char** tag, int* tag_len, char** value,
919
0
                      int* value_len) {
920
0
    char* in_str = const_cast<char*>(arg_str); // For msvc8.
921
0
    if (in_str == nullptr) return false;
922
0
    char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'};
923
0
    char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'};
924
925
    // Look for beginning of tag
926
0
    *tag = strpbrk(in_str, attr_sep_or_term);
927
    // If string_terminal is '\0', strpbrk won't find it but return null.
928
0
    if (*tag == nullptr || **tag == string_terminal)
929
0
        *tag = in_str;
930
0
    else
931
0
        (*tag)++; // Move past separator
932
    // Now look for value...
933
0
    char* tv_sep_pos = strpbrk(*tag, tv_sep_or_term);
934
0
    if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) return false;
935
    // ...and end of value
936
0
    char* attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term);
937
938
0
    *tag_len = tv_sep_pos - *tag;
939
0
    *value = tv_sep_pos + 1;
940
0
    if (attr_sep_pos != nullptr)
941
0
        *value_len = attr_sep_pos - *value;
942
0
    else
943
0
        *value_len = strlen(*value);
944
0
    return true;
945
0
}
946
947
0
void UniformInsertString(string* s, int interval, const char* separator) {
948
0
    const size_t separator_len = strlen(separator);
949
950
0
    if (interval < 1 ||     // invalid interval
951
0
        s->empty() ||       // nothing to do
952
0
        separator_len == 0) // invalid separator
953
0
        return;
954
955
0
    int num_inserts = (s->size() - 1) / interval; // -1 to avoid appending at end
956
0
    if (num_inserts == 0)                         // nothing to do
957
0
        return;
958
959
0
    string tmp;
960
0
    tmp.reserve(s->size() + num_inserts * separator_len + 1);
961
962
0
    for (int i = 0; i < num_inserts; ++i) {
963
        // append this interval
964
0
        tmp.append(*s, i * interval, interval);
965
        // append a separator
966
0
        tmp.append(separator, separator_len);
967
0
    }
968
969
    // append the tail
970
0
    const size_t tail_pos = num_inserts * interval;
971
0
    tmp.append(*s, tail_pos, s->size() - tail_pos);
972
973
0
    s->swap(tmp);
974
0
}
975
976
0
void InsertString(string* const s, const vector<uint32>& indices, char const* const separator) {
977
0
    const unsigned num_indices(indices.size());
978
0
    if (num_indices == 0) {
979
0
        return; // nothing to do...
980
0
    }
981
982
0
    const unsigned separator_len(strlen(separator));
983
0
    if (separator_len == 0) {
984
0
        return; // still nothing to do...
985
0
    }
986
987
0
    string tmp;
988
0
    const unsigned s_len(s->size());
989
0
    tmp.reserve(s_len + separator_len * num_indices);
990
991
0
    vector<uint32>::const_iterator const ind_end(indices.end());
992
0
    auto ind_pos(indices.begin());
993
994
0
    uint32 last_pos(0);
995
0
    while (ind_pos != ind_end) {
996
0
        const uint32 pos(*ind_pos);
997
0
        DCHECK_GE(pos, last_pos);
998
0
        DCHECK_LE(pos, s_len);
999
1000
0
        tmp.append(s->substr(last_pos, pos - last_pos));
1001
0
        tmp.append(separator);
1002
1003
0
        last_pos = pos;
1004
0
        ++ind_pos;
1005
0
    }
1006
0
    tmp.append(s->substr(last_pos));
1007
1008
0
    s->swap(tmp);
1009
0
}
1010
1011
//------------------------------------------------------------------------
1012
// FindNth()
1013
//  return index of nth occurrence of c in the string,
1014
//  or string::npos if n > number of occurrences of c.
1015
//  (returns string::npos = -1 if n <= 0)
1016
//------------------------------------------------------------------------
1017
0
int FindNth(StringPiece s, char c, int n) {
1018
0
    size_t pos = string::npos;
1019
1020
0
    for (int i = 0; i < n; ++i) {
1021
0
        pos = s.find_first_of(c, pos + 1);
1022
0
        if (pos == StringPiece::npos) {
1023
0
            break;
1024
0
        }
1025
0
    }
1026
0
    return pos;
1027
0
}
1028
1029
//------------------------------------------------------------------------
1030
// ReverseFindNth()
1031
//  return index of nth-to-last occurrence of c in the string,
1032
//  or string::npos if n > number of occurrences of c.
1033
//  (returns string::npos if n <= 0)
1034
//------------------------------------------------------------------------
1035
0
int ReverseFindNth(StringPiece s, char c, int n) {
1036
0
    if (n <= 0) {
1037
0
        return static_cast<int>(StringPiece::npos);
1038
0
    }
1039
1040
0
    size_t pos = s.size();
1041
1042
0
    for (int i = 0; i < n; ++i) {
1043
        // If pos == 0, we return StringPiece::npos right away. Otherwise,
1044
        // the following find_last_of call would take (pos - 1) as string::npos,
1045
        // which means it would again search the entire input string.
1046
0
        if (pos == 0) {
1047
0
            return static_cast<int>(StringPiece::npos);
1048
0
        }
1049
0
        pos = s.find_last_of(c, pos - 1);
1050
0
        if (pos == string::npos) {
1051
0
            break;
1052
0
        }
1053
0
    }
1054
0
    return pos;
1055
0
}
1056
1057
namespace strings {
1058
1059
// FindEol()
1060
// Returns the location of the next end-of-line sequence.
1061
1062
0
StringPiece FindEol(StringPiece s) {
1063
0
    for (size_t i = 0; i < s.length(); ++i) {
1064
0
        if (s[i] == '\n') {
1065
0
            return StringPiece(s.data() + i, 1);
1066
0
        }
1067
0
        if (s[i] == '\r') {
1068
0
            if (i + 1 < s.length() && s[i + 1] == '\n') {
1069
0
                return StringPiece(s.data() + i, 2);
1070
0
            } else {
1071
0
                return StringPiece(s.data() + i, 1);
1072
0
            }
1073
0
        }
1074
0
    }
1075
0
    return StringPiece(s.data() + s.length(), 0);
1076
0
}
1077
1078
} // namespace strings
1079
1080
//------------------------------------------------------------------------
1081
// OnlyWhitespace()
1082
//  return true if string s contains only whitespace characters
1083
//------------------------------------------------------------------------
1084
0
bool OnlyWhitespace(const StringPiece& s) {
1085
0
    for (const auto& c : s) {
1086
0
        if (!ascii_isspace(c)) return false;
1087
0
    }
1088
0
    return true;
1089
0
}
1090
1091
0
string PrefixSuccessor(const StringPiece& prefix) {
1092
    // We can increment the last character in the string and be done
1093
    // unless that character is 255, in which case we have to erase the
1094
    // last character and increment the previous character, unless that
1095
    // is 255, etc. If the string is empty or consists entirely of
1096
    // 255's, we just return the empty string.
1097
0
    bool done = false;
1098
0
    string limit(prefix.data(), prefix.size());
1099
0
    int index = limit.length() - 1;
1100
0
    while (!done && index >= 0) {
1101
0
        if (limit[index] == 255) {
1102
0
            limit.erase(index);
1103
0
            index--;
1104
0
        } else {
1105
0
            limit[index]++;
1106
0
            done = true;
1107
0
        }
1108
0
    }
1109
0
    if (!done) {
1110
0
        return "";
1111
0
    } else {
1112
0
        return limit;
1113
0
    }
1114
0
}
1115
1116
0
string ImmediateSuccessor(const StringPiece& s) {
1117
    // Return the input string, with an additional NUL byte appended.
1118
0
    string out;
1119
0
    out.reserve(s.size() + 1);
1120
0
    out.append(s.data(), s.size());
1121
0
    out.push_back('\0');
1122
0
    return out;
1123
0
}
1124
1125
0
void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, string* separator) {
1126
    // Find length of common prefix
1127
0
    size_t min_length = min(start.size(), limit.size());
1128
0
    size_t diff_index = 0;
1129
0
    while ((diff_index < min_length) && (start[diff_index] == limit[diff_index])) {
1130
0
        diff_index++;
1131
0
    }
1132
1133
0
    if (diff_index >= min_length) {
1134
        // Handle the case where either string is a prefix of the other
1135
        // string, or both strings are identical.
1136
0
        start.CopyToString(separator);
1137
0
        return;
1138
0
    }
1139
1140
0
    if (diff_index + 1 == start.size()) {
1141
        // If the first difference is in the last character, do not bother
1142
        // incrementing that character since the separator will be no
1143
        // shorter than "start".
1144
0
        start.CopyToString(separator);
1145
0
        return;
1146
0
    }
1147
1148
0
    if (start[diff_index] == 0xff) {
1149
        // Avoid overflow when incrementing start[diff_index]
1150
0
        start.CopyToString(separator);
1151
0
        return;
1152
0
    }
1153
1154
0
    separator->assign(start.data(), diff_index);
1155
0
    separator->push_back(start[diff_index] + 1);
1156
0
    if (*separator >= limit) {
1157
        // Never pick a separator that causes confusion with "limit"
1158
0
        start.CopyToString(separator);
1159
0
    }
1160
0
}
1161
1162
0
int SafeSnprintf(char* str, size_t size, const char* format, ...) {
1163
0
    va_list printargs;
1164
0
    va_start(printargs, format);
1165
0
    int ncw = vsnprintf(str, size, format, printargs);
1166
0
    va_end(printargs);
1167
0
    return (ncw < size && ncw >= 0) ? ncw : 0;
1168
0
}
1169
1170
0
bool GetlineFromStdioFile(FILE* file, string* str, char delim) {
1171
0
    str->erase();
1172
0
    while (true) {
1173
0
        if (feof(file) || ferror(file)) {
1174
0
            return false;
1175
0
        }
1176
0
        int c = getc(file);
1177
0
        if (c == EOF) return false;
1178
0
        if (c == delim) return true;
1179
0
        str->push_back(c);
1180
0
    }
1181
0
}
1182
1183
namespace {
1184
1185
template <typename CHAR>
1186
0
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
1187
0
    for (size_t i = 0; i < dst_size; ++i) {
1188
0
        if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
1189
0
            return i;
1190
0
    }
1191
1192
    // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
1193
0
    if (dst_size != 0) dst[dst_size - 1] = 0;
1194
1195
    // Count the rest of the |src|, and return it's length in characters.
1196
0
    while (src[dst_size]) ++dst_size;
1197
0
    return dst_size;
1198
0
}
1199
1200
} // namespace
1201
1202
0
size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) {
1203
0
    return lcpyT<char>(dst, src, dst_size);
1204
0
}