Coverage Report

Created: 2025-03-10 18:45

/root/doris/be/src/gutil/strings/util.cc
Line
Count
Source (jump to first uncovered line)
1
//
2
// Copyright (C) 1999-2005 Google, Inc.
3
//
4
5
// TODO(user): visit each const_cast.  Some of them are no longer necessary
6
// because last Single Unix Spec and grte v2 are more const-y.
7
8
#include "gutil/strings/util.h"
9
10
// IWYU pragma: no_include <pstl/glue_algorithm_defs.h>
11
12
#include <assert.h>
13
#include <stdarg.h>
14
#include <stdio.h>
15
#include <string.h>
16
#include <time.h> // for FastTimeToBuffer()
17
#include <algorithm>
18
#include <iterator>
19
#include <mutex>
20
#include <ostream>
21
22
#include "common/exception.h"
23
24
using std::copy;
25
using std::max;
26
using std::min;
27
using std::reverse;
28
using std::sort;
29
using std::swap;
30
#include <string>
31
32
using std::string;
33
#include <vector>
34
35
using std::vector;
36
37
#include "common/logging.h"
38
39
#include "gutil/stl_util.h" // for string_as_array, STLAppendToString
40
#include "gutil/strings/ascii_ctype.h"
41
#include "gutil/strings/numbers.h"
42
#include "gutil/strings/stringpiece.h"
43
#include "gutil/utf/utf.h"
44
#include "gutil/stringprintf.h"
45
46
#ifdef OS_WINDOWS
47
#ifdef min // windows.h defines this to something silly
48
#undef min
49
#endif
50
#endif
51
52
// Use this instead of gmtime_r if you want to build for Windows.
53
// Windows doesn't have a 'gmtime_r', but it has the similar 'gmtime_s'.
54
// TODO(user): Probably belongs in //base:time_support.{cc|h}.
55
0
static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) {
56
#ifdef OS_WINDOWS
57
    return gmtime_s(result, timep) == 0 ? result : NULL;
58
#else
59
0
    return gmtime_r(timep, result);
60
0
#endif // OS_WINDOWS
61
0
}
62
63
0
char* strnstr(const char* haystack, const char* needle, size_t haystack_len) {
64
0
    if (*needle == '\0') {
65
0
        return const_cast<char*>(haystack);
66
0
    }
67
0
    size_t needle_len = strlen(needle);
68
0
    char* where;
69
0
    while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) {
70
0
        if (where - haystack + needle_len > haystack_len) {
71
0
            return nullptr;
72
0
        }
73
0
        if (strncmp(where, needle, needle_len) == 0) {
74
0
            return where;
75
0
        }
76
0
        haystack_len -= where + 1 - haystack;
77
0
        haystack = where + 1;
78
0
    }
79
0
    return nullptr;
80
0
}
81
82
const char* strnprefix(const char* haystack, int haystack_size, const char* needle,
83
0
                       int needle_size) {
84
0
    if (needle_size > haystack_size) {
85
0
        return nullptr;
86
0
    } else {
87
0
        if (strncmp(haystack, needle, needle_size) == 0) {
88
0
            return haystack + needle_size;
89
0
        } else {
90
0
            return nullptr;
91
0
        }
92
0
    }
93
0
}
94
95
const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle,
96
0
                           int needle_size) {
97
0
    if (needle_size > haystack_size) {
98
0
        return nullptr;
99
0
    } else {
100
0
        if (strncasecmp(haystack, needle, needle_size) == 0) {
101
0
            return haystack + needle_size;
102
0
        } else {
103
0
            return nullptr;
104
0
        }
105
0
    }
106
0
}
107
108
0
char* strcasesuffix(char* str, const char* suffix) {
109
0
    const int lenstr = strlen(str);
110
0
    const int lensuffix = strlen(suffix);
111
0
    char* strbeginningoftheend = str + lenstr - lensuffix;
112
113
0
    if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) {
114
0
        return (strbeginningoftheend);
115
0
    } else {
116
0
        return (nullptr);
117
0
    }
118
0
}
119
120
const char* strnsuffix(const char* haystack, int haystack_size, const char* needle,
121
0
                       int needle_size) {
122
0
    if (needle_size > haystack_size) {
123
0
        return nullptr;
124
0
    } else {
125
0
        const char* start = haystack + haystack_size - needle_size;
126
0
        if (strncmp(start, needle, needle_size) == 0) {
127
0
            return start;
128
0
        } else {
129
0
            return nullptr;
130
0
        }
131
0
    }
132
0
}
133
134
const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle,
135
0
                           int needle_size) {
136
0
    if (needle_size > haystack_size) {
137
0
        return nullptr;
138
0
    } else {
139
0
        const char* start = haystack + haystack_size - needle_size;
140
0
        if (strncasecmp(start, needle, needle_size) == 0) {
141
0
            return start;
142
0
        } else {
143
0
            return nullptr;
144
0
        }
145
0
    }
146
0
}
147
148
0
char* strchrnth(const char* str, const char& c, int n) {
149
0
    if (str == nullptr) return nullptr;
150
0
    if (n <= 0) return const_cast<char*>(str);
151
0
    const char* sp;
152
0
    int k = 0;
153
0
    for (sp = str; *sp != '\0'; sp++) {
154
0
        if (*sp == c) {
155
0
            ++k;
156
0
            if (k >= n) break;
157
0
        }
158
0
    }
159
0
    return (k < n) ? nullptr : const_cast<char*>(sp);
160
0
}
161
162
0
char* AdjustedLastPos(const char* str, char separator, int n) {
163
0
    if (str == nullptr) return nullptr;
164
0
    const char* pos = nullptr;
165
0
    if (n > 0) pos = strchrnth(str, separator, n);
166
167
    // if n <= 0 or separator appears fewer than n times, get the last occurrence
168
0
    if (pos == nullptr) pos = strrchr(str, separator);
169
0
    return const_cast<char*>(pos);
170
0
}
171
172
// ----------------------------------------------------------------------
173
// Misc. routines
174
// ----------------------------------------------------------------------
175
176
0
bool IsAscii(const char* str, int len) {
177
0
    const char* end = str + len;
178
0
    while (str < end) {
179
0
        if (!ascii_isascii(*str++)) {
180
0
            return false;
181
0
        }
182
0
    }
183
0
    return true;
184
0
}
185
186
// ----------------------------------------------------------------------
187
// StringReplace()
188
//    Give me a string and two patterns "old" and "new", and I replace
189
//    the first instance of "old" in the string with "new", if it
190
//    exists.  If "replace_all" is true then call this repeatedly until it
191
//    fails.  RETURN a new string, regardless of whether the replacement
192
//    happened or not.
193
// ----------------------------------------------------------------------
194
195
string StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub,
196
0
                     bool replace_all) {
197
0
    string ret;
198
0
    StringReplace(s, oldsub, newsub, replace_all, &ret);
199
0
    return ret;
200
0
}
201
202
// ----------------------------------------------------------------------
203
// StringReplace()
204
//    Replace the "old" pattern with the "new" pattern in a string,
205
//    and append the result to "res".  If replace_all is false,
206
//    it only replaces the first instance of "old."
207
// ----------------------------------------------------------------------
208
209
void StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub,
210
0
                   bool replace_all, string* res) {
211
0
    if (oldsub.empty()) {
212
0
        res->append(s.data(), s.length()); // If empty, append the given string.
213
0
        return;
214
0
    }
215
216
0
    StringPiece::size_type start_pos = 0;
217
0
    StringPiece::size_type pos;
218
0
    do {
219
0
        pos = s.find(oldsub, start_pos);
220
0
        if (pos == StringPiece::npos) {
221
0
            break;
222
0
        }
223
0
        res->append(s.data() + start_pos, pos - start_pos);
224
0
        res->append(newsub.data(), newsub.length());
225
        // Start searching again after the "old".
226
0
        start_pos = pos + oldsub.length();
227
0
    } while (replace_all);
228
0
    res->append(s.data() + start_pos, s.length() - start_pos);
229
0
}
230
231
// ----------------------------------------------------------------------
232
// GlobalReplaceSubstring()
233
//    Replaces all instances of a substring in a string.  Does nothing
234
//    if 'substring' is empty.  Returns the number of replacements.
235
//
236
//    NOTE: The string pieces must not overlap s.
237
// ----------------------------------------------------------------------
238
239
int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement,
240
0
                           string* s) {
241
0
    CHECK(s != nullptr);
242
0
    if (s->empty() || substring.empty()) return 0;
243
0
    string tmp;
244
0
    int num_replacements = 0;
245
0
    size_t pos = 0;
246
0
    for (size_t match_pos = s->find(substring.data(), pos, substring.length());
247
0
         match_pos != string::npos; pos = match_pos + substring.length(),
248
0
                match_pos = s->find(substring.data(), pos, substring.length())) {
249
0
        ++num_replacements;
250
        // Append the original content before the match.
251
0
        tmp.append(*s, pos, match_pos - pos);
252
        // Append the replacement for the match.
253
0
        tmp.append(replacement.begin(), replacement.end());
254
0
    }
255
    // Append the content after the last match. If no replacements were made, the
256
    // original string is left untouched.
257
0
    if (num_replacements > 0) {
258
0
        tmp.append(*s, pos, s->length() - pos);
259
0
        s->swap(tmp);
260
0
    }
261
0
    return num_replacements;
262
0
}
263
264
//---------------------------------------------------------------------------
265
// RemoveStrings()
266
//   Remove the strings from v given by the (sorted least -> greatest)
267
//   numbers in indices.
268
//   Order of v is *not* preserved.
269
//---------------------------------------------------------------------------
270
0
void RemoveStrings(vector<string>* v, const vector<int>& indices) {
271
0
    assert(v);
272
0
    assert(indices.size() <= v->size());
273
    // go from largest index to smallest so that smaller indices aren't
274
    // invalidated
275
0
    for (int lcv = indices.size() - 1; lcv >= 0; --lcv) {
276
0
#ifndef NDEBUG
277
        // verify that indices is sorted least->greatest
278
0
        if (indices.size() >= 2 && lcv > 0)
279
            // use LT and not LE because we should never see repeat indices
280
0
            CHECK_LT(indices[lcv - 1], indices[lcv]);
281
0
#endif
282
0
        assert(indices[lcv] >= 0);
283
0
        assert(indices[lcv] < v->size());
284
0
        swap((*v)[indices[lcv]], v->back());
285
0
        v->pop_back();
286
0
    }
287
0
}
288
289
// ----------------------------------------------------------------------
290
// gstrcasestr is a case-insensitive strstr. Eventually we should just
291
// use the GNU libc version of strcasestr, but it isn't compiled into
292
// RedHat Linux by default in version 6.1.
293
//
294
// This function uses ascii_tolower() instead of tolower(), for speed.
295
// ----------------------------------------------------------------------
296
297
0
char* gstrcasestr(const char* haystack, const char* needle) {
298
0
    char c, sc;
299
0
    size_t len;
300
301
0
    if ((c = *needle++) != 0) {
302
0
        c = ascii_tolower(c);
303
0
        len = strlen(needle);
304
0
        do {
305
0
            do {
306
0
                if ((sc = *haystack++) == 0) return nullptr;
307
0
            } while (ascii_tolower(sc) != c);
308
0
        } while (strncasecmp(haystack, needle, len) != 0);
309
0
        haystack--;
310
0
    }
311
    // This is a const violation but strstr() also returns a char*.
312
0
    return const_cast<char*>(haystack);
313
0
}
314
315
// ----------------------------------------------------------------------
316
// gstrncasestr is a case-insensitive strnstr.
317
//    Finds the occurence of the (null-terminated) needle in the
318
//    haystack, where no more than len bytes of haystack is searched.
319
//    Characters that appear after a '\0' in the haystack are not searched.
320
//
321
// This function uses ascii_tolower() instead of tolower(), for speed.
322
// ----------------------------------------------------------------------
323
0
const char* gstrncasestr(const char* haystack, const char* needle, size_t len) {
324
0
    char c, sc;
325
326
0
    if ((c = *needle++) != 0) {
327
0
        c = ascii_tolower(c);
328
0
        size_t needle_len = strlen(needle);
329
0
        do {
330
0
            do {
331
0
                if (len-- <= needle_len || 0 == (sc = *haystack++)) return nullptr;
332
0
            } while (ascii_tolower(sc) != c);
333
0
        } while (strncasecmp(haystack, needle, needle_len) != 0);
334
0
        haystack--;
335
0
    }
336
0
    return haystack;
337
0
}
338
339
// ----------------------------------------------------------------------
340
// gstrncasestr is a case-insensitive strnstr.
341
//    Finds the occurence of the (null-terminated) needle in the
342
//    haystack, where no more than len bytes of haystack is searched.
343
//    Characters that appear after a '\0' in the haystack are not searched.
344
//
345
//    This function uses ascii_tolower() instead of tolower(), for speed.
346
// ----------------------------------------------------------------------
347
0
char* gstrncasestr(char* haystack, const char* needle, size_t len) {
348
0
    return const_cast<char*>(gstrncasestr(static_cast<const char*>(haystack), needle, len));
349
0
}
350
// ----------------------------------------------------------------------
351
// gstrncasestr_split performs a case insensitive search
352
// on (prefix, non_alpha, suffix).
353
// ----------------------------------------------------------------------
354
char* gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix,
355
0
                         size_t n) {
356
0
    int prelen = prefix == nullptr ? 0 : strlen(prefix);
357
0
    int suflen = suffix == nullptr ? 0 : strlen(suffix);
358
359
    // adjust the string and its length to avoid unnessary searching.
360
    // an added benefit is to avoid unnecessary range checks in the if
361
    // statement in the inner loop.
362
0
    if (suflen + prelen >= n) return nullptr;
363
0
    str += prelen;
364
0
    n -= prelen;
365
0
    n -= suflen;
366
367
0
    const char* where = nullptr;
368
369
    // for every occurance of non_alpha in the string ...
370
0
    while ((where = static_cast<const char*>(memchr(str, non_alpha, n))) != nullptr) {
371
        // ... test whether it is followed by suffix and preceded by prefix
372
0
        if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) &&
373
0
            (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) {
374
0
            return const_cast<char*>(where - prelen);
375
0
        }
376
        // if not, advance the pointer, and adjust the length according
377
0
        n -= (where + 1) - str;
378
0
        str = where + 1;
379
0
    }
380
381
0
    return nullptr;
382
0
}
383
384
// ----------------------------------------------------------------------
385
// strcasestr_alnum is like a case-insensitive strstr, except that it
386
// ignores non-alphanumeric characters in both strings for the sake of
387
// comparison.
388
//
389
// This function uses ascii_isalnum() instead of isalnum() and
390
// ascii_tolower() instead of tolower(), for speed.
391
//
392
// E.g. strcasestr_alnum("i use google all the time", " !!Google!! ")
393
// returns pointer to "google all the time"
394
// ----------------------------------------------------------------------
395
0
char* strcasestr_alnum(const char* haystack, const char* needle) {
396
0
    const char* haystack_ptr;
397
0
    const char* needle_ptr;
398
399
    // Skip non-alnums at beginning
400
0
    while (!ascii_isalnum(*needle))
401
0
        if (*needle++ == '\0') return const_cast<char*>(haystack);
402
0
    needle_ptr = needle;
403
404
    // Skip non-alnums at beginning
405
0
    while (!ascii_isalnum(*haystack))
406
0
        if (*haystack++ == '\0') return nullptr;
407
0
    haystack_ptr = haystack;
408
409
0
    while (*needle_ptr != '\0') {
410
        // Non-alnums - advance
411
0
        while (!ascii_isalnum(*needle_ptr))
412
0
            if (*needle_ptr++ == '\0') return const_cast<char*>(haystack);
413
414
0
        while (!ascii_isalnum(*haystack_ptr))
415
0
            if (*haystack_ptr++ == '\0') return nullptr;
416
417
0
        if (ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr)) {
418
            // Case-insensitive match - advance
419
0
            needle_ptr++;
420
0
            haystack_ptr++;
421
0
        } else {
422
            // No match - rollback to next start point in haystack
423
0
            haystack++;
424
0
            while (!ascii_isalnum(*haystack))
425
0
                if (*haystack++ == '\0') return nullptr;
426
0
            haystack_ptr = haystack;
427
0
            needle_ptr = needle;
428
0
        }
429
0
    }
430
0
    return const_cast<char*>(haystack);
431
0
}
432
433
// ----------------------------------------------------------------------
434
// CountSubstring()
435
//    Return the number times a "substring" appears in the "text"
436
//    NOTE: this function's complexity is O(|text| * |substring|)
437
//          It is meant for short "text" (such as to ensure the
438
//          printf format string has the right number of arguments).
439
//          DO NOT pass in long "text".
440
// ----------------------------------------------------------------------
441
0
int CountSubstring(StringPiece text, StringPiece substring) {
442
0
    CHECK_GT(substring.length(), 0);
443
444
0
    int count = 0;
445
0
    StringPiece::size_type curr = 0;
446
0
    while (StringPiece::npos != (curr = text.find(substring, curr))) {
447
0
        ++count;
448
0
        ++curr;
449
0
    }
450
0
    return count;
451
0
}
452
453
// ----------------------------------------------------------------------
454
// strstr_delimited()
455
//    Just like strstr(), except it ensures that the needle appears as
456
//    a complete item (or consecutive series of items) in a delimited
457
//    list.
458
//
459
//    Like strstr(), returns haystack if needle is empty, or NULL if
460
//    either needle/haystack is NULL.
461
// ----------------------------------------------------------------------
462
0
const char* strstr_delimited(const char* haystack, const char* needle, char delim) {
463
0
    if (!needle || !haystack) return nullptr;
464
0
    if (*needle == '\0') return haystack;
465
466
0
    int needle_len = strlen(needle);
467
468
0
    while (true) {
469
        // Skip any leading delimiters.
470
0
        while (*haystack == delim) ++haystack;
471
472
        // Walk down the haystack, matching every character in the needle.
473
0
        const char* this_match = haystack;
474
0
        int i = 0;
475
0
        for (; i < needle_len; i++) {
476
0
            if (*haystack != needle[i]) {
477
                // We ran out of haystack or found a non-matching character.
478
0
                break;
479
0
            }
480
0
            ++haystack;
481
0
        }
482
483
        // If we matched the whole needle, ensure that it's properly delimited.
484
0
        if (i == needle_len && (*haystack == '\0' || *haystack == delim)) {
485
0
            return this_match;
486
0
        }
487
488
        // No match. Consume non-delimiter characters until we run out of them.
489
0
        while (*haystack != delim) {
490
0
            if (*haystack == '\0') return nullptr;
491
0
            ++haystack;
492
0
        }
493
0
    }
494
0
    throw doris::Exception(doris::Status::FatalError("Unreachable statement"));
495
0
}
496
497
// ----------------------------------------------------------------------
498
// Older versions of libc have a buggy strsep.
499
// ----------------------------------------------------------------------
500
501
0
char* gstrsep(char** stringp, const char* delim) {
502
0
    char* s;
503
0
    const char* spanp;
504
0
    int c, sc;
505
0
    char* tok;
506
507
0
    if ((s = *stringp) == nullptr) return nullptr;
508
509
0
    tok = s;
510
0
    while (true) {
511
0
        c = *s++;
512
0
        spanp = delim;
513
0
        do {
514
0
            if ((sc = *spanp++) == c) {
515
0
                if (c == 0)
516
0
                    s = nullptr;
517
0
                else
518
0
                    s[-1] = 0;
519
0
                *stringp = s;
520
0
                return tok;
521
0
            }
522
0
        } while (sc != 0);
523
0
    }
524
525
0
    return nullptr; /* should not happen */
526
0
}
527
528
0
void FastStringAppend(string* s, const char* data, int len) {
529
0
    STLAppendToString(s, data, len);
530
0
}
531
532
// TODO(user): add a microbenchmark and revisit
533
// the optimizations done here.
534
//
535
// Several converters use this table to reduce
536
// division and modulo operations.
537
extern const char two_ASCII_digits[100][2];
538
539
const char two_ASCII_digits[100][2] = {
540
        {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'},
541
        {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'},
542
        {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, {'2', '0'},
543
        {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'},
544
        {'2', '8'}, {'2', '9'}, {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
545
        {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'},
546
        {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'},
547
        {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'},
548
        {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, {'6', '0'}, {'6', '1'}, {'6', '2'},
549
        {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
550
        {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'},
551
        {'7', '7'}, {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'},
552
        {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, {'9', '0'},
553
        {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'},
554
        {'9', '8'}, {'9', '9'}};
555
556
0
static inline void PutTwoDigits(int i, char* p) {
557
0
    DCHECK_GE(i, 0);
558
0
    DCHECK_LT(i, 100);
559
0
    p[0] = two_ASCII_digits[i][0];
560
0
    p[1] = two_ASCII_digits[i][1];
561
0
}
562
563
0
char* FastTimeToBuffer(time_t s, char* buffer) {
564
0
    if (s == 0) {
565
0
        time(&s);
566
0
    }
567
568
0
    struct tm tm;
569
0
    if (PortableSafeGmtime(&s, &tm) == nullptr) {
570
        // Error message must fit in 30-char buffer.
571
0
        memcpy(buffer, "Invalid:", sizeof("Invalid:"));
572
0
        FastInt64ToBufferLeft(s, buffer + strlen(buffer));
573
0
        return buffer;
574
0
    }
575
576
    // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
577
    // but strftime does locale stuff which we do not want
578
    // plus strftime takes > 10x the time of hard code
579
580
0
    const char* weekday_name = "Xxx";
581
0
    switch (tm.tm_wday) {
582
0
    default: {
583
0
        DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday;
584
0
    } break;
585
0
    case 0:
586
0
        weekday_name = "Sun";
587
0
        break;
588
0
    case 1:
589
0
        weekday_name = "Mon";
590
0
        break;
591
0
    case 2:
592
0
        weekday_name = "Tue";
593
0
        break;
594
0
    case 3:
595
0
        weekday_name = "Wed";
596
0
        break;
597
0
    case 4:
598
0
        weekday_name = "Thu";
599
0
        break;
600
0
    case 5:
601
0
        weekday_name = "Fri";
602
0
        break;
603
0
    case 6:
604
0
        weekday_name = "Sat";
605
0
        break;
606
0
    }
607
608
0
    const char* month_name = "Xxx";
609
0
    switch (tm.tm_mon) {
610
0
    default: {
611
0
        DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon;
612
0
    } break;
613
0
    case 0:
614
0
        month_name = "Jan";
615
0
        break;
616
0
    case 1:
617
0
        month_name = "Feb";
618
0
        break;
619
0
    case 2:
620
0
        month_name = "Mar";
621
0
        break;
622
0
    case 3:
623
0
        month_name = "Apr";
624
0
        break;
625
0
    case 4:
626
0
        month_name = "May";
627
0
        break;
628
0
    case 5:
629
0
        month_name = "Jun";
630
0
        break;
631
0
    case 6:
632
0
        month_name = "Jul";
633
0
        break;
634
0
    case 7:
635
0
        month_name = "Aug";
636
0
        break;
637
0
    case 8:
638
0
        month_name = "Sep";
639
0
        break;
640
0
    case 9:
641
0
        month_name = "Oct";
642
0
        break;
643
0
    case 10:
644
0
        month_name = "Nov";
645
0
        break;
646
0
    case 11:
647
0
        month_name = "Dec";
648
0
        break;
649
0
    }
650
651
    // Write out the buffer.
652
653
0
    memcpy(buffer + 0, weekday_name, 3);
654
0
    buffer[3] = ',';
655
0
    buffer[4] = ' ';
656
657
0
    PutTwoDigits(tm.tm_mday, buffer + 5);
658
0
    buffer[7] = ' ';
659
660
0
    memcpy(buffer + 8, month_name, 3);
661
0
    buffer[11] = ' ';
662
663
0
    int32 year = tm.tm_year + 1900;
664
0
    PutTwoDigits(year / 100, buffer + 12);
665
0
    PutTwoDigits(year % 100, buffer + 14);
666
0
    buffer[16] = ' ';
667
668
0
    PutTwoDigits(tm.tm_hour, buffer + 17);
669
0
    buffer[19] = ':';
670
671
0
    PutTwoDigits(tm.tm_min, buffer + 20);
672
0
    buffer[22] = ':';
673
674
0
    PutTwoDigits(tm.tm_sec, buffer + 23);
675
676
    // includes ending NUL
677
0
    memcpy(buffer + 25, " GMT", 5);
678
679
0
    return buffer;
680
0
}
681
682
// ----------------------------------------------------------------------
683
// strdup_with_new()
684
// strndup_with_new()
685
//
686
//    strdup_with_new() is the same as strdup() except that the memory
687
//    is allocated by new[] and hence an exception will be generated
688
//    if out of memory.
689
//
690
//    strndup_with_new() is the same as strdup_with_new() except that it will
691
//    copy up to the specified number of characters.  This function
692
//    is useful when we want to copy a substring out of a string
693
//    and didn't want to (or cannot) modify the string
694
// ----------------------------------------------------------------------
695
0
char* strdup_with_new(const char* the_string) {
696
0
    if (the_string == nullptr)
697
0
        return nullptr;
698
0
    else
699
0
        return strndup_with_new(the_string, strlen(the_string));
700
0
}
701
702
0
char* strndup_with_new(const char* the_string, int max_length) {
703
0
    if (the_string == nullptr) return nullptr;
704
0
    size_t str_len = strlen(the_string);
705
0
    if (str_len > max_length) {
706
0
        str_len = max_length;
707
0
    }
708
0
    auto result = new char[str_len + 1];
709
0
    result[max_length] = '\0'; // truncated string might not have \0 at end
710
0
    memcpy(result, the_string, str_len);
711
0
    return result;
712
0
}
713
714
// ----------------------------------------------------------------------
715
// ScanForFirstWord()
716
//    This function finds the first word in the string "the_string" given.
717
//    A word is defined by consecutive !ascii_isspace() characters.
718
//    If no valid words are found,
719
//        return NULL and *end_ptr will contain junk
720
//    else
721
//        return the beginning of the first word and
722
//        *end_ptr will store the address of the first invalid character
723
//        (ascii_isspace() or '\0').
724
//
725
//    Precondition: (end_ptr != NULL)
726
// ----------------------------------------------------------------------
727
0
const char* ScanForFirstWord(const char* the_string, const char** end_ptr) {
728
0
    CHECK(end_ptr != nullptr) << ": precondition violated";
729
730
0
    if (the_string == nullptr) // empty string
731
0
        return nullptr;
732
733
0
    const char* curr = the_string;
734
0
    while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces
735
0
        ++curr;
736
737
0
    if (*curr == '\0') // no valid word found
738
0
        return nullptr;
739
740
    // else has a valid word
741
0
    const char* first_word = curr;
742
743
    // now locate the end of the word
744
0
    while ((*curr != '\0') && !ascii_isspace(*curr)) ++curr;
745
746
0
    *end_ptr = curr;
747
0
    return first_word;
748
0
}
749
750
// ----------------------------------------------------------------------
751
// AdvanceIdentifier()
752
//    This function returns a pointer past the end of the longest C-style
753
//    identifier that is a prefix of str or NULL if str does not start with
754
//    one.  A C-style identifier begins with an ASCII letter or underscore
755
//    and continues with ASCII letters, digits, or underscores.
756
// ----------------------------------------------------------------------
757
0
const char* AdvanceIdentifier(const char* str) {
758
    // Not using isalpha and isalnum so as not to rely on the locale.
759
    // We could have used ascii_isalpha and ascii_isalnum.
760
0
    char ch = *str++;
761
0
    if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) return nullptr;
762
0
    while (true) {
763
0
        ch = *str;
764
0
        if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') ||
765
0
              ch == '_'))
766
0
            return str;
767
0
        str++;
768
0
    }
769
0
}
770
771
// ----------------------------------------------------------------------
772
// IsIdentifier()
773
//    This function returns true if str is a C-style identifier.
774
//    A C-style identifier begins with an ASCII letter or underscore
775
//    and continues with ASCII letters, digits, or underscores.
776
// ----------------------------------------------------------------------
777
0
bool IsIdentifier(const char* str) {
778
0
    const char* end = AdvanceIdentifier(str);
779
0
    return end && *end == '\0';
780
0
}
781
782
0
static bool IsWildcard(Rune character) {
783
0
    return character == '*' || character == '?';
784
0
}
785
786
// Move the strings pointers to the point where they start to differ.
787
template <typename CHAR, typename NEXT>
788
void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, const CHAR** string,
789
0
                  const CHAR* string_end, NEXT next) {
790
0
    const CHAR* escape = nullptr;
791
0
    while (*pattern != pattern_end && *string != string_end) {
792
0
        if (!escape && IsWildcard(**pattern)) {
793
            // We don't want to match wildcard here, except if it's escaped.
794
0
            return;
795
0
        }
796
797
        // Check if the escapement char is found. If so, skip it and move to the
798
        // next character.
799
0
        if (!escape && **pattern == '\\') {
800
0
            escape = *pattern;
801
0
            next(pattern, pattern_end);
802
0
            continue;
803
0
        }
804
805
        // Check if the chars match, if so, increment the ptrs.
806
0
        const CHAR* pattern_next = *pattern;
807
0
        const CHAR* string_next = *string;
808
0
        Rune pattern_char = next(&pattern_next, pattern_end);
809
0
        if (pattern_char == next(&string_next, string_end) && pattern_char != Runeerror &&
810
0
            pattern_char <= Runemax) {
811
0
            *pattern = pattern_next;
812
0
            *string = string_next;
813
0
        } else {
814
            // Uh ho, it did not match, we are done. If the last char was an
815
            // escapement, that means that it was an error to advance the ptr here,
816
            // let's put it back where it was. This also mean that the MatchPattern
817
            // function will return false because if we can't match an escape char
818
            // here, then no one will.
819
0
            if (escape) {
820
0
                *pattern = escape;
821
0
            }
822
0
            return;
823
0
        }
824
825
0
        escape = nullptr;
826
0
    }
827
0
}
828
829
template <typename CHAR, typename NEXT>
830
0
void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
831
0
    while (*pattern != end) {
832
0
        if (!IsWildcard(**pattern)) return;
833
0
        next(pattern, end);
834
0
    }
835
0
}
836
837
template <typename CHAR, typename NEXT>
838
bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, const CHAR* pattern,
839
0
                   const CHAR* pattern_end, int depth, NEXT next) {
840
0
    const int kMaxDepth = 16;
841
0
    if (depth > kMaxDepth) return false;
842
843
    // Eat all the matching chars.
844
0
    EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
845
846
    // If the string is empty, then the pattern must be empty too, or contains
847
    // only wildcards.
848
0
    if (eval == eval_end) {
849
0
        EatWildcard(&pattern, pattern_end, next);
850
0
        return pattern == pattern_end;
851
0
    }
852
853
    // Pattern is empty but not string, this is not a match.
854
0
    if (pattern == pattern_end) return false;
855
856
    // If this is a question mark, then we need to compare the rest with
857
    // the current string or the string with one character eaten.
858
0
    const CHAR* next_pattern = pattern;
859
0
    next(&next_pattern, pattern_end);
860
0
    if (pattern[0] == '?') {
861
0
        if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true;
862
0
        const CHAR* next_eval = eval;
863
0
        next(&next_eval, eval_end);
864
0
        if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, next))
865
0
            return true;
866
0
    }
867
868
    // This is a *, try to match all the possible substrings with the remainder
869
    // of the pattern.
870
0
    if (pattern[0] == '*') {
871
        // Collapse duplicate wild cards (********** into *) so that the
872
        // method does not recurse unnecessarily. http://crbug.com/52839
873
0
        EatWildcard(&next_pattern, pattern_end, next);
874
875
0
        while (eval != eval_end) {
876
0
            if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next))
877
0
                return true;
878
0
            eval++;
879
0
        }
880
881
        // We reached the end of the string, let see if the pattern contains only
882
        // wildcards.
883
0
        if (eval == eval_end) {
884
0
            EatWildcard(&pattern, pattern_end, next);
885
0
            if (pattern != pattern_end) return false;
886
0
            return true;
887
0
        }
888
0
    }
889
890
0
    return false;
891
0
}
892
893
struct NextCharUTF8 {
894
0
    Rune operator()(const char** p, const char* end) {
895
0
        Rune c;
896
0
        int offset = charntorune(&c, *p, static_cast<int>(end - *p));
897
0
        *p += offset;
898
0
        return c;
899
0
    }
900
};
901
902
0
bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) {
903
0
    return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
904
0
                         pattern.data() + pattern.size(), 0, NextCharUTF8());
905
0
}
906
907
// ----------------------------------------------------------------------
908
// FindTagValuePair
909
//    Given a string of the form
910
//    <something><attr_sep><tag><tag_value_sep><value><attr_sep>...<string_term>
911
//    where the part before the first attr_sep is optional,
912
//    this function extracts the first tag and value, if any.
913
//    The function returns true if successful, in which case "tag" and "value"
914
//    are set to point to the beginning of the tag and the value, respectively,
915
//    and "tag_len" and "value_len" are set to the respective lengths.
916
// ----------------------------------------------------------------------
917
918
bool FindTagValuePair(const char* arg_str, char tag_value_separator, char attribute_separator,
919
                      char string_terminal, char** tag, int* tag_len, char** value,
920
0
                      int* value_len) {
921
0
    char* in_str = const_cast<char*>(arg_str); // For msvc8.
922
0
    if (in_str == nullptr) return false;
923
0
    char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'};
924
0
    char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'};
925
926
    // Look for beginning of tag
927
0
    *tag = strpbrk(in_str, attr_sep_or_term);
928
    // If string_terminal is '\0', strpbrk won't find it but return null.
929
0
    if (*tag == nullptr || **tag == string_terminal)
930
0
        *tag = in_str;
931
0
    else
932
0
        (*tag)++; // Move past separator
933
    // Now look for value...
934
0
    char* tv_sep_pos = strpbrk(*tag, tv_sep_or_term);
935
0
    if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) return false;
936
    // ...and end of value
937
0
    char* attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term);
938
939
0
    *tag_len = tv_sep_pos - *tag;
940
0
    *value = tv_sep_pos + 1;
941
0
    if (attr_sep_pos != nullptr)
942
0
        *value_len = attr_sep_pos - *value;
943
0
    else
944
0
        *value_len = strlen(*value);
945
0
    return true;
946
0
}
947
948
0
void UniformInsertString(string* s, int interval, const char* separator) {
949
0
    const size_t separator_len = strlen(separator);
950
951
0
    if (interval < 1 ||     // invalid interval
952
0
        s->empty() ||       // nothing to do
953
0
        separator_len == 0) // invalid separator
954
0
        return;
955
956
0
    int num_inserts = (s->size() - 1) / interval; // -1 to avoid appending at end
957
0
    if (num_inserts == 0)                         // nothing to do
958
0
        return;
959
960
0
    string tmp;
961
0
    tmp.reserve(s->size() + num_inserts * separator_len + 1);
962
963
0
    for (int i = 0; i < num_inserts; ++i) {
964
        // append this interval
965
0
        tmp.append(*s, i * interval, interval);
966
        // append a separator
967
0
        tmp.append(separator, separator_len);
968
0
    }
969
970
    // append the tail
971
0
    const size_t tail_pos = num_inserts * interval;
972
0
    tmp.append(*s, tail_pos, s->size() - tail_pos);
973
974
0
    s->swap(tmp);
975
0
}
976
977
0
void InsertString(string* const s, const vector<uint32>& indices, char const* const separator) {
978
0
    const unsigned num_indices(indices.size());
979
0
    if (num_indices == 0) {
980
0
        return; // nothing to do...
981
0
    }
982
983
0
    const unsigned separator_len(strlen(separator));
984
0
    if (separator_len == 0) {
985
0
        return; // still nothing to do...
986
0
    }
987
988
0
    string tmp;
989
0
    const unsigned s_len(s->size());
990
0
    tmp.reserve(s_len + separator_len * num_indices);
991
992
0
    vector<uint32>::const_iterator const ind_end(indices.end());
993
0
    auto ind_pos(indices.begin());
994
995
0
    uint32 last_pos(0);
996
0
    while (ind_pos != ind_end) {
997
0
        const uint32 pos(*ind_pos);
998
0
        DCHECK_GE(pos, last_pos);
999
0
        DCHECK_LE(pos, s_len);
1000
1001
0
        tmp.append(s->substr(last_pos, pos - last_pos));
1002
0
        tmp.append(separator);
1003
1004
0
        last_pos = pos;
1005
0
        ++ind_pos;
1006
0
    }
1007
0
    tmp.append(s->substr(last_pos));
1008
1009
0
    s->swap(tmp);
1010
0
}
1011
1012
//------------------------------------------------------------------------
1013
// FindNth()
1014
//  return index of nth occurrence of c in the string,
1015
//  or string::npos if n > number of occurrences of c.
1016
//  (returns string::npos = -1 if n <= 0)
1017
//------------------------------------------------------------------------
1018
0
int FindNth(StringPiece s, char c, int n) {
1019
0
    size_t pos = string::npos;
1020
1021
0
    for (int i = 0; i < n; ++i) {
1022
0
        pos = s.find_first_of(c, pos + 1);
1023
0
        if (pos == StringPiece::npos) {
1024
0
            break;
1025
0
        }
1026
0
    }
1027
0
    return pos;
1028
0
}
1029
1030
//------------------------------------------------------------------------
1031
// ReverseFindNth()
1032
//  return index of nth-to-last occurrence of c in the string,
1033
//  or string::npos if n > number of occurrences of c.
1034
//  (returns string::npos if n <= 0)
1035
//------------------------------------------------------------------------
1036
0
int ReverseFindNth(StringPiece s, char c, int n) {
1037
0
    if (n <= 0) {
1038
0
        return static_cast<int>(StringPiece::npos);
1039
0
    }
1040
1041
0
    size_t pos = s.size();
1042
1043
0
    for (int i = 0; i < n; ++i) {
1044
        // If pos == 0, we return StringPiece::npos right away. Otherwise,
1045
        // the following find_last_of call would take (pos - 1) as string::npos,
1046
        // which means it would again search the entire input string.
1047
0
        if (pos == 0) {
1048
0
            return static_cast<int>(StringPiece::npos);
1049
0
        }
1050
0
        pos = s.find_last_of(c, pos - 1);
1051
0
        if (pos == string::npos) {
1052
0
            break;
1053
0
        }
1054
0
    }
1055
0
    return pos;
1056
0
}
1057
1058
namespace strings {
1059
1060
// FindEol()
1061
// Returns the location of the next end-of-line sequence.
1062
1063
0
StringPiece FindEol(StringPiece s) {
1064
0
    for (size_t i = 0; i < s.length(); ++i) {
1065
0
        if (s[i] == '\n') {
1066
0
            return StringPiece(s.data() + i, 1);
1067
0
        }
1068
0
        if (s[i] == '\r') {
1069
0
            if (i + 1 < s.length() && s[i + 1] == '\n') {
1070
0
                return StringPiece(s.data() + i, 2);
1071
0
            } else {
1072
0
                return StringPiece(s.data() + i, 1);
1073
0
            }
1074
0
        }
1075
0
    }
1076
0
    return StringPiece(s.data() + s.length(), 0);
1077
0
}
1078
1079
} // namespace strings
1080
1081
//------------------------------------------------------------------------
1082
// OnlyWhitespace()
1083
//  return true if string s contains only whitespace characters
1084
//------------------------------------------------------------------------
1085
0
bool OnlyWhitespace(const StringPiece& s) {
1086
0
    for (const auto& c : s) {
1087
0
        if (!ascii_isspace(c)) return false;
1088
0
    }
1089
0
    return true;
1090
0
}
1091
1092
0
string PrefixSuccessor(const StringPiece& prefix) {
1093
    // We can increment the last character in the string and be done
1094
    // unless that character is 255, in which case we have to erase the
1095
    // last character and increment the previous character, unless that
1096
    // is 255, etc. If the string is empty or consists entirely of
1097
    // 255's, we just return the empty string.
1098
0
    bool done = false;
1099
0
    string limit(prefix.data(), prefix.size());
1100
0
    int index = limit.length() - 1;
1101
0
    while (!done && index >= 0) {
1102
0
        if (limit[index] == 255) {
1103
0
            limit.erase(index);
1104
0
            index--;
1105
0
        } else {
1106
0
            limit[index]++;
1107
0
            done = true;
1108
0
        }
1109
0
    }
1110
0
    if (!done) {
1111
0
        return "";
1112
0
    } else {
1113
0
        return limit;
1114
0
    }
1115
0
}
1116
1117
0
string ImmediateSuccessor(const StringPiece& s) {
1118
    // Return the input string, with an additional NUL byte appended.
1119
0
    string out;
1120
0
    out.reserve(s.size() + 1);
1121
0
    out.append(s.data(), s.size());
1122
0
    out.push_back('\0');
1123
0
    return out;
1124
0
}
1125
1126
0
void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, string* separator) {
1127
    // Find length of common prefix
1128
0
    size_t min_length = min(start.size(), limit.size());
1129
0
    size_t diff_index = 0;
1130
0
    while ((diff_index < min_length) && (start[diff_index] == limit[diff_index])) {
1131
0
        diff_index++;
1132
0
    }
1133
1134
0
    if (diff_index >= min_length) {
1135
        // Handle the case where either string is a prefix of the other
1136
        // string, or both strings are identical.
1137
0
        start.CopyToString(separator);
1138
0
        return;
1139
0
    }
1140
1141
0
    if (diff_index + 1 == start.size()) {
1142
        // If the first difference is in the last character, do not bother
1143
        // incrementing that character since the separator will be no
1144
        // shorter than "start".
1145
0
        start.CopyToString(separator);
1146
0
        return;
1147
0
    }
1148
1149
0
    if (start[diff_index] == 0xff) {
1150
        // Avoid overflow when incrementing start[diff_index]
1151
0
        start.CopyToString(separator);
1152
0
        return;
1153
0
    }
1154
1155
0
    separator->assign(start.data(), diff_index);
1156
0
    separator->push_back(start[diff_index] + 1);
1157
0
    if (*separator >= limit) {
1158
        // Never pick a separator that causes confusion with "limit"
1159
0
        start.CopyToString(separator);
1160
0
    }
1161
0
}
1162
1163
0
int SafeSnprintf(char* str, size_t size, const char* format, ...) {
1164
0
    va_list printargs;
1165
0
    va_start(printargs, format);
1166
0
    int ncw = vsnprintf(str, size, format, printargs);
1167
0
    va_end(printargs);
1168
0
    return (ncw < size && ncw >= 0) ? ncw : 0;
1169
0
}
1170
1171
0
bool GetlineFromStdioFile(FILE* file, string* str, char delim) {
1172
0
    str->erase();
1173
0
    while (true) {
1174
0
        if (feof(file) || ferror(file)) {
1175
0
            return false;
1176
0
        }
1177
0
        int c = getc(file);
1178
0
        if (c == EOF) return false;
1179
0
        if (c == delim) return true;
1180
0
        str->push_back(c);
1181
0
    }
1182
0
}
1183
1184
namespace {
1185
1186
template <typename CHAR>
1187
0
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
1188
0
    for (size_t i = 0; i < dst_size; ++i) {
1189
0
        if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL.
1190
0
            return i;
1191
0
    }
1192
1193
    // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
1194
0
    if (dst_size != 0) dst[dst_size - 1] = 0;
1195
1196
    // Count the rest of the |src|, and return it's length in characters.
1197
0
    while (src[dst_size]) ++dst_size;
1198
0
    return dst_size;
1199
0
}
1200
1201
} // namespace
1202
1203
0
size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) {
1204
0
    return lcpyT<char>(dst, src, dst_size);
1205
0
}