/root/doris/be/src/gutil/strings/util.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // |
2 | | // Copyright (C) 1999-2005 Google, Inc. |
3 | | // |
4 | | |
5 | | // TODO(user): visit each const_cast. Some of them are no longer necessary |
6 | | // because last Single Unix Spec and grte v2 are more const-y. |
7 | | |
8 | | #include "gutil/strings/util.h" |
9 | | |
10 | | // IWYU pragma: no_include <pstl/glue_algorithm_defs.h> |
11 | | |
12 | | #include <assert.h> |
13 | | #include <stdarg.h> |
14 | | #include <stdio.h> |
15 | | #include <string.h> |
16 | | #include <time.h> // for FastTimeToBuffer() |
17 | | #include <algorithm> |
18 | | #include <iterator> |
19 | | #include <mutex> |
20 | | #include <ostream> |
21 | | |
22 | | #include "common/exception.h" |
23 | | |
24 | | using std::copy; |
25 | | using std::max; |
26 | | using std::min; |
27 | | using std::reverse; |
28 | | using std::sort; |
29 | | using std::swap; |
30 | | #include <string> |
31 | | |
32 | | using std::string; |
33 | | #include <vector> |
34 | | |
35 | | using std::vector; |
36 | | |
37 | | #include "common/logging.h" |
38 | | |
39 | | #include "gutil/stl_util.h" // for string_as_array, STLAppendToString |
40 | | #include "gutil/strings/ascii_ctype.h" |
41 | | #include "gutil/strings/numbers.h" |
42 | | #include "gutil/strings/stringpiece.h" |
43 | | #include "gutil/utf/utf.h" |
44 | | #include "gutil/stringprintf.h" |
45 | | |
46 | | #ifdef OS_WINDOWS |
47 | | #ifdef min // windows.h defines this to something silly |
48 | | #undef min |
49 | | #endif |
50 | | #endif |
51 | | |
52 | | // Use this instead of gmtime_r if you want to build for Windows. |
53 | | // Windows doesn't have a 'gmtime_r', but it has the similar 'gmtime_s'. |
54 | | // TODO(user): Probably belongs in //base:time_support.{cc|h}. |
55 | 0 | static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) { |
56 | | #ifdef OS_WINDOWS |
57 | | return gmtime_s(result, timep) == 0 ? result : NULL; |
58 | | #else |
59 | 0 | return gmtime_r(timep, result); |
60 | 0 | #endif // OS_WINDOWS |
61 | 0 | } |
62 | | |
63 | 0 | char* strnstr(const char* haystack, const char* needle, size_t haystack_len) { |
64 | 0 | if (*needle == '\0') { |
65 | 0 | return const_cast<char*>(haystack); |
66 | 0 | } |
67 | 0 | size_t needle_len = strlen(needle); |
68 | 0 | char* where; |
69 | 0 | while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) { |
70 | 0 | if (where - haystack + needle_len > haystack_len) { |
71 | 0 | return nullptr; |
72 | 0 | } |
73 | 0 | if (strncmp(where, needle, needle_len) == 0) { |
74 | 0 | return where; |
75 | 0 | } |
76 | 0 | haystack_len -= where + 1 - haystack; |
77 | 0 | haystack = where + 1; |
78 | 0 | } |
79 | 0 | return nullptr; |
80 | 0 | } |
81 | | |
82 | | const char* strnprefix(const char* haystack, int haystack_size, const char* needle, |
83 | 0 | int needle_size) { |
84 | 0 | if (needle_size > haystack_size) { |
85 | 0 | return nullptr; |
86 | 0 | } else { |
87 | 0 | if (strncmp(haystack, needle, needle_size) == 0) { |
88 | 0 | return haystack + needle_size; |
89 | 0 | } else { |
90 | 0 | return nullptr; |
91 | 0 | } |
92 | 0 | } |
93 | 0 | } |
94 | | |
95 | | const char* strncaseprefix(const char* haystack, int haystack_size, const char* needle, |
96 | 0 | int needle_size) { |
97 | 0 | if (needle_size > haystack_size) { |
98 | 0 | return nullptr; |
99 | 0 | } else { |
100 | 0 | if (strncasecmp(haystack, needle, needle_size) == 0) { |
101 | 0 | return haystack + needle_size; |
102 | 0 | } else { |
103 | 0 | return nullptr; |
104 | 0 | } |
105 | 0 | } |
106 | 0 | } |
107 | | |
108 | 0 | char* strcasesuffix(char* str, const char* suffix) { |
109 | 0 | const int lenstr = strlen(str); |
110 | 0 | const int lensuffix = strlen(suffix); |
111 | 0 | char* strbeginningoftheend = str + lenstr - lensuffix; |
112 | |
|
113 | 0 | if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) { |
114 | 0 | return (strbeginningoftheend); |
115 | 0 | } else { |
116 | 0 | return (nullptr); |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | const char* strnsuffix(const char* haystack, int haystack_size, const char* needle, |
121 | 0 | int needle_size) { |
122 | 0 | if (needle_size > haystack_size) { |
123 | 0 | return nullptr; |
124 | 0 | } else { |
125 | 0 | const char* start = haystack + haystack_size - needle_size; |
126 | 0 | if (strncmp(start, needle, needle_size) == 0) { |
127 | 0 | return start; |
128 | 0 | } else { |
129 | 0 | return nullptr; |
130 | 0 | } |
131 | 0 | } |
132 | 0 | } |
133 | | |
134 | | const char* strncasesuffix(const char* haystack, int haystack_size, const char* needle, |
135 | 0 | int needle_size) { |
136 | 0 | if (needle_size > haystack_size) { |
137 | 0 | return nullptr; |
138 | 0 | } else { |
139 | 0 | const char* start = haystack + haystack_size - needle_size; |
140 | 0 | if (strncasecmp(start, needle, needle_size) == 0) { |
141 | 0 | return start; |
142 | 0 | } else { |
143 | 0 | return nullptr; |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } |
147 | | |
148 | 0 | char* strchrnth(const char* str, const char& c, int n) { |
149 | 0 | if (str == nullptr) return nullptr; |
150 | 0 | if (n <= 0) return const_cast<char*>(str); |
151 | 0 | const char* sp; |
152 | 0 | int k = 0; |
153 | 0 | for (sp = str; *sp != '\0'; sp++) { |
154 | 0 | if (*sp == c) { |
155 | 0 | ++k; |
156 | 0 | if (k >= n) break; |
157 | 0 | } |
158 | 0 | } |
159 | 0 | return (k < n) ? nullptr : const_cast<char*>(sp); |
160 | 0 | } |
161 | | |
162 | 0 | char* AdjustedLastPos(const char* str, char separator, int n) { |
163 | 0 | if (str == nullptr) return nullptr; |
164 | 0 | const char* pos = nullptr; |
165 | 0 | if (n > 0) pos = strchrnth(str, separator, n); |
166 | | |
167 | | // if n <= 0 or separator appears fewer than n times, get the last occurrence |
168 | 0 | if (pos == nullptr) pos = strrchr(str, separator); |
169 | 0 | return const_cast<char*>(pos); |
170 | 0 | } |
171 | | |
172 | | // ---------------------------------------------------------------------- |
173 | | // Misc. routines |
174 | | // ---------------------------------------------------------------------- |
175 | | |
176 | 0 | bool IsAscii(const char* str, int len) { |
177 | 0 | const char* end = str + len; |
178 | 0 | while (str < end) { |
179 | 0 | if (!ascii_isascii(*str++)) { |
180 | 0 | return false; |
181 | 0 | } |
182 | 0 | } |
183 | 0 | return true; |
184 | 0 | } |
185 | | |
186 | | // ---------------------------------------------------------------------- |
187 | | // StringReplace() |
188 | | // Give me a string and two patterns "old" and "new", and I replace |
189 | | // the first instance of "old" in the string with "new", if it |
190 | | // exists. If "replace_all" is true then call this repeatedly until it |
191 | | // fails. RETURN a new string, regardless of whether the replacement |
192 | | // happened or not. |
193 | | // ---------------------------------------------------------------------- |
194 | | |
195 | | string StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, |
196 | 0 | bool replace_all) { |
197 | 0 | string ret; |
198 | 0 | StringReplace(s, oldsub, newsub, replace_all, &ret); |
199 | 0 | return ret; |
200 | 0 | } |
201 | | |
202 | | // ---------------------------------------------------------------------- |
203 | | // StringReplace() |
204 | | // Replace the "old" pattern with the "new" pattern in a string, |
205 | | // and append the result to "res". If replace_all is false, |
206 | | // it only replaces the first instance of "old." |
207 | | // ---------------------------------------------------------------------- |
208 | | |
209 | | void StringReplace(const StringPiece& s, const StringPiece& oldsub, const StringPiece& newsub, |
210 | 0 | bool replace_all, string* res) { |
211 | 0 | if (oldsub.empty()) { |
212 | 0 | res->append(s.data(), s.length()); // If empty, append the given string. |
213 | 0 | return; |
214 | 0 | } |
215 | | |
216 | 0 | StringPiece::size_type start_pos = 0; |
217 | 0 | StringPiece::size_type pos; |
218 | 0 | do { |
219 | 0 | pos = s.find(oldsub, start_pos); |
220 | 0 | if (pos == StringPiece::npos) { |
221 | 0 | break; |
222 | 0 | } |
223 | 0 | res->append(s.data() + start_pos, pos - start_pos); |
224 | 0 | res->append(newsub.data(), newsub.length()); |
225 | | // Start searching again after the "old". |
226 | 0 | start_pos = pos + oldsub.length(); |
227 | 0 | } while (replace_all); |
228 | 0 | res->append(s.data() + start_pos, s.length() - start_pos); |
229 | 0 | } |
230 | | |
231 | | // ---------------------------------------------------------------------- |
232 | | // GlobalReplaceSubstring() |
233 | | // Replaces all instances of a substring in a string. Does nothing |
234 | | // if 'substring' is empty. Returns the number of replacements. |
235 | | // |
236 | | // NOTE: The string pieces must not overlap s. |
237 | | // ---------------------------------------------------------------------- |
238 | | |
239 | | int GlobalReplaceSubstring(const StringPiece& substring, const StringPiece& replacement, |
240 | 0 | string* s) { |
241 | 0 | CHECK(s != nullptr); |
242 | 0 | if (s->empty() || substring.empty()) return 0; |
243 | 0 | string tmp; |
244 | 0 | int num_replacements = 0; |
245 | 0 | size_t pos = 0; |
246 | 0 | for (size_t match_pos = s->find(substring.data(), pos, substring.length()); |
247 | 0 | match_pos != string::npos; pos = match_pos + substring.length(), |
248 | 0 | match_pos = s->find(substring.data(), pos, substring.length())) { |
249 | 0 | ++num_replacements; |
250 | | // Append the original content before the match. |
251 | 0 | tmp.append(*s, pos, match_pos - pos); |
252 | | // Append the replacement for the match. |
253 | 0 | tmp.append(replacement.begin(), replacement.end()); |
254 | 0 | } |
255 | | // Append the content after the last match. If no replacements were made, the |
256 | | // original string is left untouched. |
257 | 0 | if (num_replacements > 0) { |
258 | 0 | tmp.append(*s, pos, s->length() - pos); |
259 | 0 | s->swap(tmp); |
260 | 0 | } |
261 | 0 | return num_replacements; |
262 | 0 | } |
263 | | |
264 | | //--------------------------------------------------------------------------- |
265 | | // RemoveStrings() |
266 | | // Remove the strings from v given by the (sorted least -> greatest) |
267 | | // numbers in indices. |
268 | | // Order of v is *not* preserved. |
269 | | //--------------------------------------------------------------------------- |
270 | 0 | void RemoveStrings(vector<string>* v, const vector<int>& indices) { |
271 | 0 | assert(v); |
272 | 0 | assert(indices.size() <= v->size()); |
273 | | // go from largest index to smallest so that smaller indices aren't |
274 | | // invalidated |
275 | 0 | for (int lcv = indices.size() - 1; lcv >= 0; --lcv) { |
276 | 0 | #ifndef NDEBUG |
277 | | // verify that indices is sorted least->greatest |
278 | 0 | if (indices.size() >= 2 && lcv > 0) |
279 | | // use LT and not LE because we should never see repeat indices |
280 | 0 | CHECK_LT(indices[lcv - 1], indices[lcv]); |
281 | 0 | #endif |
282 | 0 | assert(indices[lcv] >= 0); |
283 | 0 | assert(indices[lcv] < v->size()); |
284 | 0 | swap((*v)[indices[lcv]], v->back()); |
285 | 0 | v->pop_back(); |
286 | 0 | } |
287 | 0 | } |
288 | | |
289 | | // ---------------------------------------------------------------------- |
290 | | // gstrcasestr is a case-insensitive strstr. Eventually we should just |
291 | | // use the GNU libc version of strcasestr, but it isn't compiled into |
292 | | // RedHat Linux by default in version 6.1. |
293 | | // |
294 | | // This function uses ascii_tolower() instead of tolower(), for speed. |
295 | | // ---------------------------------------------------------------------- |
296 | | |
297 | 0 | char* gstrcasestr(const char* haystack, const char* needle) { |
298 | 0 | char c, sc; |
299 | 0 | size_t len; |
300 | |
|
301 | 0 | if ((c = *needle++) != 0) { |
302 | 0 | c = ascii_tolower(c); |
303 | 0 | len = strlen(needle); |
304 | 0 | do { |
305 | 0 | do { |
306 | 0 | if ((sc = *haystack++) == 0) return nullptr; |
307 | 0 | } while (ascii_tolower(sc) != c); |
308 | 0 | } while (strncasecmp(haystack, needle, len) != 0); |
309 | 0 | haystack--; |
310 | 0 | } |
311 | | // This is a const violation but strstr() also returns a char*. |
312 | 0 | return const_cast<char*>(haystack); |
313 | 0 | } |
314 | | |
315 | | // ---------------------------------------------------------------------- |
316 | | // gstrncasestr is a case-insensitive strnstr. |
317 | | // Finds the occurence of the (null-terminated) needle in the |
318 | | // haystack, where no more than len bytes of haystack is searched. |
319 | | // Characters that appear after a '\0' in the haystack are not searched. |
320 | | // |
321 | | // This function uses ascii_tolower() instead of tolower(), for speed. |
322 | | // ---------------------------------------------------------------------- |
323 | 0 | const char* gstrncasestr(const char* haystack, const char* needle, size_t len) { |
324 | 0 | char c, sc; |
325 | |
|
326 | 0 | if ((c = *needle++) != 0) { |
327 | 0 | c = ascii_tolower(c); |
328 | 0 | size_t needle_len = strlen(needle); |
329 | 0 | do { |
330 | 0 | do { |
331 | 0 | if (len-- <= needle_len || 0 == (sc = *haystack++)) return nullptr; |
332 | 0 | } while (ascii_tolower(sc) != c); |
333 | 0 | } while (strncasecmp(haystack, needle, needle_len) != 0); |
334 | 0 | haystack--; |
335 | 0 | } |
336 | 0 | return haystack; |
337 | 0 | } |
338 | | |
339 | | // ---------------------------------------------------------------------- |
340 | | // gstrncasestr is a case-insensitive strnstr. |
341 | | // Finds the occurence of the (null-terminated) needle in the |
342 | | // haystack, where no more than len bytes of haystack is searched. |
343 | | // Characters that appear after a '\0' in the haystack are not searched. |
344 | | // |
345 | | // This function uses ascii_tolower() instead of tolower(), for speed. |
346 | | // ---------------------------------------------------------------------- |
347 | 0 | char* gstrncasestr(char* haystack, const char* needle, size_t len) { |
348 | 0 | return const_cast<char*>(gstrncasestr(static_cast<const char*>(haystack), needle, len)); |
349 | 0 | } |
350 | | // ---------------------------------------------------------------------- |
351 | | // gstrncasestr_split performs a case insensitive search |
352 | | // on (prefix, non_alpha, suffix). |
353 | | // ---------------------------------------------------------------------- |
354 | | char* gstrncasestr_split(const char* str, const char* prefix, char non_alpha, const char* suffix, |
355 | 0 | size_t n) { |
356 | 0 | int prelen = prefix == nullptr ? 0 : strlen(prefix); |
357 | 0 | int suflen = suffix == nullptr ? 0 : strlen(suffix); |
358 | | |
359 | | // adjust the string and its length to avoid unnessary searching. |
360 | | // an added benefit is to avoid unnecessary range checks in the if |
361 | | // statement in the inner loop. |
362 | 0 | if (suflen + prelen >= n) return nullptr; |
363 | 0 | str += prelen; |
364 | 0 | n -= prelen; |
365 | 0 | n -= suflen; |
366 | |
|
367 | 0 | const char* where = nullptr; |
368 | | |
369 | | // for every occurance of non_alpha in the string ... |
370 | 0 | while ((where = static_cast<const char*>(memchr(str, non_alpha, n))) != nullptr) { |
371 | | // ... test whether it is followed by suffix and preceded by prefix |
372 | 0 | if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) && |
373 | 0 | (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) { |
374 | 0 | return const_cast<char*>(where - prelen); |
375 | 0 | } |
376 | | // if not, advance the pointer, and adjust the length according |
377 | 0 | n -= (where + 1) - str; |
378 | 0 | str = where + 1; |
379 | 0 | } |
380 | | |
381 | 0 | return nullptr; |
382 | 0 | } |
383 | | |
384 | | // ---------------------------------------------------------------------- |
385 | | // strcasestr_alnum is like a case-insensitive strstr, except that it |
386 | | // ignores non-alphanumeric characters in both strings for the sake of |
387 | | // comparison. |
388 | | // |
389 | | // This function uses ascii_isalnum() instead of isalnum() and |
390 | | // ascii_tolower() instead of tolower(), for speed. |
391 | | // |
392 | | // E.g. strcasestr_alnum("i use google all the time", " !!Google!! ") |
393 | | // returns pointer to "google all the time" |
394 | | // ---------------------------------------------------------------------- |
395 | 0 | char* strcasestr_alnum(const char* haystack, const char* needle) { |
396 | 0 | const char* haystack_ptr; |
397 | 0 | const char* needle_ptr; |
398 | | |
399 | | // Skip non-alnums at beginning |
400 | 0 | while (!ascii_isalnum(*needle)) |
401 | 0 | if (*needle++ == '\0') return const_cast<char*>(haystack); |
402 | 0 | needle_ptr = needle; |
403 | | |
404 | | // Skip non-alnums at beginning |
405 | 0 | while (!ascii_isalnum(*haystack)) |
406 | 0 | if (*haystack++ == '\0') return nullptr; |
407 | 0 | haystack_ptr = haystack; |
408 | |
|
409 | 0 | while (*needle_ptr != '\0') { |
410 | | // Non-alnums - advance |
411 | 0 | while (!ascii_isalnum(*needle_ptr)) |
412 | 0 | if (*needle_ptr++ == '\0') return const_cast<char*>(haystack); |
413 | | |
414 | 0 | while (!ascii_isalnum(*haystack_ptr)) |
415 | 0 | if (*haystack_ptr++ == '\0') return nullptr; |
416 | | |
417 | 0 | if (ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr)) { |
418 | | // Case-insensitive match - advance |
419 | 0 | needle_ptr++; |
420 | 0 | haystack_ptr++; |
421 | 0 | } else { |
422 | | // No match - rollback to next start point in haystack |
423 | 0 | haystack++; |
424 | 0 | while (!ascii_isalnum(*haystack)) |
425 | 0 | if (*haystack++ == '\0') return nullptr; |
426 | 0 | haystack_ptr = haystack; |
427 | 0 | needle_ptr = needle; |
428 | 0 | } |
429 | 0 | } |
430 | 0 | return const_cast<char*>(haystack); |
431 | 0 | } |
432 | | |
433 | | // ---------------------------------------------------------------------- |
434 | | // CountSubstring() |
435 | | // Return the number times a "substring" appears in the "text" |
436 | | // NOTE: this function's complexity is O(|text| * |substring|) |
437 | | // It is meant for short "text" (such as to ensure the |
438 | | // printf format string has the right number of arguments). |
439 | | // DO NOT pass in long "text". |
440 | | // ---------------------------------------------------------------------- |
441 | 0 | int CountSubstring(StringPiece text, StringPiece substring) { |
442 | 0 | CHECK_GT(substring.length(), 0); |
443 | |
|
444 | 0 | int count = 0; |
445 | 0 | StringPiece::size_type curr = 0; |
446 | 0 | while (StringPiece::npos != (curr = text.find(substring, curr))) { |
447 | 0 | ++count; |
448 | 0 | ++curr; |
449 | 0 | } |
450 | 0 | return count; |
451 | 0 | } |
452 | | |
453 | | // ---------------------------------------------------------------------- |
454 | | // strstr_delimited() |
455 | | // Just like strstr(), except it ensures that the needle appears as |
456 | | // a complete item (or consecutive series of items) in a delimited |
457 | | // list. |
458 | | // |
459 | | // Like strstr(), returns haystack if needle is empty, or NULL if |
460 | | // either needle/haystack is NULL. |
461 | | // ---------------------------------------------------------------------- |
462 | 0 | const char* strstr_delimited(const char* haystack, const char* needle, char delim) { |
463 | 0 | if (!needle || !haystack) return nullptr; |
464 | 0 | if (*needle == '\0') return haystack; |
465 | | |
466 | 0 | int needle_len = strlen(needle); |
467 | |
|
468 | 0 | while (true) { |
469 | | // Skip any leading delimiters. |
470 | 0 | while (*haystack == delim) ++haystack; |
471 | | |
472 | | // Walk down the haystack, matching every character in the needle. |
473 | 0 | const char* this_match = haystack; |
474 | 0 | int i = 0; |
475 | 0 | for (; i < needle_len; i++) { |
476 | 0 | if (*haystack != needle[i]) { |
477 | | // We ran out of haystack or found a non-matching character. |
478 | 0 | break; |
479 | 0 | } |
480 | 0 | ++haystack; |
481 | 0 | } |
482 | | |
483 | | // If we matched the whole needle, ensure that it's properly delimited. |
484 | 0 | if (i == needle_len && (*haystack == '\0' || *haystack == delim)) { |
485 | 0 | return this_match; |
486 | 0 | } |
487 | | |
488 | | // No match. Consume non-delimiter characters until we run out of them. |
489 | 0 | while (*haystack != delim) { |
490 | 0 | if (*haystack == '\0') return nullptr; |
491 | 0 | ++haystack; |
492 | 0 | } |
493 | 0 | } |
494 | 0 | throw doris::Exception(doris::Status::FatalError("Unreachable statement")); |
495 | 0 | } |
496 | | |
497 | | // ---------------------------------------------------------------------- |
498 | | // Older versions of libc have a buggy strsep. |
499 | | // ---------------------------------------------------------------------- |
500 | | |
501 | 0 | char* gstrsep(char** stringp, const char* delim) { |
502 | 0 | char* s; |
503 | 0 | const char* spanp; |
504 | 0 | int c, sc; |
505 | 0 | char* tok; |
506 | |
|
507 | 0 | if ((s = *stringp) == nullptr) return nullptr; |
508 | | |
509 | 0 | tok = s; |
510 | 0 | while (true) { |
511 | 0 | c = *s++; |
512 | 0 | spanp = delim; |
513 | 0 | do { |
514 | 0 | if ((sc = *spanp++) == c) { |
515 | 0 | if (c == 0) |
516 | 0 | s = nullptr; |
517 | 0 | else |
518 | 0 | s[-1] = 0; |
519 | 0 | *stringp = s; |
520 | 0 | return tok; |
521 | 0 | } |
522 | 0 | } while (sc != 0); |
523 | 0 | } |
524 | | |
525 | 0 | return nullptr; /* should not happen */ |
526 | 0 | } |
527 | | |
528 | 0 | void FastStringAppend(string* s, const char* data, int len) { |
529 | 0 | STLAppendToString(s, data, len); |
530 | 0 | } |
531 | | |
532 | | // TODO(user): add a microbenchmark and revisit |
533 | | // the optimizations done here. |
534 | | // |
535 | | // Several converters use this table to reduce |
536 | | // division and modulo operations. |
537 | | extern const char two_ASCII_digits[100][2]; |
538 | | |
539 | | const char two_ASCII_digits[100][2] = { |
540 | | {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, |
541 | | {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, |
542 | | {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, {'2', '0'}, |
543 | | {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, |
544 | | {'2', '8'}, {'2', '9'}, {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, |
545 | | {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, |
546 | | {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, |
547 | | {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, |
548 | | {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, {'6', '0'}, {'6', '1'}, {'6', '2'}, |
549 | | {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, |
550 | | {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, |
551 | | {'7', '7'}, {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, |
552 | | {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, {'9', '0'}, |
553 | | {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, |
554 | | {'9', '8'}, {'9', '9'}}; |
555 | | |
556 | 0 | static inline void PutTwoDigits(int i, char* p) { |
557 | 0 | DCHECK_GE(i, 0); |
558 | 0 | DCHECK_LT(i, 100); |
559 | 0 | p[0] = two_ASCII_digits[i][0]; |
560 | 0 | p[1] = two_ASCII_digits[i][1]; |
561 | 0 | } |
562 | | |
563 | 0 | char* FastTimeToBuffer(time_t s, char* buffer) { |
564 | 0 | if (s == 0) { |
565 | 0 | time(&s); |
566 | 0 | } |
567 | |
|
568 | 0 | struct tm tm; |
569 | 0 | if (PortableSafeGmtime(&s, &tm) == nullptr) { |
570 | | // Error message must fit in 30-char buffer. |
571 | 0 | memcpy(buffer, "Invalid:", sizeof("Invalid:")); |
572 | 0 | FastInt64ToBufferLeft(s, buffer + strlen(buffer)); |
573 | 0 | return buffer; |
574 | 0 | } |
575 | | |
576 | | // strftime format: "%a, %d %b %Y %H:%M:%S GMT", |
577 | | // but strftime does locale stuff which we do not want |
578 | | // plus strftime takes > 10x the time of hard code |
579 | | |
580 | 0 | const char* weekday_name = "Xxx"; |
581 | 0 | switch (tm.tm_wday) { |
582 | 0 | default: { |
583 | 0 | DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday; |
584 | 0 | } break; |
585 | 0 | case 0: |
586 | 0 | weekday_name = "Sun"; |
587 | 0 | break; |
588 | 0 | case 1: |
589 | 0 | weekday_name = "Mon"; |
590 | 0 | break; |
591 | 0 | case 2: |
592 | 0 | weekday_name = "Tue"; |
593 | 0 | break; |
594 | 0 | case 3: |
595 | 0 | weekday_name = "Wed"; |
596 | 0 | break; |
597 | 0 | case 4: |
598 | 0 | weekday_name = "Thu"; |
599 | 0 | break; |
600 | 0 | case 5: |
601 | 0 | weekday_name = "Fri"; |
602 | 0 | break; |
603 | 0 | case 6: |
604 | 0 | weekday_name = "Sat"; |
605 | 0 | break; |
606 | 0 | } |
607 | | |
608 | 0 | const char* month_name = "Xxx"; |
609 | 0 | switch (tm.tm_mon) { |
610 | 0 | default: { |
611 | 0 | DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon; |
612 | 0 | } break; |
613 | 0 | case 0: |
614 | 0 | month_name = "Jan"; |
615 | 0 | break; |
616 | 0 | case 1: |
617 | 0 | month_name = "Feb"; |
618 | 0 | break; |
619 | 0 | case 2: |
620 | 0 | month_name = "Mar"; |
621 | 0 | break; |
622 | 0 | case 3: |
623 | 0 | month_name = "Apr"; |
624 | 0 | break; |
625 | 0 | case 4: |
626 | 0 | month_name = "May"; |
627 | 0 | break; |
628 | 0 | case 5: |
629 | 0 | month_name = "Jun"; |
630 | 0 | break; |
631 | 0 | case 6: |
632 | 0 | month_name = "Jul"; |
633 | 0 | break; |
634 | 0 | case 7: |
635 | 0 | month_name = "Aug"; |
636 | 0 | break; |
637 | 0 | case 8: |
638 | 0 | month_name = "Sep"; |
639 | 0 | break; |
640 | 0 | case 9: |
641 | 0 | month_name = "Oct"; |
642 | 0 | break; |
643 | 0 | case 10: |
644 | 0 | month_name = "Nov"; |
645 | 0 | break; |
646 | 0 | case 11: |
647 | 0 | month_name = "Dec"; |
648 | 0 | break; |
649 | 0 | } |
650 | | |
651 | | // Write out the buffer. |
652 | | |
653 | 0 | memcpy(buffer + 0, weekday_name, 3); |
654 | 0 | buffer[3] = ','; |
655 | 0 | buffer[4] = ' '; |
656 | |
|
657 | 0 | PutTwoDigits(tm.tm_mday, buffer + 5); |
658 | 0 | buffer[7] = ' '; |
659 | |
|
660 | 0 | memcpy(buffer + 8, month_name, 3); |
661 | 0 | buffer[11] = ' '; |
662 | |
|
663 | 0 | int32 year = tm.tm_year + 1900; |
664 | 0 | PutTwoDigits(year / 100, buffer + 12); |
665 | 0 | PutTwoDigits(year % 100, buffer + 14); |
666 | 0 | buffer[16] = ' '; |
667 | |
|
668 | 0 | PutTwoDigits(tm.tm_hour, buffer + 17); |
669 | 0 | buffer[19] = ':'; |
670 | |
|
671 | 0 | PutTwoDigits(tm.tm_min, buffer + 20); |
672 | 0 | buffer[22] = ':'; |
673 | |
|
674 | 0 | PutTwoDigits(tm.tm_sec, buffer + 23); |
675 | | |
676 | | // includes ending NUL |
677 | 0 | memcpy(buffer + 25, " GMT", 5); |
678 | |
|
679 | 0 | return buffer; |
680 | 0 | } |
681 | | |
682 | | // ---------------------------------------------------------------------- |
683 | | // strdup_with_new() |
684 | | // strndup_with_new() |
685 | | // |
686 | | // strdup_with_new() is the same as strdup() except that the memory |
687 | | // is allocated by new[] and hence an exception will be generated |
688 | | // if out of memory. |
689 | | // |
690 | | // strndup_with_new() is the same as strdup_with_new() except that it will |
691 | | // copy up to the specified number of characters. This function |
692 | | // is useful when we want to copy a substring out of a string |
693 | | // and didn't want to (or cannot) modify the string |
694 | | // ---------------------------------------------------------------------- |
695 | 0 | char* strdup_with_new(const char* the_string) { |
696 | 0 | if (the_string == nullptr) |
697 | 0 | return nullptr; |
698 | 0 | else |
699 | 0 | return strndup_with_new(the_string, strlen(the_string)); |
700 | 0 | } |
701 | | |
702 | 0 | char* strndup_with_new(const char* the_string, int max_length) { |
703 | 0 | if (the_string == nullptr) return nullptr; |
704 | 0 | size_t str_len = strlen(the_string); |
705 | 0 | if (str_len > max_length) { |
706 | 0 | str_len = max_length; |
707 | 0 | } |
708 | 0 | auto result = new char[str_len + 1]; |
709 | 0 | result[max_length] = '\0'; // truncated string might not have \0 at end |
710 | 0 | memcpy(result, the_string, str_len); |
711 | 0 | return result; |
712 | 0 | } |
713 | | |
714 | | // ---------------------------------------------------------------------- |
715 | | // ScanForFirstWord() |
716 | | // This function finds the first word in the string "the_string" given. |
717 | | // A word is defined by consecutive !ascii_isspace() characters. |
718 | | // If no valid words are found, |
719 | | // return NULL and *end_ptr will contain junk |
720 | | // else |
721 | | // return the beginning of the first word and |
722 | | // *end_ptr will store the address of the first invalid character |
723 | | // (ascii_isspace() or '\0'). |
724 | | // |
725 | | // Precondition: (end_ptr != NULL) |
726 | | // ---------------------------------------------------------------------- |
727 | 0 | const char* ScanForFirstWord(const char* the_string, const char** end_ptr) { |
728 | 0 | CHECK(end_ptr != nullptr) << ": precondition violated"; |
729 | |
|
730 | 0 | if (the_string == nullptr) // empty string |
731 | 0 | return nullptr; |
732 | | |
733 | 0 | const char* curr = the_string; |
734 | 0 | while ((*curr != '\0') && ascii_isspace(*curr)) // skip initial spaces |
735 | 0 | ++curr; |
736 | |
|
737 | 0 | if (*curr == '\0') // no valid word found |
738 | 0 | return nullptr; |
739 | | |
740 | | // else has a valid word |
741 | 0 | const char* first_word = curr; |
742 | | |
743 | | // now locate the end of the word |
744 | 0 | while ((*curr != '\0') && !ascii_isspace(*curr)) ++curr; |
745 | |
|
746 | 0 | *end_ptr = curr; |
747 | 0 | return first_word; |
748 | 0 | } |
749 | | |
750 | | // ---------------------------------------------------------------------- |
751 | | // AdvanceIdentifier() |
752 | | // This function returns a pointer past the end of the longest C-style |
753 | | // identifier that is a prefix of str or NULL if str does not start with |
754 | | // one. A C-style identifier begins with an ASCII letter or underscore |
755 | | // and continues with ASCII letters, digits, or underscores. |
756 | | // ---------------------------------------------------------------------- |
757 | 0 | const char* AdvanceIdentifier(const char* str) { |
758 | | // Not using isalpha and isalnum so as not to rely on the locale. |
759 | | // We could have used ascii_isalpha and ascii_isalnum. |
760 | 0 | char ch = *str++; |
761 | 0 | if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_')) return nullptr; |
762 | 0 | while (true) { |
763 | 0 | ch = *str; |
764 | 0 | if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || |
765 | 0 | ch == '_')) |
766 | 0 | return str; |
767 | 0 | str++; |
768 | 0 | } |
769 | 0 | } |
770 | | |
771 | | // ---------------------------------------------------------------------- |
772 | | // IsIdentifier() |
773 | | // This function returns true if str is a C-style identifier. |
774 | | // A C-style identifier begins with an ASCII letter or underscore |
775 | | // and continues with ASCII letters, digits, or underscores. |
776 | | // ---------------------------------------------------------------------- |
777 | 0 | bool IsIdentifier(const char* str) { |
778 | 0 | const char* end = AdvanceIdentifier(str); |
779 | 0 | return end && *end == '\0'; |
780 | 0 | } |
781 | | |
782 | 0 | static bool IsWildcard(Rune character) { |
783 | 0 | return character == '*' || character == '?'; |
784 | 0 | } |
785 | | |
786 | | // Move the strings pointers to the point where they start to differ. |
787 | | template <typename CHAR, typename NEXT> |
788 | | void EatSameChars(const CHAR** pattern, const CHAR* pattern_end, const CHAR** string, |
789 | 0 | const CHAR* string_end, NEXT next) { |
790 | 0 | const CHAR* escape = nullptr; |
791 | 0 | while (*pattern != pattern_end && *string != string_end) { |
792 | 0 | if (!escape && IsWildcard(**pattern)) { |
793 | | // We don't want to match wildcard here, except if it's escaped. |
794 | 0 | return; |
795 | 0 | } |
796 | | |
797 | | // Check if the escapement char is found. If so, skip it and move to the |
798 | | // next character. |
799 | 0 | if (!escape && **pattern == '\\') { |
800 | 0 | escape = *pattern; |
801 | 0 | next(pattern, pattern_end); |
802 | 0 | continue; |
803 | 0 | } |
804 | | |
805 | | // Check if the chars match, if so, increment the ptrs. |
806 | 0 | const CHAR* pattern_next = *pattern; |
807 | 0 | const CHAR* string_next = *string; |
808 | 0 | Rune pattern_char = next(&pattern_next, pattern_end); |
809 | 0 | if (pattern_char == next(&string_next, string_end) && pattern_char != Runeerror && |
810 | 0 | pattern_char <= Runemax) { |
811 | 0 | *pattern = pattern_next; |
812 | 0 | *string = string_next; |
813 | 0 | } else { |
814 | | // Uh ho, it did not match, we are done. If the last char was an |
815 | | // escapement, that means that it was an error to advance the ptr here, |
816 | | // let's put it back where it was. This also mean that the MatchPattern |
817 | | // function will return false because if we can't match an escape char |
818 | | // here, then no one will. |
819 | 0 | if (escape) { |
820 | 0 | *pattern = escape; |
821 | 0 | } |
822 | 0 | return; |
823 | 0 | } |
824 | | |
825 | 0 | escape = nullptr; |
826 | 0 | } |
827 | 0 | } |
828 | | |
829 | | template <typename CHAR, typename NEXT> |
830 | 0 | void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) { |
831 | 0 | while (*pattern != end) { |
832 | 0 | if (!IsWildcard(**pattern)) return; |
833 | 0 | next(pattern, end); |
834 | 0 | } |
835 | 0 | } |
836 | | |
837 | | template <typename CHAR, typename NEXT> |
838 | | bool MatchPatternT(const CHAR* eval, const CHAR* eval_end, const CHAR* pattern, |
839 | 0 | const CHAR* pattern_end, int depth, NEXT next) { |
840 | 0 | const int kMaxDepth = 16; |
841 | 0 | if (depth > kMaxDepth) return false; |
842 | | |
843 | | // Eat all the matching chars. |
844 | 0 | EatSameChars(&pattern, pattern_end, &eval, eval_end, next); |
845 | | |
846 | | // If the string is empty, then the pattern must be empty too, or contains |
847 | | // only wildcards. |
848 | 0 | if (eval == eval_end) { |
849 | 0 | EatWildcard(&pattern, pattern_end, next); |
850 | 0 | return pattern == pattern_end; |
851 | 0 | } |
852 | | |
853 | | // Pattern is empty but not string, this is not a match. |
854 | 0 | if (pattern == pattern_end) return false; |
855 | | |
856 | | // If this is a question mark, then we need to compare the rest with |
857 | | // the current string or the string with one character eaten. |
858 | 0 | const CHAR* next_pattern = pattern; |
859 | 0 | next(&next_pattern, pattern_end); |
860 | 0 | if (pattern[0] == '?') { |
861 | 0 | if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) return true; |
862 | 0 | const CHAR* next_eval = eval; |
863 | 0 | next(&next_eval, eval_end); |
864 | 0 | if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end, depth + 1, next)) |
865 | 0 | return true; |
866 | 0 | } |
867 | | |
868 | | // This is a *, try to match all the possible substrings with the remainder |
869 | | // of the pattern. |
870 | 0 | if (pattern[0] == '*') { |
871 | | // Collapse duplicate wild cards (********** into *) so that the |
872 | | // method does not recurse unnecessarily. http://crbug.com/52839 |
873 | 0 | EatWildcard(&next_pattern, pattern_end, next); |
874 | |
|
875 | 0 | while (eval != eval_end) { |
876 | 0 | if (MatchPatternT(eval, eval_end, next_pattern, pattern_end, depth + 1, next)) |
877 | 0 | return true; |
878 | 0 | eval++; |
879 | 0 | } |
880 | | |
881 | | // We reached the end of the string, let see if the pattern contains only |
882 | | // wildcards. |
883 | 0 | if (eval == eval_end) { |
884 | 0 | EatWildcard(&pattern, pattern_end, next); |
885 | 0 | if (pattern != pattern_end) return false; |
886 | 0 | return true; |
887 | 0 | } |
888 | 0 | } |
889 | | |
890 | 0 | return false; |
891 | 0 | } |
892 | | |
893 | | struct NextCharUTF8 { |
894 | 0 | Rune operator()(const char** p, const char* end) { |
895 | 0 | Rune c; |
896 | 0 | int offset = charntorune(&c, *p, static_cast<int>(end - *p)); |
897 | 0 | *p += offset; |
898 | 0 | return c; |
899 | 0 | } |
900 | | }; |
901 | | |
902 | 0 | bool MatchPattern(const StringPiece& eval, const StringPiece& pattern) { |
903 | 0 | return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(), |
904 | 0 | pattern.data() + pattern.size(), 0, NextCharUTF8()); |
905 | 0 | } |
906 | | |
907 | | // ---------------------------------------------------------------------- |
908 | | // FindTagValuePair |
909 | | // Given a string of the form |
910 | | // <something><attr_sep><tag><tag_value_sep><value><attr_sep>...<string_term> |
911 | | // where the part before the first attr_sep is optional, |
912 | | // this function extracts the first tag and value, if any. |
913 | | // The function returns true if successful, in which case "tag" and "value" |
914 | | // are set to point to the beginning of the tag and the value, respectively, |
915 | | // and "tag_len" and "value_len" are set to the respective lengths. |
916 | | // ---------------------------------------------------------------------- |
917 | | |
918 | | bool FindTagValuePair(const char* arg_str, char tag_value_separator, char attribute_separator, |
919 | | char string_terminal, char** tag, int* tag_len, char** value, |
920 | 0 | int* value_len) { |
921 | 0 | char* in_str = const_cast<char*>(arg_str); // For msvc8. |
922 | 0 | if (in_str == nullptr) return false; |
923 | 0 | char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'}; |
924 | 0 | char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'}; |
925 | | |
926 | | // Look for beginning of tag |
927 | 0 | *tag = strpbrk(in_str, attr_sep_or_term); |
928 | | // If string_terminal is '\0', strpbrk won't find it but return null. |
929 | 0 | if (*tag == nullptr || **tag == string_terminal) |
930 | 0 | *tag = in_str; |
931 | 0 | else |
932 | 0 | (*tag)++; // Move past separator |
933 | | // Now look for value... |
934 | 0 | char* tv_sep_pos = strpbrk(*tag, tv_sep_or_term); |
935 | 0 | if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal) return false; |
936 | | // ...and end of value |
937 | 0 | char* attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term); |
938 | |
|
939 | 0 | *tag_len = tv_sep_pos - *tag; |
940 | 0 | *value = tv_sep_pos + 1; |
941 | 0 | if (attr_sep_pos != nullptr) |
942 | 0 | *value_len = attr_sep_pos - *value; |
943 | 0 | else |
944 | 0 | *value_len = strlen(*value); |
945 | 0 | return true; |
946 | 0 | } |
947 | | |
948 | 0 | void UniformInsertString(string* s, int interval, const char* separator) { |
949 | 0 | const size_t separator_len = strlen(separator); |
950 | |
|
951 | 0 | if (interval < 1 || // invalid interval |
952 | 0 | s->empty() || // nothing to do |
953 | 0 | separator_len == 0) // invalid separator |
954 | 0 | return; |
955 | | |
956 | 0 | int num_inserts = (s->size() - 1) / interval; // -1 to avoid appending at end |
957 | 0 | if (num_inserts == 0) // nothing to do |
958 | 0 | return; |
959 | | |
960 | 0 | string tmp; |
961 | 0 | tmp.reserve(s->size() + num_inserts * separator_len + 1); |
962 | |
|
963 | 0 | for (int i = 0; i < num_inserts; ++i) { |
964 | | // append this interval |
965 | 0 | tmp.append(*s, i * interval, interval); |
966 | | // append a separator |
967 | 0 | tmp.append(separator, separator_len); |
968 | 0 | } |
969 | | |
970 | | // append the tail |
971 | 0 | const size_t tail_pos = num_inserts * interval; |
972 | 0 | tmp.append(*s, tail_pos, s->size() - tail_pos); |
973 | |
|
974 | 0 | s->swap(tmp); |
975 | 0 | } |
976 | | |
977 | 0 | void InsertString(string* const s, const vector<uint32>& indices, char const* const separator) { |
978 | 0 | const unsigned num_indices(indices.size()); |
979 | 0 | if (num_indices == 0) { |
980 | 0 | return; // nothing to do... |
981 | 0 | } |
982 | | |
983 | 0 | const unsigned separator_len(strlen(separator)); |
984 | 0 | if (separator_len == 0) { |
985 | 0 | return; // still nothing to do... |
986 | 0 | } |
987 | | |
988 | 0 | string tmp; |
989 | 0 | const unsigned s_len(s->size()); |
990 | 0 | tmp.reserve(s_len + separator_len * num_indices); |
991 | |
|
992 | 0 | vector<uint32>::const_iterator const ind_end(indices.end()); |
993 | 0 | auto ind_pos(indices.begin()); |
994 | |
|
995 | 0 | uint32 last_pos(0); |
996 | 0 | while (ind_pos != ind_end) { |
997 | 0 | const uint32 pos(*ind_pos); |
998 | 0 | DCHECK_GE(pos, last_pos); |
999 | 0 | DCHECK_LE(pos, s_len); |
1000 | |
|
1001 | 0 | tmp.append(s->substr(last_pos, pos - last_pos)); |
1002 | 0 | tmp.append(separator); |
1003 | |
|
1004 | 0 | last_pos = pos; |
1005 | 0 | ++ind_pos; |
1006 | 0 | } |
1007 | 0 | tmp.append(s->substr(last_pos)); |
1008 | |
|
1009 | 0 | s->swap(tmp); |
1010 | 0 | } |
1011 | | |
1012 | | //------------------------------------------------------------------------ |
1013 | | // FindNth() |
1014 | | // return index of nth occurrence of c in the string, |
1015 | | // or string::npos if n > number of occurrences of c. |
1016 | | // (returns string::npos = -1 if n <= 0) |
1017 | | //------------------------------------------------------------------------ |
1018 | 0 | int FindNth(StringPiece s, char c, int n) { |
1019 | 0 | size_t pos = string::npos; |
1020 | |
|
1021 | 0 | for (int i = 0; i < n; ++i) { |
1022 | 0 | pos = s.find_first_of(c, pos + 1); |
1023 | 0 | if (pos == StringPiece::npos) { |
1024 | 0 | break; |
1025 | 0 | } |
1026 | 0 | } |
1027 | 0 | return pos; |
1028 | 0 | } |
1029 | | |
1030 | | //------------------------------------------------------------------------ |
1031 | | // ReverseFindNth() |
1032 | | // return index of nth-to-last occurrence of c in the string, |
1033 | | // or string::npos if n > number of occurrences of c. |
1034 | | // (returns string::npos if n <= 0) |
1035 | | //------------------------------------------------------------------------ |
1036 | 0 | int ReverseFindNth(StringPiece s, char c, int n) { |
1037 | 0 | if (n <= 0) { |
1038 | 0 | return static_cast<int>(StringPiece::npos); |
1039 | 0 | } |
1040 | | |
1041 | 0 | size_t pos = s.size(); |
1042 | |
|
1043 | 0 | for (int i = 0; i < n; ++i) { |
1044 | | // If pos == 0, we return StringPiece::npos right away. Otherwise, |
1045 | | // the following find_last_of call would take (pos - 1) as string::npos, |
1046 | | // which means it would again search the entire input string. |
1047 | 0 | if (pos == 0) { |
1048 | 0 | return static_cast<int>(StringPiece::npos); |
1049 | 0 | } |
1050 | 0 | pos = s.find_last_of(c, pos - 1); |
1051 | 0 | if (pos == string::npos) { |
1052 | 0 | break; |
1053 | 0 | } |
1054 | 0 | } |
1055 | 0 | return pos; |
1056 | 0 | } |
1057 | | |
1058 | | namespace strings { |
1059 | | |
1060 | | // FindEol() |
1061 | | // Returns the location of the next end-of-line sequence. |
1062 | | |
1063 | 0 | StringPiece FindEol(StringPiece s) { |
1064 | 0 | for (size_t i = 0; i < s.length(); ++i) { |
1065 | 0 | if (s[i] == '\n') { |
1066 | 0 | return StringPiece(s.data() + i, 1); |
1067 | 0 | } |
1068 | 0 | if (s[i] == '\r') { |
1069 | 0 | if (i + 1 < s.length() && s[i + 1] == '\n') { |
1070 | 0 | return StringPiece(s.data() + i, 2); |
1071 | 0 | } else { |
1072 | 0 | return StringPiece(s.data() + i, 1); |
1073 | 0 | } |
1074 | 0 | } |
1075 | 0 | } |
1076 | 0 | return StringPiece(s.data() + s.length(), 0); |
1077 | 0 | } |
1078 | | |
1079 | | } // namespace strings |
1080 | | |
1081 | | //------------------------------------------------------------------------ |
1082 | | // OnlyWhitespace() |
1083 | | // return true if string s contains only whitespace characters |
1084 | | //------------------------------------------------------------------------ |
1085 | 0 | bool OnlyWhitespace(const StringPiece& s) { |
1086 | 0 | for (const auto& c : s) { |
1087 | 0 | if (!ascii_isspace(c)) return false; |
1088 | 0 | } |
1089 | 0 | return true; |
1090 | 0 | } |
1091 | | |
1092 | 0 | string PrefixSuccessor(const StringPiece& prefix) { |
1093 | | // We can increment the last character in the string and be done |
1094 | | // unless that character is 255, in which case we have to erase the |
1095 | | // last character and increment the previous character, unless that |
1096 | | // is 255, etc. If the string is empty or consists entirely of |
1097 | | // 255's, we just return the empty string. |
1098 | 0 | bool done = false; |
1099 | 0 | string limit(prefix.data(), prefix.size()); |
1100 | 0 | int index = limit.length() - 1; |
1101 | 0 | while (!done && index >= 0) { |
1102 | 0 | if (limit[index] == 255) { |
1103 | 0 | limit.erase(index); |
1104 | 0 | index--; |
1105 | 0 | } else { |
1106 | 0 | limit[index]++; |
1107 | 0 | done = true; |
1108 | 0 | } |
1109 | 0 | } |
1110 | 0 | if (!done) { |
1111 | 0 | return ""; |
1112 | 0 | } else { |
1113 | 0 | return limit; |
1114 | 0 | } |
1115 | 0 | } |
1116 | | |
1117 | 0 | string ImmediateSuccessor(const StringPiece& s) { |
1118 | | // Return the input string, with an additional NUL byte appended. |
1119 | 0 | string out; |
1120 | 0 | out.reserve(s.size() + 1); |
1121 | 0 | out.append(s.data(), s.size()); |
1122 | 0 | out.push_back('\0'); |
1123 | 0 | return out; |
1124 | 0 | } |
1125 | | |
1126 | 0 | void FindShortestSeparator(const StringPiece& start, const StringPiece& limit, string* separator) { |
1127 | | // Find length of common prefix |
1128 | 0 | size_t min_length = min(start.size(), limit.size()); |
1129 | 0 | size_t diff_index = 0; |
1130 | 0 | while ((diff_index < min_length) && (start[diff_index] == limit[diff_index])) { |
1131 | 0 | diff_index++; |
1132 | 0 | } |
1133 | |
|
1134 | 0 | if (diff_index >= min_length) { |
1135 | | // Handle the case where either string is a prefix of the other |
1136 | | // string, or both strings are identical. |
1137 | 0 | start.CopyToString(separator); |
1138 | 0 | return; |
1139 | 0 | } |
1140 | | |
1141 | 0 | if (diff_index + 1 == start.size()) { |
1142 | | // If the first difference is in the last character, do not bother |
1143 | | // incrementing that character since the separator will be no |
1144 | | // shorter than "start". |
1145 | 0 | start.CopyToString(separator); |
1146 | 0 | return; |
1147 | 0 | } |
1148 | | |
1149 | 0 | if (start[diff_index] == 0xff) { |
1150 | | // Avoid overflow when incrementing start[diff_index] |
1151 | 0 | start.CopyToString(separator); |
1152 | 0 | return; |
1153 | 0 | } |
1154 | | |
1155 | 0 | separator->assign(start.data(), diff_index); |
1156 | 0 | separator->push_back(start[diff_index] + 1); |
1157 | 0 | if (*separator >= limit) { |
1158 | | // Never pick a separator that causes confusion with "limit" |
1159 | 0 | start.CopyToString(separator); |
1160 | 0 | } |
1161 | 0 | } |
1162 | | |
1163 | 0 | int SafeSnprintf(char* str, size_t size, const char* format, ...) { |
1164 | 0 | va_list printargs; |
1165 | 0 | va_start(printargs, format); |
1166 | 0 | int ncw = vsnprintf(str, size, format, printargs); |
1167 | 0 | va_end(printargs); |
1168 | 0 | return (ncw < size && ncw >= 0) ? ncw : 0; |
1169 | 0 | } |
1170 | | |
1171 | 0 | bool GetlineFromStdioFile(FILE* file, string* str, char delim) { |
1172 | 0 | str->erase(); |
1173 | 0 | while (true) { |
1174 | 0 | if (feof(file) || ferror(file)) { |
1175 | 0 | return false; |
1176 | 0 | } |
1177 | 0 | int c = getc(file); |
1178 | 0 | if (c == EOF) return false; |
1179 | 0 | if (c == delim) return true; |
1180 | 0 | str->push_back(c); |
1181 | 0 | } |
1182 | 0 | } |
1183 | | |
1184 | | namespace { |
1185 | | |
1186 | | template <typename CHAR> |
1187 | 0 | size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) { |
1188 | 0 | for (size_t i = 0; i < dst_size; ++i) { |
1189 | 0 | if ((dst[i] = src[i]) == 0) // We hit and copied the terminating NULL. |
1190 | 0 | return i; |
1191 | 0 | } |
1192 | | |
1193 | | // We were left off at dst_size. We over copied 1 byte. Null terminate. |
1194 | 0 | if (dst_size != 0) dst[dst_size - 1] = 0; |
1195 | | |
1196 | | // Count the rest of the |src|, and return it's length in characters. |
1197 | 0 | while (src[dst_size]) ++dst_size; |
1198 | 0 | return dst_size; |
1199 | 0 | } |
1200 | | |
1201 | | } // namespace |
1202 | | |
1203 | 0 | size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) { |
1204 | 0 | return lcpyT<char>(dst, src, dst_size); |
1205 | 0 | } |