Coverage Report

Created: 2024-11-18 11:49

/root/doris/be/src/gutil/strings/split.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2008 and onwards Google, Inc.
2
//
3
// #status: RECOMMENDED
4
// #category: operations on strings
5
// #summary: Functions for splitting strings into substrings.
6
//
7
// This file contains functions for splitting strings. The new and recommended
8
// API for string splitting is the strings::Split() function. The old API is a
9
// large collection of standalone functions declared at the bottom of this file
10
// in the global scope.
11
//
12
// TODO(user): Rough migration plan from old API to new API
13
// (1) Add comments to old Split*() functions showing how to do the same things
14
//     with the new API.
15
// (2) Reimplement some of the old Split*() functions in terms of the new
16
//     Split() API. This will allow deletion of code in split.cc.
17
// (3) (Optional) Replace old Split*() API calls at call sites with calls to new
18
//     Split() API.
19
//
20
#pragma once
21
22
// IWYU pragma: no_include <pstl/glue_algorithm_defs.h>
23
24
#include <stddef.h>
25
#include <algorithm>
26
27
using std::copy;
28
using std::max;
29
using std::min;
30
using std::reverse;
31
using std::sort;
32
using std::swap;
33
#include <iterator>
34
35
using std::back_insert_iterator;
36
using std::iterator_traits;
37
#include <map>
38
39
using std::map;
40
using std::multimap;
41
#include <set>
42
43
using std::multiset;
44
using std::set;
45
#include <string>
46
47
using std::string;
48
#include <utility>
49
50
using std::make_pair;
51
using std::pair;
52
#include <vector>
53
54
using std::vector;
55
#include "common/logging.h"
56
#include <unordered_map>
57
#include <unordered_set>
58
59
#include "gutil/integral_types.h"
60
#include "gutil/strings/charset.h"
61
#include "gutil/strings/stringpiece.h"
62
#include "gutil/strings/strip.h"
63
#include "gutil/strings/split_internal.h" // IWYU pragma: keep
64
65
namespace strings {
66
67
//                              The new Split API
68
//                                  aka Split2
69
//                              aka strings::Split()
70
//
71
// This string splitting API consists of a Split() function in the ::strings
72
// namespace and a handful of delimiter objects in the ::strings::delimiter
73
// namespace (more on delimiter objects below). The Split() function always
74
// takes two arguments: the text to be split and the delimiter on which to split
75
// the text. An optional third argument may also be given, which is a Predicate
76
// functor that will be used to filter the results, e.g., to skip empty strings
77
// (more on predicates below). The Split() function adapts the returned
78
// collection to the type specified by the caller.
79
//
80
// Example 1:
81
//   // Splits the given string on commas. Returns the results in a
82
//   // vector of strings.
83
//   vector<string> v = strings::Split("a,b,c", ",");
84
//   assert(v.size() == 3);
85
//
86
// Example 2:
87
//   // By default, empty strings are *included* in the output. See the
88
//   // strings::SkipEmpty predicate below to omit them.
89
//   vector<string> v = strings::Split("a,b,,c", ",");
90
//   assert(v.size() == 4);  // "a", "b", "", "c"
91
//   v = strings::Split("", ",");
92
//   assert(v.size() == 1);  // v contains a single ""
93
//
94
// Example 3:
95
//   // Splits the string as in the previous example, except that the results
96
//   // are returned as StringPiece objects. Note that because we are storing
97
//   // the results within StringPiece objects, we have to ensure that the input
98
//   // string outlives any results.
99
//   vector<StringPiece> v = strings::Split("a,b,c", ",");
100
//   assert(v.size() == 3);
101
//
102
// Example 4:
103
//   // Stores results in a set<string>.
104
//   set<string> a = strings::Split("a,b,c,a,b,c", ",");
105
//   assert(a.size() == 3);
106
//
107
// Example 5:
108
//   // Stores results in a map. The map implementation assumes that the input
109
//   // is provided as a series of key/value pairs. For example, the 0th element
110
//   // resulting from the split will be stored as a key to the 1st element. If
111
//   // an odd number of elements are resolved, the last element is paired with
112
//   // a default-constructed value (e.g., empty string).
113
//   map<string, string> m = strings::Split("a,b,c", ",");
114
//   assert(m.size() == 2);
115
//   assert(m["a"] == "b");
116
//   assert(m["c"] == "");  // last component value equals ""
117
//
118
// Example 6:
119
//   // Splits on the empty string, which results in each character of the input
120
//   // string becoming one element in the output collection.
121
//   vector<string> v = strings::Split("abc", "");
122
//   assert(v.size() == 3);
123
//
124
// Example 7:
125
//   // Stores first two split strings as the members in an std::pair.
126
//   std::pair<string, string> p = strings::Split("a,b,c", ",");
127
//   EXPECT_EQ("a", p.first);
128
//   EXPECT_EQ("b", p.second);
129
//   // "c" is omitted because std::pair can hold only two elements.
130
//
131
// As illustrated above, the Split() function adapts the returned collection to
132
// the type specified by the caller. The returned collections may contain
133
// string, StringPiece, Cord, or any object that has a constructor (explicit or
134
// not) that takes a single StringPiece argument. This pattern works for all
135
// standard STL containers including vector, list, deque, set, multiset, map,
136
// multimap, unordered_set and unordered_map, and even std::pair which is not
137
// actually a container.
138
//
139
// Splitting to std::pair is an interesting case because it can hold only two
140
// elements and is not a collection type. When splitting to an std::pair the
141
// first two split strings become the std::pair's .first and .second members
142
// respectively. The remaining split substrings are discarded. If there are fewer
143
// than two split substrings, the empty string is used for the corresponding
144
// std::pair member.
145
//
146
// The strings::Split() function can be used multiple times to perform more
147
// complicated splitting logic, such as intelligently parsing key-value pairs.
148
// For example
149
//
150
//   // The input string "a=b=c,d=e,f=,g" becomes
151
//   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
152
//   map<string, string> m;
153
//   for (StringPiece sp : strings::Split("a=b=c,d=e,f=,g", ",")) {
154
//     m.insert(strings::Split(sp, strings::delimiter::Limit("=", 1)));
155
//   }
156
//   EXPECT_EQ("b=c", m.find("a")->second);
157
//   EXPECT_EQ("e", m.find("d")->second);
158
//   EXPECT_EQ("", m.find("f")->second);
159
//   EXPECT_EQ("", m.find("g")->second);
160
//
161
// The above example stores the results in an std::map. But depending on your
162
// data requirements, you can just as easily store the results in an
163
// std::multimap or even a vector<std::pair<>>.
164
//
165
//
166
//                                  Delimiters
167
//
168
// The Split() function also takes a second argument that is a delimiter. This
169
// delimiter is actually an object that defines the boundaries between elements
170
// in the provided input. If a string (const char*, ::string, or StringPiece) is
171
// passed in place of an explicit Delimiter object, the argument is implicitly
172
// converted to a ::strings::delimiter::Literal.
173
//
174
// With this split API comes the formal concept of a Delimiter (big D). A
175
// Delimiter is an object with a Find() function that knows how to find the first
176
// occurrence of itself in a given StringPiece. Models of the Delimiter concept
177
// represent specific kinds of delimiters, such as single characters,
178
// substrings, or even regular expressions.
179
//
180
// The following Delimiter objects are provided as part of the Split() API:
181
//
182
//   - Literal (default)
183
//   - AnyOf
184
//   - Limit
185
//
186
// The following are examples of using some provided Delimiter objects:
187
//
188
// Example 1:
189
//   // Because a string literal is converted to a strings::delimiter::Literal,
190
//   // the following two splits are equivalent.
191
//   vector<string> v1 = strings::Split("a,b,c", ",");           // (1)
192
//   using ::strings::delimiter::Literal;
193
//   vector<string> v2 = strings::Split("a,b,c", Literal(","));  // (2)
194
//
195
// Example 2:
196
//   // Splits on any of the characters specified in the delimiter string.
197
//   using ::strings::delimiter::AnyOf;
198
//   vector<string> v = strings::Split("a,b;c-d", AnyOf(",;-"));
199
//   assert(v.size() == 4);
200
//
201
// Example 3:
202
//   // Uses the Limit meta-delimiter to limit the number of matches a delimiter
203
//   // can have. In this case, the delimiter of a Literal comma is limited to
204
//   // matching at most one time. The last element in the returned
205
//   // collection will contain all unsplit pieces, which may contain instances
206
//   // of the delimiter.
207
//   using ::strings::delimiter::Limit;
208
//   vector<string> v = strings::Split("a,b,c", Limit(",", 1));
209
//   assert(v.size() == 2);  // Limited to 1 delimiter; so two elements found
210
//   assert(v[0] == "a");
211
//   assert(v[1] == "b,c");
212
//
213
//
214
//                                  Predicates
215
//
216
// Predicates can filter the results of a Split() operation by determining
217
// whether or not a resultant element is included in the result set. A predicate
218
// may be passed as an *optional* third argument to the Split() function.
219
//
220
// Predicates are unary functions (or functors) that take a single StringPiece
221
// argument and return bool indicating whether the argument should be included
222
// (true) or excluded (false).
223
//
224
// One example where this is useful is when filtering out empty substrings. By
225
// default, empty substrings may be returned by strings::Split(), which is
226
// similar to the way split functions work in other programming languages. For
227
// example:
228
//
229
//   // Empty strings *are* included in the returned collection.
230
//   vector<string> v = strings::Split(",a,,b,", ",");
231
//   assert(v.size() ==  5);  // v[0] == "", v[1] == "a", v[2] == "", ...
232
//
233
// These empty strings can be filtered out of the results by simply passing the
234
// provided SkipEmpty predicate as the third argument to the Split() function.
235
// SkipEmpty does not consider a string containing all whitespace to be empty.
236
// For that behavior use the SkipWhitespace predicate. For example:
237
//
238
// Example 1:
239
//   // Uses SkipEmpty to omit empty strings. Strings containing whitespace are
240
//   // not empty and are therefore not skipped.
241
//   using strings::SkipEmpty;
242
//   vector<string> v = strings::Split(",a, ,b,", ",", SkipEmpty());
243
//   assert(v.size() == 3);
244
//   assert(v[0] == "a");
245
//   assert(v[1] == " ");  // <-- The whitespace makes the string not empty.
246
//   assert(v[2] == "b");
247
//
248
// Example 2:
249
//   // Uses SkipWhitespace to skip all strings that are either empty or contain
250
//   // only whitespace.
251
//   using strings::SkipWhitespace;
252
//   vector<string> v = strings::Split(",a, ,b,", ",",  SkipWhitespace());
253
//   assert(v.size() == 2);
254
//   assert(v[0] == "a");
255
//   assert(v[1] == "b");
256
//
257
//
258
//                     Differences between Split1 and Split2
259
//
260
// Split2 is the strings::Split() API described above. Split1 is a name for the
261
// collection of legacy Split*() functions declared later in this file. Most of
262
// the Split1 functions follow a set of conventions that don't necessarily match
263
// the conventions used in Split2. The following are some of the important
264
// differences between Split1 and Split2:
265
//
266
// Split1 -> Split2
267
// ----------------
268
// Append -> Assign:
269
//   The Split1 functions all returned their output collections via a pointer to
270
//   an out parameter as is typical in Google code. In some cases the comments
271
//   explicitly stated that results would be *appended* to the output
272
//   collection. In some cases it was ambiguous whether results were appended.
273
//   This ambiguity is gone in the Split2 API as results are always assigned to
274
//   the output collection, never appended.
275
//
276
// AnyOf -> Literal:
277
//   Most Split1 functions treated their delimiter argument as a string of
278
//   individual byte delimiters. For example, a delimiter of ",;" would split on
279
//   "," and ";", not the substring ",;". This behavior is equivalent to the
280
//   Split2 delimiter strings::delimiter::AnyOf, which is *not* the default. By
281
//   default, strings::Split() splits using strings::delimiter::Literal() which
282
//   would treat the whole string ",;" as a single delimiter string.
283
//
284
// SkipEmpty -> allow empty:
285
//   Most Split1 functions omitted empty substrings in the results. To keep
286
//   empty substrings one would have to use an explicitly named
287
//   Split*AllowEmpty() function. This behavior is reversed in Split2. By
288
//   default, strings::Split() *allows* empty substrings in the output. To skip
289
//   them, use the strings::SkipEmpty predicate.
290
//
291
// string -> user's choice:
292
//   Most Split1 functions return collections of string objects. Some return
293
//   char*, but the type returned is dictated by each Split1 function. With
294
//   Split2 the caller can choose which string-like object to return. (Note:
295
//   char* C-strings are not supported in Split2--use StringPiece instead).
296
//
297
298
// Definitions of the main Split() function.
299
template <typename Delimiter>
300
24
internal::Splitter<Delimiter> Split(StringPiece text, Delimiter d) {
301
24
    return internal::Splitter<Delimiter>(text, d);
302
24
}
_ZN7strings5SplitINS_9delimiter9LimitImplINS1_7LiteralEEEEENS_8internal8SplitterIT_NS5_8NoFilterEEE11StringPieceS7_
Line
Count
Source
300
24
internal::Splitter<Delimiter> Split(StringPiece text, Delimiter d) {
301
24
    return internal::Splitter<Delimiter>(text, d);
302
24
}
Unexecuted instantiation: _ZN7strings5SplitINS_9delimiter5AnyOfEEENS_8internal8SplitterIT_NS3_8NoFilterEEE11StringPieceS5_
Unexecuted instantiation: _ZN7strings5SplitINS_9delimiter9LimitImplINS1_5AnyOfEEEEENS_8internal8SplitterIT_NS5_8NoFilterEEE11StringPieceS7_
303
304
template <typename Delimiter, typename Predicate>
305
0
internal::Splitter<Delimiter, Predicate> Split(StringPiece text, Delimiter d, Predicate p) {
306
0
    return internal::Splitter<Delimiter, Predicate>(text, d, p);
307
0
}
308
309
namespace delimiter {
310
// A Delimiter object represents a single separator, such as a character,
311
// literal string, or regular expression. A Delimiter object must have the
312
// following member:
313
//
314
//   StringPiece Find(StringPiece text);
315
//
316
// This Find() member function should return a StringPiece referring to the next
317
// occurrence of the represented delimiter within the given string text. If no
318
// delimiter is found in the given text, a zero-length StringPiece referring to
319
// text.end() should be returned (e.g., StringPiece(text.end(), 0)). It is
320
// important that the returned StringPiece always be within the bounds of the
321
// StringPiece given as an argument--it must not refer to a string that is
322
// physically located outside of the given string. The following example is a
323
// simple Delimiter object that is created with a single char and will look for
324
// that char in the text given to the Find() function:
325
//
326
//   struct SimpleDelimiter {
327
//     const char c_;
328
//     explicit SimpleDelimiter(char c) : c_(c) {}
329
//     StringPiece Find(StringPiece text) {
330
//       int pos = text.find(c_);
331
//       if (pos == StringPiece::npos) return StringPiece(text.end(), 0);
332
//       return StringPiece(text, pos, 1);
333
//     }
334
//   };
335
336
// Represents a literal string delimiter. Examples:
337
//
338
//   using ::strings::delimiter::Literal;
339
//   vector<string> v = strings::Split("a=>b=>c", Literal("=>"));
340
//   assert(v.size() == 3);
341
//   assert(v[0] == "a");
342
//   assert(v[1] == "b");
343
//   assert(v[2] == "c");
344
//
345
// The next example uses the empty string as a delimiter.
346
//
347
//   using ::strings::delimiter::Literal;
348
//   vector<string> v = strings::Split("abc", Literal(""));
349
//   assert(v.size() == 3);
350
//   assert(v[0] == "a");
351
//   assert(v[1] == "b");
352
//   assert(v[2] == "c");
353
//
354
class Literal {
355
public:
356
    explicit Literal(StringPiece sp);
357
    StringPiece Find(StringPiece text) const;
358
359
private:
360
    const string delimiter_;
361
};
362
363
// Represents a delimiter that will match any of the given byte-sized
364
// characters. AnyOf is similar to Literal, except that AnyOf uses
365
// StringPiece::find_first_of() and Literal uses StringPiece::find(). AnyOf
366
// examples:
367
//
368
//   using ::strings::delimiter::AnyOf;
369
//   vector<string> v = strings::Split("a,b=c", AnyOf(",="));
370
//
371
//   assert(v.size() == 3);
372
//   assert(v[0] == "a");
373
//   assert(v[1] == "b");
374
//   assert(v[2] == "c");
375
//
376
// If AnyOf is given the empty string, it behaves exactly like Literal and
377
// matches each individual character in the input string.
378
//
379
// Note: The string passed to AnyOf is assumed to be a string of single-byte
380
// ASCII characters. AnyOf does not work with multi-byte characters.
381
class AnyOf {
382
public:
383
    explicit AnyOf(StringPiece sp);
384
    StringPiece Find(StringPiece text) const;
385
386
private:
387
    const string delimiters_;
388
};
389
390
// Wraps another delimiter and sets a max number of matches for that delimiter.
391
// Create LimitImpls using the Limit() function. Example:
392
//
393
//   using ::strings::delimiter::Limit;
394
//   vector<string> v = strings::Split("a,b,c,d", Limit(",", 2));
395
//
396
//   assert(v.size() == 3);  // Split on 2 commas, giving a vector with 3 items
397
//   assert(v[0] == "a");
398
//   assert(v[1] == "b");
399
//   assert(v[2] == "c,d");
400
//
401
template <typename Delimiter>
402
class LimitImpl {
403
public:
404
    LimitImpl(Delimiter delimiter, int limit)
405
24
            : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {}
_ZN7strings9delimiter9LimitImplINS0_7LiteralEEC2ES2_i
Line
Count
Source
405
24
            : delimiter_(std::move(delimiter)), limit_(limit), count_(0) {}
Unexecuted instantiation: _ZN7strings9delimiter9LimitImplINS0_5AnyOfEEC2ES2_i
406
47
    StringPiece Find(StringPiece text) {
407
47
        if (count_++ == limit_) {
408
18
            return StringPiece(text.end(), 0); // No more matches.
409
18
        }
410
29
        return delimiter_.Find(text);
411
47
    }
_ZN7strings9delimiter9LimitImplINS0_7LiteralEE4FindE11StringPiece
Line
Count
Source
406
47
    StringPiece Find(StringPiece text) {
407
47
        if (count_++ == limit_) {
408
18
            return StringPiece(text.end(), 0); // No more matches.
409
18
        }
410
29
        return delimiter_.Find(text);
411
47
    }
Unexecuted instantiation: _ZN7strings9delimiter9LimitImplINS0_5AnyOfEE4FindE11StringPiece
412
413
private:
414
    Delimiter delimiter_;
415
    const int limit_;
416
    int count_;
417
};
418
419
// Overloaded Limit() function to create LimitImpl<> objects. Uses the Delimiter
420
// Literal as the default if string-like objects are passed as the delimiter
421
// parameter. This is similar to the overloads for Split() below.
422
template <typename Delimiter>
423
0
LimitImpl<Delimiter> Limit(Delimiter delim, int limit) {
424
0
    return LimitImpl<Delimiter>(delim, limit);
425
0
}
426
427
14
inline LimitImpl<Literal> Limit(const char* s, int limit) {
428
14
    return LimitImpl<Literal>(Literal(s), limit);
429
14
}
430
431
10
inline LimitImpl<Literal> Limit(const string& s, int limit) {
432
10
    return LimitImpl<Literal>(Literal(s), limit);
433
10
}
434
435
0
inline LimitImpl<Literal> Limit(StringPiece s, int limit) {
436
0
    return LimitImpl<Literal>(Literal(s), limit);
437
0
}
438
439
} // namespace delimiter
440
441
//
442
// Predicates are functors that return bool indicating whether the given
443
// StringPiece should be included in the split output. If the predicate returns
444
// false then the string will be excluded from the output from strings::Split().
445
//
446
447
// Always returns true, indicating that all strings--including empty
448
// strings--should be included in the split output. This predicate is not
449
// strictly needed because this is the default behavior of the strings::Split()
450
// function. But it might be useful at some call sites to make the intent
451
// explicit.
452
//
453
// vector<string> v = Split(" a , ,,b,", ",", AllowEmpty());
454
// EXPECT_THAT(v, ElementsAre(" a ", " ", "", "b", ""));
455
struct AllowEmpty {
456
0
    bool operator()(StringPiece sp) const { return true; }
457
};
458
459
// Returns false if the given StringPiece is empty, indicating that the
460
// strings::Split() API should omit the empty string.
461
//
462
// vector<string> v = Split(" a , ,,b,", ",", SkipEmpty());
463
// EXPECT_THAT(v, ElementsAre(" a ", " ", "b"));
464
struct SkipEmpty {
465
0
    bool operator()(StringPiece sp) const { return !sp.empty(); }
466
};
467
468
// Returns false if the given StringPiece is empty or contains only whitespace,
469
// indicating that the strings::Split() API should omit the string.
470
//
471
// vector<string> v = Split(" a , ,,b,", ",", SkipWhitespace());
472
// EXPECT_THAT(v, ElementsAre(" a ", "b"));
473
struct SkipWhitespace {
474
1.96k
    bool operator()(StringPiece sp) const {
475
1.96k
        StripWhiteSpace(&sp);
476
1.96k
        return !sp.empty();
477
1.96k
    }
478
};
479
480
// Split() function overloads to effectively give Split() a default Delimiter
481
// type of Literal. If Split() is called and a string is passed as the delimiter
482
// instead of an actual Delimiter object, then one of these overloads will be
483
// invoked and will create a Splitter<Literal> with the delimiter string.
484
//
485
// Since Split() is a function template above, these overload signatures need to
486
// be explicit about the string type so they match better than the templated
487
// version. These functions are overloaded for:
488
//
489
//   - const char*
490
//   - const string&
491
//   - StringPiece
492
493
166
inline internal::Splitter<delimiter::Literal> Split(StringPiece text, const char* delimiter) {
494
166
    return internal::Splitter<delimiter::Literal>(text, delimiter::Literal(delimiter));
495
166
}
496
497
89
inline internal::Splitter<delimiter::Literal> Split(StringPiece text, const string& delimiter) {
498
89
    return internal::Splitter<delimiter::Literal>(text, delimiter::Literal(delimiter));
499
89
}
500
501
0
inline internal::Splitter<delimiter::Literal> Split(StringPiece text, StringPiece delimiter) {
502
0
    return internal::Splitter<delimiter::Literal>(text, delimiter::Literal(delimiter));
503
0
}
504
505
// Same overloads as above, but also including a Predicate argument.
506
template <typename Predicate>
507
internal::Splitter<delimiter::Literal, Predicate> Split(StringPiece text, const char* delimiter,
508
388
                                                        Predicate p) {
509
388
    return internal::Splitter<delimiter::Literal, Predicate>(text, delimiter::Literal(delimiter),
510
388
                                                             p);
511
388
}
_ZN7strings5SplitINS_14SkipWhitespaceEEENS_8internal8SplitterINS_9delimiter7LiteralET_EE11StringPiecePKcS6_
Line
Count
Source
508
388
                                                        Predicate p) {
509
388
    return internal::Splitter<delimiter::Literal, Predicate>(text, delimiter::Literal(delimiter),
510
388
                                                             p);
511
388
}
Unexecuted instantiation: _ZN7strings5SplitINS_9SkipEmptyEEENS_8internal8SplitterINS_9delimiter7LiteralET_EE11StringPiecePKcS6_
512
513
template <typename Predicate>
514
internal::Splitter<delimiter::Literal, Predicate> Split(StringPiece text, const string& delimiter,
515
                                                        Predicate p) {
516
    return internal::Splitter<delimiter::Literal, Predicate>(text, delimiter::Literal(delimiter),
517
                                                             p);
518
}
519
520
template <typename Predicate>
521
internal::Splitter<delimiter::Literal, Predicate> Split(StringPiece text, StringPiece delimiter,
522
                                                        Predicate p) {
523
    return internal::Splitter<delimiter::Literal, Predicate>(text, delimiter::Literal(delimiter),
524
                                                             p);
525
}
526
527
} // namespace strings
528
529
//
530
// ==================== LEGACY SPLIT FUNCTIONS ====================
531
//
532
533
// NOTE: The instruction below creates a Module titled
534
// GlobalSplitFunctions within the auto-generated Doxygen documentation.
535
// This instruction is needed to expose global functions that are not
536
// within a namespace.
537
//
538
// START DOXYGEN SplitFunctions grouping
539
/* @defgroup SplitFunctions
540
 * @{ */
541
542
// ----------------------------------------------------------------------
543
// ClipString
544
//    Clip a string to a max length. We try to clip on a word boundary
545
//    if this is possible. If the string is clipped, we append an
546
//    ellipsis.
547
//
548
//    ***NOTE***
549
//    ClipString counts length with strlen.  If you have non-ASCII
550
//    strings like UTF-8, this is wrong.  If you are displaying the
551
//    clipped strings to users in a frontend, consider using
552
//    ClipStringOnWordBoundary in
553
//    webserver/util/snippets/rewriteboldtags, which considers the width
554
//    of the string, not just the number of bytes.
555
//
556
//    TODO(user) Move ClipString back to strutil.  The problem with this is
557
//    that ClipStringHelper is used behind the scenes by SplitStringToLines, but
558
//    probably shouldn't be exposed in the .h files.
559
// ----------------------------------------------------------------------
560
void ClipString(char* str, int max_len);
561
562
// ----------------------------------------------------------------------
563
// ClipString
564
//    Version of ClipString() that uses string instead of char*.
565
//    NOTE: See comment above.
566
// ----------------------------------------------------------------------
567
void ClipString(string* full_str, int max_len);
568
569
// ----------------------------------------------------------------------
570
// SplitStringToLines() Split a string into lines of maximum length
571
// 'max_len'. Append the resulting lines to 'result'. Will attempt
572
// to split on word boundaries.  If 'num_lines'
573
// is zero it splits up the whole string regardless of length. If
574
// 'num_lines' is positive, it returns at most num_lines lines, and
575
// appends a "..." to the end of the last line if the string is too
576
// long to fit completely into 'num_lines' lines.
577
// ----------------------------------------------------------------------
578
void SplitStringToLines(const char* full, int max_len, int num_lines, vector<string>* result);
579
580
// ----------------------------------------------------------------------
581
// SplitOneStringToken()
582
//   Returns the first "delim" delimited string from "*source" and modifies
583
//   *source to point after the delimiter that was found. If no delimiter is
584
//   found, *source is set to NULL.
585
//
586
//   If the start of *source is a delimiter, an empty string is returned.
587
//   If *source is NULL, an empty string is returned.
588
//
589
//   "delim" is treated as a sequence of 1 or more character delimiters. Any one
590
//   of the characters present in "delim" is considered to be a single
591
//   delimiter; the delimiter is not "delim" as a whole. For example:
592
//
593
//     const char* s = "abc=;de";
594
//     string r = SplitOneStringToken(&s, ";=");
595
//     // r = "abc"
596
//     // s points to ";de"
597
// ----------------------------------------------------------------------
598
string SplitOneStringToken(const char** source, const char* delim);
599
600
// ----------------------------------------------------------------------
601
// SplitUsing()
602
//    Split a string into substrings based on the nul-terminated list
603
//    of bytes at delimiters (uses strsep) and return a vector of
604
//    those strings. Modifies 'full'. We allocate the return vector,
605
//    and you should free it.  Note that empty fields are ignored.
606
//    Use SplitToVector with last argument 'false' if you want the
607
//    empty fields.
608
// ----------------------------------------------------------------------
609
vector<char*>* SplitUsing(char* full, const char* delimiters);
610
611
// ----------------------------------------------------------------------
612
// SplitToVector()
613
//    Split a string into substrings based on the nul-terminated list
614
//    of bytes at delim (uses strsep) and appends the split
615
//    strings to 'vec'.  Modifies "full".  If 'omit_empty_strings' is
616
//    true, empty strings are omitted from the resulting vector.
617
// ----------------------------------------------------------------------
618
void SplitToVector(char* full, const char* delimiters, vector<char*>* vec, bool omit_empty_strings);
619
void SplitToVector(char* full, const char* delimiters, vector<const char*>* vec,
620
                   bool omit_empty_strings);
621
622
// ----------------------------------------------------------------------
623
// SplitStringPieceToVector
624
//    Split a StringPiece into sub-StringPieces based on the
625
//    nul-terminated list of bytes at delim and appends the
626
//    pieces to 'vec'.  If 'omit_empty_strings' is true, empty strings
627
//    are omitted from the resulting vector.
628
//    Expects the original string (from which 'full' is derived) to exist
629
//    for the full lifespan of 'vec'.
630
// ----------------------------------------------------------------------
631
void SplitStringPieceToVector(const StringPiece& full, const char* delim, vector<StringPiece>* vec,
632
                              bool omit_empty_strings);
633
634
// ----------------------------------------------------------------------
635
// SplitStringUsing()
636
// SplitStringToHashsetUsing()
637
// SplitStringToSetUsing()
638
// SplitStringToMapUsing()
639
// SplitStringToHashmapUsing()
640
641
// Splits a string using one or more byte delimiters, presented as a
642
// nul-terminated c string. Append the components to 'result'. If there are
643
// consecutive delimiters, this function skips over all of them: in other words,
644
// empty components are dropped. If you want to keep empty components, try
645
// SplitStringAllowEmpty().
646
//
647
// NOTE: Do not use this for multi-byte delimiters such as UTF-8 strings. Use
648
// strings::Split() with strings::delimiter::Literal as the delimiter.
649
//
650
// ==> NEW API: Consider using the new Split API defined above. <==
651
// Example:
652
//
653
//   using strings::SkipEmpty;
654
//   using strings::Split;
655
//   using strings::delimiter::AnyOf;
656
//
657
//   vector<string> v = Split(full, AnyOf(delimiter), SkipEmpty());
658
//
659
// For even better performance, store the result in a vector<StringPiece>
660
// to avoid string copies.
661
// ----------------------------------------------------------------------
662
void SplitStringUsing(const string& full, const char* delimiters, vector<string>* result);
663
void SplitStringToHashsetUsing(const string& full, const char* delimiters,
664
                               std::unordered_set<string>* result);
665
void SplitStringToSetUsing(const string& full, const char* delimiters, set<string>* result);
666
// The even-positioned (0-based) components become the keys for the
667
// odd-positioned components that follow them. When there is an odd
668
// number of components, the value for the last key will be unchanged
669
// if the key was already present in the hash table, or will be the
670
// empty string if the key is a newly inserted key.
671
void SplitStringToMapUsing(const string& full, const char* delim, map<string, string>* result);
672
void SplitStringToHashmapUsing(const string& full, const char* delim,
673
                               std::unordered_map<string, string>* result);
674
675
// ----------------------------------------------------------------------
676
// SplitStringAllowEmpty()
677
//
678
// Split a string using one or more byte delimiters, presented as a
679
// nul-terminated c string. Append the components to 'result'. If there are
680
// consecutive delimiters, this function will return corresponding empty
681
// strings.  If you want to drop the empty strings, try SplitStringUsing().
682
//
683
// If "full" is the empty string, yields an empty string as the only value.
684
//
685
// ==> NEW API: Consider using the new Split API defined above. <==
686
//
687
//   using strings::Split;
688
//   using strings::delimiter::AnyOf;
689
//
690
//   vector<string> v = Split(full, AnyOf(delimiter));
691
//
692
// For even better performance, store the result in a vector<StringPiece> to
693
// avoid string copies.
694
// ----------------------------------------------------------------------
695
void SplitStringAllowEmpty(const string& full, const char* delim, vector<string>* result);
696
697
// ----------------------------------------------------------------------
698
// SplitStringWithEscaping()
699
// SplitStringWithEscapingAllowEmpty()
700
// SplitStringWithEscapingToSet()
701
// SplitStringWithEscapingToHashset()
702
703
//   Split the string using the specified delimiters, taking escaping into
704
//   account. '\' is not allowed as a delimiter.
705
//
706
//   Within the string, preserve a delimiter preceded by a backslash as a
707
//   literal delimiter. In addition, preserve two consecutive backslashes as
708
//   a single literal backslash. Do not unescape any other backslash-character
709
//   sequence.
710
//
711
//   Eg. 'foo\=bar=baz\\qu\ux' split on '=' becomes ('foo=bar', 'baz\qu\ux')
712
//
713
//   All versions other than "AllowEmpty" discard any empty substrings.
714
// ----------------------------------------------------------------------
715
void SplitStringWithEscaping(const string& full, const strings::CharSet& delimiters,
716
                             vector<string>* result);
717
void SplitStringWithEscapingAllowEmpty(const string& full, const strings::CharSet& delimiters,
718
                                       vector<string>* result);
719
void SplitStringWithEscapingToSet(const string& full, const strings::CharSet& delimiters,
720
                                  set<string>* result);
721
void SplitStringWithEscapingToHashset(const string& full, const strings::CharSet& delimiters,
722
                                      std::unordered_set<string>* result);
723
724
// ----------------------------------------------------------------------
725
// SplitStringIntoNPiecesAllowEmpty()
726
727
//    Split a string using a nul-terminated list of byte
728
//    delimiters. Append the components to 'result'.  If there are
729
//    consecutive delimiters, this function will return corresponding
730
//    empty strings. The string is split into at most the specified
731
//    number of pieces greedily. This means that the last piece may
732
//    possibly be split further. To split into as many pieces as
733
//    possible, specify 0 as the number of pieces.
734
//
735
//    If "full" is the empty string, yields an empty string as the only value.
736
// ----------------------------------------------------------------------
737
void SplitStringIntoNPiecesAllowEmpty(const string& full, const char* delimiters, int pieces,
738
                                      vector<string>* result);
739
740
// ----------------------------------------------------------------------
741
// SplitStringAndParse()
742
// SplitStringAndParseToContainer()
743
// SplitStringAndParseToList()
744
//    Split a string using a nul-terminated list of character
745
//    delimiters.  For each component, parse using the provided
746
//    parsing function and if successful, append it to 'result'.
747
//    Return true if and only if all components parse successfully.
748
//    If there are consecutive delimiters, this function skips over
749
//    all of them.  This function will correctly handle parsing
750
//    strings that have embedded \0s.
751
//
752
// SplitStringAndParse fills into a vector.
753
// SplitStringAndParseToContainer fills into any container that implements
754
//    a single-argument insert function. (i.e. insert(const value_type& x) ).
755
// SplitStringAndParseToList fills into any container that implements a single-
756
//    argument push_back function (i.e. push_back(const value_type& x) ) and
757
//    whose value_type is default-constructible.
758
//    NOTE: Each component is parsed into a temporary value_type, which is then
759
//    copied into the container with push_back(); back() is not used.
760
//
761
// Example Usage:
762
//  vector<double> values;
763
//  CHECK(SplitStringAndParse("1.0,2.0,3.0", ",", &safe_strtod, &values));
764
//  CHECK_EQ(3, values.size());
765
//
766
//  vector<int64> values;
767
//  CHECK(SplitStringAndParse("1M,2M,3M", ",",
768
//        &HumanReadableNumBytes::ToInt64, &values));
769
//  CHECK_EQ(3, values.size());
770
//
771
//  set<int64> values;
772
//  CHECK(SplitStringAndParseToContainer("3,1,1,2", ",",
773
//        &safe_strto64, &values));
774
//  CHECK_EQ(3, values.size());  // the set keeps only one copy of "1"
775
//
776
//  deque<int64> values;
777
//  CHECK(SplitStringAndParseToList("3,1,1,2", ",", &safe_strto64, &values));
778
//  CHECK_EQ(4, values.size());
779
// ----------------------------------------------------------------------
780
template <class T>
781
bool SplitStringAndParse(StringPiece source, StringPiece delim,
782
                         bool (*parse)(const string& str, T* value), vector<T>* result);
783
template <class Container>
784
bool SplitStringAndParseToContainer(StringPiece source, StringPiece delim,
785
                                    bool (*parse)(const string& str,
786
                                                  typename Container::value_type* value),
787
                                    Container* result);
788
789
template <class List>
790
bool SplitStringAndParseToList(StringPiece source, StringPiece delim,
791
                               bool (*parse)(const string& str, typename List::value_type* value),
792
                               List* result);
793
// ----------------------------------------------------------------------
794
// SplitRange()
795
//    Splits a string of the form "<from>-<to>".  Either or both can be
796
//    missing.  A raw number (<to>) is interpreted as "<to>-".  Modifies
797
//    parameters insofar as they're specified by the string.  RETURNS
798
//    true iff the input is a well-formed range.  If it RETURNS false,
799
//    from and to remain unchanged.  The range in rangestr should be
800
//    terminated either by "\0" or by whitespace.
801
// ----------------------------------------------------------------------
802
bool SplitRange(const char* rangestr, int* from, int* to);
803
804
// ----------------------------------------------------------------------
805
// SplitCSVLineWithDelimiter()
806
//    CSV lines come in many guises.  There's the Comma Separated Values
807
//    variety, in which fields are separated by (surprise!) commas.  There's
808
//    also the tab-separated values variant, in which tabs separate the
809
//    fields.  This routine handles both, which makes it almost like
810
//    SplitUsing(line, delimiter), but for some special processing.  For both
811
//    delimiters, whitespace is trimmed from either side of the field value.
812
//    If the delimiter is ',', we play additional games with quotes.  A
813
//    field value surrounded by double quotes is allowed to contain commas,
814
//    which are not treated as field separators.  Within a double-quoted
815
//    string, a series of two double quotes signals an escaped single double
816
//    quote.  It'll be clearer in the examples.
817
//    Example:
818
//     Google , x , "Buchheit, Paul", "string with "" quote in it"
819
//     -->  [Google], [x], [Buchheit, Paul], [string with " quote in it]
820
//
821
// SplitCSVLine()
822
//    A convenience wrapper around SplitCSVLineWithDelimiter which uses
823
//    ',' as the delimiter.
824
//
825
// The following variants of SplitCSVLine() are not recommended for new code.
826
// Please consider the CSV parser in //util/csv as an alternative.  Examples:
827
// To parse a single line:
828
//     #include "util/csv/parser.h"
829
//     vector<string> fields = util::csv::ParseLine(line).fields();
830
//
831
// To parse an entire file:
832
//     #include "util/csv/parser.h"
833
//     for (Record rec : Parser(source)) {
834
//       vector<string> fields = rec.fields();
835
//     }
836
//
837
// See //util/csv/parser.h for more complete documentation.
838
//
839
// ----------------------------------------------------------------------
840
void SplitCSVLine(char* line, vector<char*>* cols);
841
void SplitCSVLineWithDelimiter(char* line, char delimiter, vector<char*>* cols);
842
// SplitCSVLine string wrapper that internally makes a copy of string line.
843
void SplitCSVLineWithDelimiterForStrings(const string& line, char delimiter, vector<string>* cols);
844
845
// ----------------------------------------------------------------------
846
// SplitStructuredLine()
847
//    Splits a line using the given delimiter, and places the columns
848
//    into 'cols'. This is unlike 'SplitUsing(line, ",")' because you can
849
//    define pairs of opening closing symbols inside which the delimiter should
850
//    be ignored. If the symbol_pair string has an odd number of characters,
851
//    the last character (which cannot be paired) will be assumed to be both an
852
//    opening and closing symbol.
853
//    WARNING : The input string 'line' is destroyed in the process.
854
//    The function returns 0 if the line was parsed correctly (i.e all the
855
//    opened braces had their closing braces) otherwise, it returns the position
856
//    of the error.
857
//    Example:
858
//     SplitStructuredLine("item1,item2,{subitem1,subitem2},item4,[5,{6,7}]",
859
//                         ',',
860
//                         "{}[]", &output)
861
//     --> output = { "item1", "item2", "{subitem1,subitem2}", "item4",
862
//                    "[5,{6,7}]" }
863
//    Example2: trying to split "item1,[item2,{4,5],5}" will fail and the
864
//              function will return the position of the problem : ]
865
//
866
// ----------------------------------------------------------------------
867
char* SplitStructuredLine(char* line, char delimiter, const char* symbol_pairs,
868
                          vector<char*>* cols);
869
870
// Similar to the function with the same name above, but splits a StringPiece
871
// into StringPiece parts. Returns true if successful.
872
bool SplitStructuredLine(StringPiece line, char delimiter, const char* symbol_pairs,
873
                         vector<StringPiece>* cols);
874
875
// ----------------------------------------------------------------------
876
// SplitStructuredLineWithEscapes()
877
//    Like SplitStructuredLine but also allows characters to be escaped.
878
//
879
//    WARNING: the escape characters will be replicated in the output
880
//    columns rather than being consumed, i.e. if {} were the opening and
881
//    closing symbols, using \{ to quote a curly brace in the middle of
882
//    an option would pass this unchanged.
883
//
884
//    Example:
885
//     SplitStructuredLineWithEscapes(
886
//       "\{item1\},it\\em2,{\{subitem1\},sub\\item2},item4\,item5,[5,{6,7}]",
887
//                     ',',
888
//                     "{}[]",
889
//                     &output)
890
//     --> output = { "\{item1\}", "it\\em2", "{\{subitem1\},sub\\item2}",
891
//                    "item4\,item5", "[5,{6,7}]" }
892
//
893
// ----------------------------------------------------------------------
894
char* SplitStructuredLineWithEscapes(char* line, char delimiter, const char* symbol_pairs,
895
                                     vector<char*>* cols);
896
897
// Similar to the function with the same name above, but splits a StringPiece
898
// into StringPiece parts. Returns true if successful.
899
bool SplitStructuredLineWithEscapes(StringPiece line, char delimiter, const char* symbol_pairs,
900
                                    vector<StringPiece>* cols);
901
902
// ----------------------------------------------------------------------
903
// DEPRECATED(jgm): See the "NEW API" comment about this function below for
904
// example code showing an alternative.
905
//
906
// SplitStringIntoKeyValues()
907
// Split a line into a key string and a vector of value strings. The line has
908
// the following format:
909
//
910
// <key><kvsep>+<vvsep>*<value1><vvsep>+<value2><vvsep>+<value3>...<vvsep>*
911
//
912
// where key and value are strings; */+ means zero/one or more; <kvsep> is
913
// a delimiter character to separate key and value; and <vvsep> is a delimiter
914
// character to separate between values. The user can specify a bunch of
915
// delimiter characters using a string. For example, if the user specifies
916
// the separator string as "\t ", then either ' ' or '\t' or any combination
917
// of them will be treated as separator. For <vvsep>, the user can specify an
918
// empty string to indicate there is only one value.
919
//
920
// Note: this function assumes the input string begins exactly with a
921
// key. Therefore, if you use whitespaces to separate key and value, you
922
// should not let whitespace precede the key in the input. Otherwise, you
923
// will get an empty string as the key.
924
//
925
// A line with no <kvsep> will return an empty string as the key, even if
926
// <key> is non-empty!
927
//
928
// The syntax makes it impossible for a value to be the empty string.
929
// It is possible for the number of values to be zero.
930
//
931
// Returns false if the line has no <kvsep> or if the number of values is
932
// zero.
933
//
934
// ==> NEW API: Consider using the new Split API defined above. <==
935
//
936
// The SplitStringIntoKeyValues() function has some subtle and surprising
937
// semantics in various corner cases. To avoid this the strings::Split API is
938
// recommended. The following example shows how to split a string of delimited
939
// key-value pairs into a vector of pairs using the strings::Split API.
940
//
941
//   using strings::Split;
942
//   using strings::delimiter::AnyOf;
943
//   using strings::delimiter::Limit;
944
//
945
//   pair<string, StringPiece> key_values =
946
//       Split(line, Limit(AnyOf(kv_delim), 1));
947
//   string key = key_values.first;
948
//   vector<string> values = Split(key_values.second, AnyOf(vv_delim));
949
//
950
// ----------------------------------------------------------------------
951
bool SplitStringIntoKeyValues(const string& line, const string& key_value_delimiters,
952
                              const string& value_value_delimiters, string* key,
953
                              vector<string>* values);
954
955
// ----------------------------------------------------------------------
956
// SplitStringIntoKeyValuePairs()
957
// Split a line into a vector of <key, value> pairs. The line has
958
// the following format:
959
//
960
// <kvpsep>*<key1><kvsep>+<value1><kvpsep>+<key2><kvsep>+<value2>...<kvpsep>*
961
//
962
// Where key and value are strings; */+ means zero/one or more. <kvsep> is
963
// a delimiter character to separate key and value and <kvpsep> is a delimiter
964
// character to separate key value pairs. The user can specify a bunch of
965
// delimiter characters using a string.
966
//
967
// Note: this function assumes each key-value pair begins exactly with a
968
// key. Therefore, if you use whitespaces to separate key and value, you
969
// should not let whitespace precede the key in the pair. Otherwise, you
970
// will get an empty string as the key.
971
//
972
// A pair with no <kvsep> will return empty strings as the key and value,
973
// even if <key> is non-empty!
974
//
975
// Returns false for pairs with no <kvsep> specified and for pairs with
976
// empty strings as values.
977
//
978
// ==> NEW API: Consider using the new Split API defined above. <==
979
//
980
// The SplitStringIntoKeyValuePairs() function has some subtle and surprising
981
// semantics in various corner cases. To avoid this the strings::Split API is
982
// recommended. The following example shows how to split a string of delimited
983
// key-value pairs into a vector of pairs using the strings::Split API.
984
//
985
//   using strings::SkipEmpty;
986
//   using strings::Split;
987
//   using strings::delimiter::AnyOf;
988
//   using strings::delimiter::Limit;
989
//
990
//   vector<pair<string, string>> pairs;  // or even map<string, string>
991
//   for (StringPiece sp : Split(line, AnyOf(pair_delim), SkipEmpty())) {
992
//     pairs.push_back(Split(sp, Limit(AnyOf(kv_delim), 1), SkipEmpty()));
993
//   }
994
//
995
// ----------------------------------------------------------------------
996
bool SplitStringIntoKeyValuePairs(const string& line, const string& key_value_delimiters,
997
                                  const string& key_value_pair_delimiters,
998
                                  vector<pair<string, string>>* kv_pairs);
999
1000
// ----------------------------------------------------------------------
1001
// SplitLeadingDec32Values()
1002
// SplitLeadingDec64Values()
1003
//    A simple parser for space-separated decimal int32/int64 values.
1004
//    Appends parsed integers to the end of the result vector, stopping
1005
//    at the first unparsable spot.  Skips past leading and repeated
1006
//    whitespace (does not consume trailing whitespace), and returns
1007
//    a pointer beyond the last character parsed.
1008
// --------------------------------------------------------------------
1009
const char* SplitLeadingDec32Values(const char* next, vector<int32>* result);
1010
const char* SplitLeadingDec64Values(const char* next, vector<int64>* result);
1011
1012
// ----------------------------------------------------------------------
1013
// SplitOneIntToken()
1014
// SplitOneInt32Token()
1015
// SplitOneUint32Token()
1016
// SplitOneInt64Token()
1017
// SplitOneUint64Token()
1018
// SplitOneDoubleToken()
1019
// SplitOneFloatToken()
1020
//   Parse a single "delim" delimited number from "*source" into "*value".
1021
//   Modify *source to point after the delimiter.
1022
//   If no delimiter is present after the number, set *source to NULL.
1023
//
1024
//   If the start of *source is not a number, return false.
1025
//   If the int is followed by the null character, return true.
1026
//   If the int is not followed by a character from delim, return false.
1027
//   If *source is NULL, return false.
1028
//
1029
//   They cannot handle decimal numbers with leading 0s, since they will be
1030
//   treated as octal.
1031
// ----------------------------------------------------------------------
1032
bool SplitOneIntToken(const char** source, const char* delim, int* value);
1033
bool SplitOneInt32Token(const char** source, const char* delim, int32* value);
1034
bool SplitOneUint32Token(const char** source, const char* delim, uint32* value);
1035
bool SplitOneInt64Token(const char** source, const char* delim, int64* value);
1036
bool SplitOneUint64Token(const char** source, const char* delim, uint64* value);
1037
bool SplitOneDoubleToken(const char** source, const char* delim, double* value);
1038
bool SplitOneFloatToken(const char** source, const char* delim, float* value);
1039
1040
// Some aliases, so that the function names are standardized against the names
1041
// of the reflection setters/getters in proto2. This makes it easier to use
1042
// certain macros with reflection when creating custom text formats for protos.
1043
1044
0
inline bool SplitOneUInt32Token(const char** source, const char* delim, uint32* value) {
1045
0
    return SplitOneUint32Token(source, delim, value);
1046
0
}
1047
1048
0
inline bool SplitOneUInt64Token(const char** source, const char* delim, uint64* value) {
1049
0
    return SplitOneUint64Token(source, delim, value);
1050
0
}
1051
1052
// ----------------------------------------------------------------------
1053
// SplitOneDecimalIntToken()
1054
// SplitOneDecimalInt32Token()
1055
// SplitOneDecimalUint32Token()
1056
// SplitOneDecimalInt64Token()
1057
// SplitOneDecimalUint64Token()
1058
// Parse a single "delim"-delimited number from "*source" into "*value".
1059
// Unlike SplitOneIntToken, etc., this function always interprets
1060
// the numbers as decimal.
1061
bool SplitOneDecimalIntToken(const char** source, const char* delim, int* value);
1062
bool SplitOneDecimalInt32Token(const char** source, const char* delim, int32* value);
1063
bool SplitOneDecimalUint32Token(const char** source, const char* delim, uint32* value);
1064
bool SplitOneDecimalInt64Token(const char** source, const char* delim, int64* value);
1065
bool SplitOneDecimalUint64Token(const char** source, const char* delim, uint64* value);
1066
1067
// ----------------------------------------------------------------------
1068
// SplitOneHexUint32Token()
1069
// SplitOneHexUint64Token()
1070
// Once more, for hexadecimal numbers (unsigned only).
1071
bool SplitOneHexUint32Token(const char** source, const char* delim, uint32* value);
1072
bool SplitOneHexUint64Token(const char** source, const char* delim, uint64* value);
1073
1074
// ###################### TEMPLATE DEFINITIONS BELOW ##########################
1075
1076
// SplitStringAndParse() -- see description above
1077
template <class T>
1078
bool SplitStringAndParse(StringPiece source, StringPiece delim,
1079
0
                         bool (*parse)(const string& str, T* value), vector<T>* result) {
1080
0
    return SplitStringAndParseToList(source, delim, parse, result);
1081
0
}
1082
1083
namespace strings {
1084
namespace internal {
1085
1086
template <class Container, class InsertPolicy>
1087
bool SplitStringAndParseToInserter(StringPiece source, StringPiece delim,
1088
                                   bool (*parse)(const string& str,
1089
                                                 typename Container::value_type* value),
1090
0
                                   Container* result, InsertPolicy insert_policy) {
1091
0
    CHECK(NULL != parse);
1092
0
    CHECK(NULL != result);
1093
0
    CHECK(NULL != delim.data());
1094
0
    CHECK_GT(delim.size(), 0);
1095
0
    bool retval = true;
1096
0
    vector<StringPiece> pieces =
1097
0
            strings::Split(source, strings::delimiter::AnyOf(delim), strings::SkipEmpty());
1098
0
    for (const auto& piece : pieces) {
1099
0
        typename Container::value_type t;
1100
0
        if (parse(piece.as_string(), &t)) {
1101
0
            insert_policy(result, t);
1102
0
        } else {
1103
0
            retval = false;
1104
0
        }
1105
0
    }
1106
0
    return retval;
1107
0
}
1108
1109
// Cannot use output iterator here (e.g. std::inserter, std::back_inserter)
1110
// because some callers use non-standard containers that don't have iterators,
1111
// only an insert() or push_back() method.
1112
struct BasicInsertPolicy {
1113
    template <class C, class V>
1114
    void operator()(C* c, const V& v) const {
1115
        c->insert(v);
1116
    }
1117
};
1118
1119
struct BackInsertPolicy {
1120
    template <class C, class V>
1121
0
    void operator()(C* c, const V& v) const {
1122
0
        c->push_back(v);
1123
0
    }
1124
};
1125
1126
} // namespace internal
1127
} // namespace strings
1128
1129
// SplitStringAndParseToContainer() -- see description above
1130
template <class Container>
1131
bool SplitStringAndParseToContainer(StringPiece source, StringPiece delim,
1132
                                    bool (*parse)(const string& str,
1133
                                                  typename Container::value_type* value),
1134
                                    Container* result) {
1135
    return strings::internal::SplitStringAndParseToInserter(source, delim, parse, result,
1136
                                                            strings::internal::BasicInsertPolicy());
1137
}
1138
1139
// SplitStringAndParseToList() -- see description above
1140
template <class List>
1141
bool SplitStringAndParseToList(StringPiece source, StringPiece delim,
1142
                               bool (*parse)(const string& str, typename List::value_type* value),
1143
0
                               List* result) {
1144
0
    return strings::internal::SplitStringAndParseToInserter(source, delim, parse, result,
1145
0
                                                            strings::internal::BackInsertPolicy());
1146
0
}
1147
1148
// END DOXYGEN SplitFunctions grouping
1149
/* @} */