/root/doris/be/src/gutil/strings/stringpiece.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2001, Google Inc. All rights reserved. |
2 | | // Maintainer: mec@google.com (Michael Chastain) |
3 | | // |
4 | | // A StringPiece points to part or all of a string, Cord, double-quoted string |
5 | | // literal, or other string-like object. A StringPiece does *not* own the |
6 | | // string to which it points. A StringPiece is not null-terminated. |
7 | | // |
8 | | // You can use StringPiece as a function or method parameter. A StringPiece |
9 | | // parameter can receive a double-quoted string literal argument, a "const |
10 | | // char*" argument, a string argument, or a StringPiece argument with no data |
11 | | // copying. Systematic use of StringPiece for arguments reduces data |
12 | | // copies and strlen() calls. |
13 | | // |
14 | | // You may pass a StringPiece argument by value or const reference. |
15 | | // Passing by value generates slightly smaller code. |
16 | | // void MyFunction(const StringPiece& arg); |
17 | | // // Slightly better, but same lifetime requirements as const-ref parameter: |
18 | | // void MyFunction(StringPiece arg); |
19 | | // |
20 | | // StringPiece is also suitable for local variables if you know that |
21 | | // the lifetime of the underlying object is longer than the lifetime |
22 | | // of your StringPiece variable. |
23 | | // |
24 | | // Beware of binding a StringPiece to a temporary: |
25 | | // StringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem |
26 | | // |
27 | | // This code is okay: |
28 | | // string str = obj.MethodReturningString(); // str owns its contents |
29 | | // StringPiece sp(str); // GOOD, although you may not need sp at all |
30 | | // |
31 | | // StringPiece is sometimes a poor choice for a return value and usually a poor |
32 | | // choice for a data member. If you do use a StringPiece this way, it is your |
33 | | // responsibility to ensure that the object pointed to by the StringPiece |
34 | | // outlives the StringPiece. |
35 | | // |
36 | | // A StringPiece may represent just part of a string; thus the name "Piece". |
37 | | // For example, when splitting a string, vector<StringPiece> is a natural data |
38 | | // type for the output. For another example, a Cord is a non-contiguous, |
39 | | // potentially very long string-like object. The Cord class has an interface |
40 | | // that iteratively provides StringPiece objects that point to the |
41 | | // successive pieces of a Cord object. |
42 | | // |
43 | | // A StringPiece is not null-terminated. If you write code that scans a |
44 | | // StringPiece, you must check its length before reading any characters. |
45 | | // Common idioms that work on null-terminated strings do not work on |
46 | | // StringPiece objects. |
47 | | // |
48 | | // There are several ways to create a null StringPiece: |
49 | | // StringPiece() |
50 | | // StringPiece(NULL) |
51 | | // StringPiece(NULL, 0) |
52 | | // For all of the above, sp.data() == NULL, sp.length() == 0, |
53 | | // and sp.empty() == true. Also, if you create a StringPiece with |
54 | | // a non-NULL pointer then sp.data() != NULL. Once created, |
55 | | // sp.data() will stay either NULL or not-NULL, except if you call |
56 | | // sp.clear() or sp.set(). |
57 | | // |
58 | | // Thus, you can use StringPiece(NULL) to signal an out-of-band value |
59 | | // that is different from other StringPiece values. This is similar |
60 | | // to the way that const char* p1 = NULL; is different from |
61 | | // const char* p2 = "";. |
62 | | // |
63 | | // There are many ways to create an empty StringPiece: |
64 | | // StringPiece() |
65 | | // StringPiece(NULL) |
66 | | // StringPiece(NULL, 0) |
67 | | // StringPiece("") |
68 | | // StringPiece("", 0) |
69 | | // StringPiece("abcdef", 0) |
70 | | // StringPiece("abcdef"+6, 0) |
71 | | // For all of the above, sp.length() will be 0 and sp.empty() will be true. |
72 | | // For some empty StringPiece values, sp.data() will be NULL. |
73 | | // For some empty StringPiece values, sp.data() will not be NULL. |
74 | | // |
75 | | // Be careful not to confuse: null StringPiece and empty StringPiece. |
76 | | // The set of empty StringPieces properly includes the set of null StringPieces. |
77 | | // That is, every null StringPiece is an empty StringPiece, |
78 | | // but some non-null StringPieces are empty StringPieces too. |
79 | | // |
80 | | // All empty StringPiece values compare equal to each other. |
81 | | // Even a null StringPiece compares equal to a non-null empty StringPiece: |
82 | | // StringPiece() == StringPiece("", 0) |
83 | | // StringPiece(NULL) == StringPiece("abc", 0) |
84 | | // StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0) |
85 | | // |
86 | | // Look carefully at this example: |
87 | | // StringPiece("") == NULL |
88 | | // True or false? TRUE, because StringPiece::operator== converts |
89 | | // the right-hand side from NULL to StringPiece(NULL), |
90 | | // and then compares two zero-length spans of characters. |
91 | | // However, we are working to make this example produce a compile error. |
92 | | // |
93 | | // Suppose you want to write: |
94 | | // bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD |
95 | | // Do not do that. Write one of these instead: |
96 | | // bool TestNull(StringPiece sp) { return sp.data() == NULL; } |
97 | | // bool TestEmpty(StringPiece sp) { return sp.empty(); } |
98 | | // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty? |
99 | | // Right now, TestWhat? behaves like TestEmpty. |
100 | | // We are working to make TestWhat? produce a compile error. |
101 | | // TestNull is good to test for an out-of-band signal. |
102 | | // TestEmpty is good to test for an empty StringPiece. |
103 | | // |
104 | | // Caveats (again): |
105 | | // (1) The lifetime of the pointed-to string (or piece of a string) |
106 | | // must be longer than the lifetime of the StringPiece. |
107 | | // (2) There may or may not be a '\0' character after the end of |
108 | | // StringPiece data. |
109 | | // (3) A null StringPiece is empty. |
110 | | // An empty StringPiece may or may not be a null StringPiece. |
111 | | |
112 | | #pragma once |
113 | | |
114 | | #include <assert.h> |
115 | | #include <stddef.h> |
116 | | #include <string.h> |
117 | | #include <iosfwd> |
118 | | #include <string> |
119 | | #include <cstddef> |
120 | | #include <iterator> |
121 | | #include <string_view> |
122 | | #include <limits> // IWYU pragma: keep |
123 | | |
124 | | #include "gutil/strings/fastmem.h" |
125 | | #include "gutil/hash/string_hash.h" |
126 | | #include "gutil/int128.h" |
127 | | |
128 | | class StringPiece { |
129 | | private: |
130 | | const char* ptr_ = nullptr; |
131 | | int length_; |
132 | | |
133 | | public: |
134 | | // We provide non-explicit singleton constructors so users can pass |
135 | | // in a "const char*" or a "string" wherever a "StringPiece" is |
136 | | // expected. |
137 | | // |
138 | | // Style guide exception granted: |
139 | | // http://goto/style-guide-exception-20978288 |
140 | 2.49k | StringPiece() : ptr_(NULL), length_(0) {} |
141 | | StringPiece(const char* str) // NOLINT(runtime/explicit) |
142 | 122k | : ptr_(str), length_(0) { |
143 | 122k | if (str != NULL) { |
144 | 122k | size_t length = strlen(str); |
145 | 122k | assert(length <= static_cast<size_t>(std::numeric_limits<int>::max())); |
146 | 0 | length_ = static_cast<int>(length); |
147 | 122k | } |
148 | 122k | } |
149 | | StringPiece(const std::string& str) // NOLINT(runtime/explicit) |
150 | 7.82k | : ptr_(str.data()), length_(0) { |
151 | 7.82k | size_t length = str.size(); |
152 | 7.82k | assert(length <= static_cast<size_t>(std::numeric_limits<int>::max())); |
153 | 0 | length_ = static_cast<int>(length); |
154 | 7.82k | } |
155 | | StringPiece(std::string_view view) // NOLINT(runtime/explicit) |
156 | 35 | : ptr_(view.data()), length_(0) { |
157 | 35 | size_t length = view.size(); |
158 | 35 | assert(length <= static_cast<size_t>(std::numeric_limits<int>::max())); |
159 | 0 | length_ = static_cast<int>(length); |
160 | 35 | } |
161 | 3.81k | StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { assert(len >= 0); } |
162 | | |
163 | | // Substring of another StringPiece. |
164 | | // pos must be non-negative and <= x.length(). |
165 | | StringPiece(StringPiece x, int pos); |
166 | | // Substring of another StringPiece. |
167 | | // pos must be non-negative and <= x.length(). |
168 | | // len must be non-negative and will be pinned to at most x.length() - pos. |
169 | | StringPiece(StringPiece x, int pos, int len); |
170 | | |
171 | | // data() may return a pointer to a buffer with embedded NULs, and the |
172 | | // returned buffer may or may not be null terminated. Therefore it is |
173 | | // typically a mistake to pass data() to a routine that expects a NUL |
174 | | // terminated string. |
175 | 16.4k | const char* data() const { return ptr_; } |
176 | 2.93M | int size() const { return length_; } |
177 | 3.10k | int length() const { return length_; } |
178 | 6.52k | bool empty() const { return length_ == 0; } |
179 | | |
180 | 0 | void clear() { |
181 | 0 | ptr_ = NULL; |
182 | 0 | length_ = 0; |
183 | 0 | } |
184 | | |
185 | 9.21k | void set(const char* data, int len) { |
186 | 9.21k | assert(len >= 0); |
187 | 0 | ptr_ = data; |
188 | 9.21k | length_ = len; |
189 | 9.21k | } |
190 | | |
191 | 0 | void set(const char* str) { |
192 | 0 | ptr_ = str; |
193 | 0 | if (str != NULL) |
194 | 0 | length_ = static_cast<int>(strlen(str)); |
195 | 0 | else |
196 | 0 | length_ = 0; |
197 | 0 | } |
198 | 0 | void set(const void* data, int len) { |
199 | 0 | ptr_ = reinterpret_cast<const char*>(data); |
200 | 0 | length_ = len; |
201 | 0 | } |
202 | | |
203 | 4.54M | char operator[](int i) const { |
204 | 4.54M | assert(0 <= i); |
205 | 0 | assert(i < length_); |
206 | 0 | return ptr_[i]; |
207 | 4.54M | } |
208 | | |
209 | 3.51k | void remove_prefix(int n) { |
210 | 3.51k | assert(length_ >= n); |
211 | 0 | ptr_ += n; |
212 | 3.51k | length_ -= n; |
213 | 3.51k | } |
214 | | |
215 | 410 | void remove_suffix(int n) { |
216 | 410 | assert(length_ >= n); |
217 | 0 | length_ -= n; |
218 | 410 | } |
219 | | |
220 | | // returns {-1, 0, 1} |
221 | 0 | int compare(StringPiece x) const { |
222 | 0 | const int min_size = length_ < x.length_ ? length_ : x.length_; |
223 | 0 | int r = memcmp(ptr_, x.ptr_, min_size); |
224 | 0 | if (r < 0) return -1; |
225 | 0 | if (r > 0) return 1; |
226 | 0 | if (length_ < x.length_) return -1; |
227 | 0 | if (length_ > x.length_) return 1; |
228 | 0 | return 0; |
229 | 0 | } |
230 | | |
231 | 2.77k | std::string as_string() const { return ToString(); } |
232 | | // We also define ToString() here, since many other string-like |
233 | | // interfaces name the routine that converts to a C++ string |
234 | | // "ToString", and it's confusing to have the method that does that |
235 | | // for a StringPiece be called "as_string()". We also leave the |
236 | | // "as_string()" method defined here for existing code. |
237 | 6.60k | std::string ToString() const { |
238 | 6.60k | if (ptr_ == NULL) return std::string(); |
239 | 6.59k | return std::string(data(), size()); |
240 | 6.60k | } |
241 | | |
242 | | void CopyToString(std::string* target) const; |
243 | | void AppendToString(std::string* target) const; |
244 | | |
245 | 1.18k | bool starts_with(StringPiece x) const { |
246 | 1.18k | return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0); |
247 | 1.18k | } |
248 | | |
249 | 1.59k | bool ends_with(StringPiece x) const { |
250 | 1.59k | return ((length_ >= x.length_) && |
251 | 1.59k | (memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0)); |
252 | 1.59k | } |
253 | | |
254 | | // standard STL container boilerplate |
255 | | typedef char value_type; |
256 | | typedef const char* pointer; |
257 | | typedef const char& reference; |
258 | | typedef const char& const_reference; |
259 | | typedef size_t size_type; |
260 | | typedef ptrdiff_t difference_type; |
261 | | static const size_type npos; |
262 | | typedef const char* const_iterator; |
263 | | typedef const char* iterator; |
264 | | typedef std::reverse_iterator<const_iterator> const_reverse_iterator; |
265 | | typedef std::reverse_iterator<iterator> reverse_iterator; |
266 | 21.1k | iterator begin() const { return ptr_; } |
267 | 22.7k | iterator end() const { return ptr_ + length_; } |
268 | 0 | const_reverse_iterator rbegin() const { return const_reverse_iterator(ptr_ + length_); } |
269 | 0 | const_reverse_iterator rend() const { return const_reverse_iterator(ptr_); } |
270 | | // STL says return size_type, but Google says return int |
271 | 0 | int max_size() const { return length_; } |
272 | 0 | int capacity() const { return length_; } |
273 | | |
274 | | // cpplint.py emits a false positive [build/include_what_you_use] |
275 | | int copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT |
276 | | |
277 | | bool contains(StringPiece s) const; |
278 | | |
279 | | int find(StringPiece s, size_type pos = 0) const; |
280 | | int find(char c, size_type pos = 0) const; |
281 | | int rfind(StringPiece s, size_type pos = npos) const; |
282 | | int rfind(char c, size_type pos = npos) const; |
283 | | |
284 | | int find_first_of(StringPiece s, size_type pos = 0) const; |
285 | 0 | int find_first_of(char c, size_type pos = 0) const { return find(c, pos); } |
286 | | int find_first_not_of(StringPiece s, size_type pos = 0) const; |
287 | | int find_first_not_of(char c, size_type pos = 0) const; |
288 | | int find_last_of(StringPiece s, size_type pos = npos) const; |
289 | 0 | int find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); } |
290 | | int find_last_not_of(StringPiece s, size_type pos = npos) const; |
291 | | int find_last_not_of(char c, size_type pos = npos) const; |
292 | | |
293 | | StringPiece substr(size_type pos, size_type n = npos) const; |
294 | | }; |
295 | | |
296 | | // This large function is defined inline so that in a fairly common case where |
297 | | // one of the arguments is a literal, the compiler can elide a lot of the |
298 | | // following comparisons. |
299 | 0 | inline bool operator==(StringPiece x, StringPiece y) { |
300 | 0 | int len = x.size(); |
301 | 0 | if (len != y.size()) { |
302 | 0 | return false; |
303 | 0 | } |
304 | | |
305 | 0 | return x.data() == y.data() || len <= 0 || strings::memeq(x.data(), y.data(), len); |
306 | 0 | } |
307 | | |
308 | 0 | inline bool operator!=(StringPiece x, StringPiece y) { |
309 | 0 | return !(x == y); |
310 | 0 | } |
311 | | |
312 | 0 | inline bool operator<(StringPiece x, StringPiece y) { |
313 | 0 | const int min_size = x.size() < y.size() ? x.size() : y.size(); |
314 | 0 | const int r = memcmp(x.data(), y.data(), min_size); |
315 | 0 | return (r < 0) || (r == 0 && x.size() < y.size()); |
316 | 0 | } |
317 | | |
318 | 0 | inline bool operator>(StringPiece x, StringPiece y) { |
319 | 0 | return y < x; |
320 | 0 | } |
321 | | |
322 | 0 | inline bool operator<=(StringPiece x, StringPiece y) { |
323 | 0 | return !(x > y); |
324 | 0 | } |
325 | | |
326 | 0 | inline bool operator>=(StringPiece x, StringPiece y) { |
327 | 0 | return !(x < y); |
328 | 0 | } |
329 | | template <class X> |
330 | | struct GoodFastHash; |
331 | | |
332 | | // ------------------------------------------------------------------ |
333 | | // Functions used to create STL containers that use StringPiece |
334 | | // Remember that a StringPiece's lifetime had better be less than |
335 | | // that of the underlying string or char*. If it is not, then you |
336 | | // cannot safely store a StringPiece into an STL container. |
337 | | // ------------------------------------------------------------------ |
338 | | |
339 | | // SWIG doesn't know how to parse this stuff properly. Omit it. |
340 | | #ifndef SWIG |
341 | | |
342 | | template <> |
343 | | struct std::hash<StringPiece> { |
344 | | size_t operator()(StringPiece s) const; |
345 | | }; |
346 | | |
347 | | // An implementation of GoodFastHash for StringPiece. See |
348 | | // GoodFastHash values. |
349 | | template <> |
350 | | struct GoodFastHash<StringPiece> { |
351 | 0 | size_t operator()(StringPiece s) const { return HashStringThoroughly(s.data(), s.size()); } |
352 | | // Less than operator, for MSVC. |
353 | 0 | bool operator()(const StringPiece& s1, const StringPiece& s2) const { return s1 < s2; } |
354 | | static const size_t bucket_size = 4; // These are required by MSVC |
355 | | static const size_t min_buckets = 8; // 4 and 8 are defaults. |
356 | | }; |
357 | | #endif |
358 | | |
359 | | // allow StringPiece to be logged |
360 | | extern ostream& operator<<(ostream& o, StringPiece piece); |