/root/doris/be/src/gutil/strings/stringpiece.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2001, Google Inc. All rights reserved. |
2 | | // Maintainer: mec@google.com (Michael Chastain) |
3 | | // |
4 | | // A StringPiece points to part or all of a string, Cord, double-quoted string |
5 | | // literal, or other string-like object. A StringPiece does *not* own the |
6 | | // string to which it points. A StringPiece is not null-terminated. |
7 | | // |
8 | | // You can use StringPiece as a function or method parameter. A StringPiece |
9 | | // parameter can receive a double-quoted string literal argument, a "const |
10 | | // char*" argument, a string argument, or a StringPiece argument with no data |
11 | | // copying. Systematic use of StringPiece for arguments reduces data |
12 | | // copies and strlen() calls. |
13 | | // |
14 | | // You may pass a StringPiece argument by value or const reference. |
15 | | // Passing by value generates slightly smaller code. |
16 | | // void MyFunction(const StringPiece& arg); |
17 | | // // Slightly better, but same lifetime requirements as const-ref parameter: |
18 | | // void MyFunction(StringPiece arg); |
19 | | // |
20 | | // StringPiece is also suitable for local variables if you know that |
21 | | // the lifetime of the underlying object is longer than the lifetime |
22 | | // of your StringPiece variable. |
23 | | // |
24 | | // Beware of binding a StringPiece to a temporary: |
25 | | // StringPiece sp = obj.MethodReturningString(); // BAD: lifetime problem |
26 | | // |
27 | | // This code is okay: |
28 | | // string str = obj.MethodReturningString(); // str owns its contents |
29 | | // StringPiece sp(str); // GOOD, although you may not need sp at all |
30 | | // |
31 | | // StringPiece is sometimes a poor choice for a return value and usually a poor |
32 | | // choice for a data member. If you do use a StringPiece this way, it is your |
33 | | // responsibility to ensure that the object pointed to by the StringPiece |
34 | | // outlives the StringPiece. |
35 | | // |
36 | | // A StringPiece may represent just part of a string; thus the name "Piece". |
37 | | // For example, when splitting a string, vector<StringPiece> is a natural data |
38 | | // type for the output. For another example, a Cord is a non-contiguous, |
39 | | // potentially very long string-like object. The Cord class has an interface |
40 | | // that iteratively provides StringPiece objects that point to the |
41 | | // successive pieces of a Cord object. |
42 | | // |
43 | | // A StringPiece is not null-terminated. If you write code that scans a |
44 | | // StringPiece, you must check its length before reading any characters. |
45 | | // Common idioms that work on null-terminated strings do not work on |
46 | | // StringPiece objects. |
47 | | // |
48 | | // There are several ways to create a null StringPiece: |
49 | | // StringPiece() |
50 | | // StringPiece(NULL) |
51 | | // StringPiece(NULL, 0) |
52 | | // For all of the above, sp.data() == NULL, sp.length() == 0, |
53 | | // and sp.empty() == true. Also, if you create a StringPiece with |
54 | | // a non-NULL pointer then sp.data() != NULL. Once created, |
55 | | // sp.data() will stay either NULL or not-NULL, except if you call |
56 | | // sp.clear() or sp.set(). |
57 | | // |
58 | | // Thus, you can use StringPiece(NULL) to signal an out-of-band value |
59 | | // that is different from other StringPiece values. This is similar |
60 | | // to the way that const char* p1 = NULL; is different from |
61 | | // const char* p2 = "";. |
62 | | // |
63 | | // There are many ways to create an empty StringPiece: |
64 | | // StringPiece() |
65 | | // StringPiece(NULL) |
66 | | // StringPiece(NULL, 0) |
67 | | // StringPiece("") |
68 | | // StringPiece("", 0) |
69 | | // StringPiece("abcdef", 0) |
70 | | // StringPiece("abcdef"+6, 0) |
71 | | // For all of the above, sp.length() will be 0 and sp.empty() will be true. |
72 | | // For some empty StringPiece values, sp.data() will be NULL. |
73 | | // For some empty StringPiece values, sp.data() will not be NULL. |
74 | | // |
75 | | // Be careful not to confuse: null StringPiece and empty StringPiece. |
76 | | // The set of empty StringPieces properly includes the set of null StringPieces. |
77 | | // That is, every null StringPiece is an empty StringPiece, |
78 | | // but some non-null StringPieces are empty StringPieces too. |
79 | | // |
80 | | // All empty StringPiece values compare equal to each other. |
81 | | // Even a null StringPiece compares equal to a non-null empty StringPiece: |
82 | | // StringPiece() == StringPiece("", 0) |
83 | | // StringPiece(NULL) == StringPiece("abc", 0) |
84 | | // StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0) |
85 | | // |
86 | | // Look carefully at this example: |
87 | | // StringPiece("") == NULL |
88 | | // True or false? TRUE, because StringPiece::operator== converts |
89 | | // the right-hand side from NULL to StringPiece(NULL), |
90 | | // and then compares two zero-length spans of characters. |
91 | | // However, we are working to make this example produce a compile error. |
92 | | // |
93 | | // Suppose you want to write: |
94 | | // bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD |
95 | | // Do not do that. Write one of these instead: |
96 | | // bool TestNull(StringPiece sp) { return sp.data() == NULL; } |
97 | | // bool TestEmpty(StringPiece sp) { return sp.empty(); } |
98 | | // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty? |
99 | | // Right now, TestWhat? behaves like TestEmpty. |
100 | | // We are working to make TestWhat? produce a compile error. |
101 | | // TestNull is good to test for an out-of-band signal. |
102 | | // TestEmpty is good to test for an empty StringPiece. |
103 | | // |
104 | | // Caveats (again): |
105 | | // (1) The lifetime of the pointed-to string (or piece of a string) |
106 | | // must be longer than the lifetime of the StringPiece. |
107 | | // (2) There may or may not be a '\0' character after the end of |
108 | | // StringPiece data. |
109 | | // (3) A null StringPiece is empty. |
110 | | // An empty StringPiece may or may not be a null StringPiece. |
111 | | |
112 | | #pragma once |
113 | | |
114 | | #include <assert.h> |
115 | | #include <stddef.h> |
116 | | #include <string.h> |
117 | | #include <iosfwd> |
118 | | #include <string> |
119 | | #include <cstddef> |
120 | | #include <iterator> |
121 | | #include <string_view> |
122 | | #include <limits> // IWYU pragma: keep |
123 | | |
124 | | #include "gutil/strings/fastmem.h" |
125 | | #include "gutil/hash/string_hash.h" |
126 | | #include "gutil/int128.h" |
127 | | |
// A non-owning, read-only view of a character range, stored as a pointer plus
// an int length. The viewed bytes are never copied and are not required to be
// NUL-terminated; the pointed-to storage must outlive the StringPiece.
//
// A default-constructed (or NULL-constructed) StringPiece is "null":
// data() == nullptr and size() == 0. Every operation below that hands the
// stored pointer to memcmp() first checks that the compared length is
// non-zero, because memcmp() must not receive a null pointer even when asked
// to compare zero bytes.
class StringPiece {
private:
    const char* ptr_ = nullptr;
    int length_ = 0;

    // Narrows a size_t length to the int stored in length_, asserting that the
    // value fits.
    static int CheckedLength(size_t length) {
        assert(length <= static_cast<size_t>(std::numeric_limits<int>::max()));
        return static_cast<int>(length);
    }

public:
    // We provide non-explicit singleton constructors so users can pass
    // in a "const char*" or a "string" wherever a "StringPiece" is
    // expected.
    //
    // Style guide exception granted:
    // http://goto/style-guide-exception-20978288
    StringPiece() : ptr_(nullptr), length_(0) {}
    // Views the NUL-terminated string at str (terminator excluded); a null str
    // yields a null StringPiece.
    StringPiece(const char* str) // NOLINT(runtime/explicit)
            : ptr_(str), length_(0) {
        if (str != nullptr) {
            length_ = CheckedLength(strlen(str));
        }
    }
    // Views the full contents of str.
    StringPiece(const std::string& str) // NOLINT(runtime/explicit)
            : ptr_(str.data()), length_(CheckedLength(str.size())) {}
    // Views len bytes starting at offset; len must be non-negative.
    StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { assert(len >= 0); }

    // Substring of another StringPiece.
    // pos must be non-negative and <= x.length().
    StringPiece(StringPiece x, int pos);
    // Substring of another StringPiece.
    // pos must be non-negative and <= x.length().
    // len must be non-negative and will be pinned to at most x.length() - pos.
    StringPiece(StringPiece x, int pos, int len);

    // data() may return a pointer to a buffer with embedded NULs, and the
    // returned buffer may or may not be null terminated. Therefore it is
    // typically a mistake to pass data() to a routine that expects a NUL
    // terminated string.
    const char* data() const { return ptr_; }
    int size() const { return length_; }
    int length() const { return length_; }
    bool empty() const { return length_ == 0; }

    // Resets this object to the null StringPiece.
    void clear() {
        ptr_ = nullptr;
        length_ = 0;
    }

    // Re-points this view at len bytes starting at data; len must be
    // non-negative.
    void set(const char* data, int len) {
        assert(len >= 0);
        ptr_ = data;
        length_ = len;
    }

    // Re-points this view at the NUL-terminated string str (terminator
    // excluded); a null str produces a null StringPiece.
    void set(const char* str) {
        ptr_ = str;
        // Same overflow check as the const char* constructor.
        length_ = (str != nullptr) ? CheckedLength(strlen(str)) : 0;
    }

    // Same as set(const char*, int) for untyped memory.
    void set(const void* data, int len) {
        assert(len >= 0);
        ptr_ = reinterpret_cast<const char*>(data);
        length_ = len;
    }

    // Returns the character at index i; i must be in [0, size()).
    char operator[](int i) const {
        assert(0 <= i);
        assert(i < length_);
        return ptr_[i];
    }

    // Drops the first n characters from the view; requires 0 <= n <= size().
    void remove_prefix(int n) {
        assert(n >= 0);
        assert(length_ >= n);
        ptr_ += n;
        length_ -= n;
    }

    // Drops the last n characters from the view; requires 0 <= n <= size().
    void remove_suffix(int n) {
        assert(n >= 0);
        assert(length_ >= n);
        length_ -= n;
    }

    // returns {-1, 0, 1}
    int compare(StringPiece x) const {
        const int min_size = length_ < x.length_ ? length_ : x.length_;
        // Either side may be a null StringPiece when min_size == 0; an empty
        // common prefix compares equal without touching memory.
        const int r = (min_size > 0) ? memcmp(ptr_, x.ptr_, min_size) : 0;
        if (r < 0) return -1;
        if (r > 0) return 1;
        if (length_ < x.length_) return -1;
        if (length_ > x.length_) return 1;
        return 0;
    }

    std::string as_string() const { return ToString(); }
    // We also define ToString() here, since many other string-like
    // interfaces name the routine that converts to a C++ string
    // "ToString", and it's confusing to have the method that does that
    // for a StringPiece be called "as_string()". We also leave the
    // "as_string()" method defined here for existing code.
    std::string ToString() const {
        if (ptr_ == nullptr) return std::string();
        return std::string(data(), size());
    }

    void CopyToString(std::string* target) const;
    void AppendToString(std::string* target) const;

    // True when the first x.size() characters of this view equal x. An empty
    // x is a prefix of every StringPiece.
    bool starts_with(StringPiece x) const {
        if (length_ < x.length_) return false;
        return x.length_ == 0 || memcmp(ptr_, x.ptr_, x.length_) == 0;
    }

    // True when the last x.size() characters of this view equal x. An empty
    // x is a suffix of every StringPiece.
    bool ends_with(StringPiece x) const {
        if (length_ < x.length_) return false;
        return x.length_ == 0 || memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0;
    }

    // standard STL container boilerplate
    typedef char value_type;
    typedef const char* pointer;
    typedef const char& reference;
    typedef const char& const_reference;
    typedef size_t size_type;
    typedef ptrdiff_t difference_type;
    static const size_type npos;
    typedef const char* const_iterator;
    typedef const char* iterator;
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
    typedef std::reverse_iterator<iterator> reverse_iterator;
    iterator begin() const { return ptr_; }
    iterator end() const { return ptr_ + length_; }
    const_reverse_iterator rbegin() const { return const_reverse_iterator(ptr_ + length_); }
    const_reverse_iterator rend() const { return const_reverse_iterator(ptr_); }
    // STL says return size_type, but Google says return int
    int max_size() const { return length_; }
    int capacity() const { return length_; }

    // cpplint.py emits a false positive [build/include_what_you_use]
    int copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT

    bool contains(StringPiece s) const;

    int find(StringPiece s, size_type pos = 0) const;
    int find(char c, size_type pos = 0) const;
    int rfind(StringPiece s, size_type pos = npos) const;
    int rfind(char c, size_type pos = npos) const;

    int find_first_of(StringPiece s, size_type pos = 0) const;
    int find_first_of(char c, size_type pos = 0) const { return find(c, pos); }
    int find_first_not_of(StringPiece s, size_type pos = 0) const;
    int find_first_not_of(char c, size_type pos = 0) const;
    int find_last_of(StringPiece s, size_type pos = npos) const;
    int find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); }
    int find_last_not_of(StringPiece s, size_type pos = npos) const;
    int find_last_not_of(char c, size_type pos = npos) const;

    StringPiece substr(size_type pos, size_type n = npos) const;
};
289 | | |
290 | | // This large function is defined inline so that in a fairly common case where |
291 | | // one of the arguments is a literal, the compiler can elide a lot of the |
292 | | // following comparisons. |
293 | 0 | inline bool operator==(StringPiece x, StringPiece y) { |
294 | 0 | int len = x.size(); |
295 | 0 | if (len != y.size()) { |
296 | 0 | return false; |
297 | 0 | } |
298 | | |
299 | 0 | return x.data() == y.data() || len <= 0 || strings::memeq(x.data(), y.data(), len); |
300 | 0 | } |
301 | | |
302 | 0 | inline bool operator!=(StringPiece x, StringPiece y) { |
303 | 0 | return !(x == y); |
304 | 0 | } |
305 | | |
306 | 0 | inline bool operator<(StringPiece x, StringPiece y) { |
307 | 0 | const int min_size = x.size() < y.size() ? x.size() : y.size(); |
308 | 0 | const int r = memcmp(x.data(), y.data(), min_size); |
309 | 0 | return (r < 0) || (r == 0 && x.size() < y.size()); |
310 | 0 | } |
311 | | |
312 | 0 | inline bool operator>(StringPiece x, StringPiece y) { |
313 | 0 | return y < x; |
314 | 0 | } |
315 | | |
316 | 0 | inline bool operator<=(StringPiece x, StringPiece y) { |
317 | 0 | return !(x > y); |
318 | 0 | } |
319 | | |
320 | 0 | inline bool operator>=(StringPiece x, StringPiece y) { |
321 | 0 | return !(x < y); |
322 | 0 | } |
323 | | template <class X> |
324 | | struct GoodFastHash; |
325 | | |
326 | | // ------------------------------------------------------------------ |
327 | | // Functions used to create STL containers that use StringPiece |
328 | | // Remember that a StringPiece's lifetime had better be less than |
329 | | // that of the underlying string or char*. If it is not, then you |
330 | | // cannot safely store a StringPiece into an STL container |
331 | | // ------------------------------------------------------------------ |
332 | | |
333 | | // SWIG doesn't know how to parse this stuff properly. Omit it. |
334 | | #ifndef SWIG |
335 | | |
336 | | template <> |
337 | | struct std::hash<StringPiece> { |
338 | | size_t operator()(StringPiece s) const; |
339 | | }; |
340 | | |
341 | | // An implementation of GoodFastHash for StringPiece. See |
342 | | // GoodFastHash values. |
343 | | template <> |
344 | | struct GoodFastHash<StringPiece> { |
345 | 0 | size_t operator()(StringPiece s) const { return HashStringThoroughly(s.data(), s.size()); } |
346 | | // Less than operator, for MSVC. |
347 | 0 | bool operator()(const StringPiece& s1, const StringPiece& s2) const { return s1 < s2; } |
348 | | static const size_t bucket_size = 4; // These are required by MSVC |
349 | | static const size_t min_buckets = 8; // 4 and 8 are defaults. |
350 | | }; |
351 | | #endif |
352 | | |
353 | | // allow StringPiece to be logged |
354 | | extern ostream& operator<<(ostream& o, StringPiece piece); |