Coverage Report

Created: 2024-11-21 14:46

/root/doris/be/src/gutil/strings/stringpiece.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2001, Google Inc.  All rights reserved.
2
// Maintainer: mec@google.com (Michael Chastain)
3
//
4
// A StringPiece points to part or all of a string, Cord, double-quoted string
5
// literal, or other string-like object.  A StringPiece does *not* own the
6
// string to which it points.  A StringPiece is not null-terminated.
7
//
8
// You can use StringPiece as a function or method parameter.  A StringPiece
9
// parameter can receive a double-quoted string literal argument, a "const
10
// char*" argument, a string argument, or a StringPiece argument with no data
11
// copying.  Systematic use of StringPiece for arguments reduces data
12
// copies and strlen() calls.
13
//
14
// You may pass a StringPiece argument by value or const reference.
15
// Passing by value generates slightly smaller code.
16
//   void MyFunction(const StringPiece& arg);
17
//   // Slightly better, but same lifetime requirements as const-ref parameter:
18
//   void MyFunction(StringPiece arg);
19
//
20
// StringPiece is also suitable for local variables if you know that
21
// the lifetime of the underlying object is longer than the lifetime
22
// of your StringPiece variable.
23
//
24
// Beware of binding a StringPiece to a temporary:
25
//   StringPiece sp = obj.MethodReturningString();  // BAD: lifetime problem
26
//
27
// This code is okay:
28
//   string str = obj.MethodReturningString();  // str owns its contents
29
//   StringPiece sp(str);  // GOOD, although you may not need sp at all
30
//
31
// StringPiece is sometimes a poor choice for a return value and usually a poor
32
// choice for a data member.  If you do use a StringPiece this way, it is your
33
// responsibility to ensure that the object pointed to by the StringPiece
34
// outlives the StringPiece.
35
//
36
// A StringPiece may represent just part of a string; thus the name "Piece".
37
// For example, when splitting a string, vector<StringPiece> is a natural data
38
// type for the output.  For another example, a Cord is a non-contiguous,
39
// potentially very long string-like object.  The Cord class has an interface
40
// that iteratively provides StringPiece objects that point to the
41
// successive pieces of a Cord object.
42
//
43
// A StringPiece is not null-terminated.  If you write code that scans a
44
// StringPiece, you must check its length before reading any characters.
45
// Common idioms that work on null-terminated strings do not work on
46
// StringPiece objects.
47
//
48
// There are several ways to create a null StringPiece:
49
//   StringPiece()
50
//   StringPiece(NULL)
51
//   StringPiece(NULL, 0)
52
// For all of the above, sp.data() == NULL, sp.length() == 0,
53
// and sp.empty() == true.  Also, if you create a StringPiece with
54
// a non-NULL pointer then sp.data() != NULL.  Once created,
55
// sp.data() will stay either NULL or not-NULL, except if you call
56
// sp.clear() or sp.set().
57
//
58
// Thus, you can use StringPiece(NULL) to signal an out-of-band value
59
// that is different from other StringPiece values.  This is similar
60
// to the way that const char* p1 = NULL; is different from
61
// const char* p2 = "";.
62
//
63
// There are many ways to create an empty StringPiece:
64
//   StringPiece()
65
//   StringPiece(NULL)
66
//   StringPiece(NULL, 0)
67
//   StringPiece("")
68
//   StringPiece("", 0)
69
//   StringPiece("abcdef", 0)
70
//   StringPiece("abcdef"+6, 0)
71
// For all of the above, sp.length() will be 0 and sp.empty() will be true.
72
// For some empty StringPiece values, sp.data() will be NULL.
73
// For some empty StringPiece values, sp.data() will not be NULL.
74
//
75
// Be careful not to confuse: null StringPiece and empty StringPiece.
76
// The set of empty StringPieces properly includes the set of null StringPieces.
77
// That is, every null StringPiece is an empty StringPiece,
78
// but some non-null StringPieces are empty StringPieces too.
79
//
80
// All empty StringPiece values compare equal to each other.
81
// Even a null StringPiece compares equal to a non-null empty StringPiece:
82
//  StringPiece() == StringPiece("", 0)
83
//  StringPiece(NULL) == StringPiece("abc", 0)
84
//  StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
85
//
86
// Look carefully at this example:
87
//   StringPiece("") == NULL
88
// True or false?  TRUE, because StringPiece::operator== converts
89
// the right-hand side from NULL to StringPiece(NULL),
90
// and then compares two zero-length spans of characters.
91
// However, we are working to make this example produce a compile error.
92
//
93
// Suppose you want to write:
94
//   bool TestWhat?(StringPiece sp) { return sp == NULL; }  // BAD
95
// Do not do that.  Write one of these instead:
96
//   bool TestNull(StringPiece sp) { return sp.data() == NULL; }
97
//   bool TestEmpty(StringPiece sp) { return sp.empty(); }
98
// The intent of TestWhat? is unclear.  Did you mean TestNull or TestEmpty?
99
// Right now, TestWhat? behaves like TestEmpty.
100
// We are working to make TestWhat? produce a compile error.
101
// TestNull is good to test for an out-of-band signal.
102
// TestEmpty is good to test for an empty StringPiece.
103
//
104
// Caveats (again):
105
// (1) The lifetime of the pointed-to string (or piece of a string)
106
//     must be longer than the lifetime of the StringPiece.
107
// (2) There may or may not be a '\0' character after the end of
108
//     StringPiece data.
109
// (3) A null StringPiece is empty.
110
//     An empty StringPiece may or may not be a null StringPiece.
111
112
#pragma once
113
114
#include <assert.h>
115
#include <stddef.h>
116
#include <string.h>
117
#include <iosfwd>
118
#include <string>
119
#include <cstddef>
120
#include <iterator>
121
#include <string_view>
122
#include <limits> // IWYU pragma: keep
123
124
#include "gutil/strings/fastmem.h"
125
#include "gutil/hash/string_hash.h"
126
#include "gutil/int128.h"
127
128
// A non-owning (pointer, int length) view over a run of characters.
//
// The class never allocates, copies or frees the characters it points at; it
// only stores `ptr_` and `length_`.  The viewed bytes are not required to be
// NUL-terminated.  A default-constructed piece has data() == nullptr and
// length() == 0.
class StringPiece {
private:
    const char* ptr_ = nullptr; // first viewed character, or nullptr for a null piece
    int length_ = 0;            // number of viewed characters

public:
    // We provide non-explicit singleton constructors so users can pass
    // in a "const char*" or a "string" wherever a "StringPiece" is
    // expected.
    //
    // Style guide exception granted:
    // http://goto/style-guide-exception-20978288

    // Creates a null piece: data() == nullptr, length() == 0.
    StringPiece() : ptr_(nullptr), length_(0) {}

    // Views the NUL-terminated string `str` (terminator excluded).  A null
    // `str` yields a null piece.  The length must fit in an int.
    StringPiece(const char* str) // NOLINT(runtime/explicit)
            : ptr_(str), length_(0) {
        if (str != nullptr) {
            const size_t length = strlen(str);
            assert(length <= static_cast<size_t>(std::numeric_limits<int>::max()));
            length_ = static_cast<int>(length);
        }
    }

    // Views the whole of `str`, including any embedded NULs.  The size must
    // fit in an int.
    StringPiece(const std::string& str) // NOLINT(runtime/explicit)
            : ptr_(str.data()), length_(0) {
        const size_t length = str.size();
        assert(length <= static_cast<size_t>(std::numeric_limits<int>::max()));
        length_ = static_cast<int>(length);
    }

    // Views `len` characters starting at `offset`.  `len` must be non-negative.
    StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { assert(len >= 0); }

    // Substring of another StringPiece.
    // pos must be non-negative and <= x.length().
    StringPiece(StringPiece x, int pos);
    // Substring of another StringPiece.
    // pos must be non-negative and <= x.length().
    // len must be non-negative and will be pinned to at most x.length() - pos.
    StringPiece(StringPiece x, int pos, int len);

    // data() may return a pointer to a buffer with embedded NULs, and the
    // returned buffer may or may not be null terminated.  Therefore it is
    // typically a mistake to pass data() to a routine that expects a NUL
    // terminated string.
    const char* data() const { return ptr_; }
    int size() const { return length_; }
    int length() const { return length_; }
    bool empty() const { return length_ == 0; }

    // Resets this piece to the null piece.
    void clear() {
        ptr_ = nullptr;
        length_ = 0;
    }

    // Re-points this piece at `len` characters starting at `data`.
    // `len` must be non-negative.
    void set(const char* data, int len) {
        assert(len >= 0);
        ptr_ = data;
        length_ = len;
    }

    // Re-points this piece at the NUL-terminated string `str`; a null `str`
    // yields a null piece.  Like the matching constructor, the length must
    // fit in an int.
    void set(const char* str) {
        ptr_ = str;
        if (str != nullptr) {
            const size_t length = strlen(str);
            assert(length <= static_cast<size_t>(std::numeric_limits<int>::max()));
            length_ = static_cast<int>(length);
        } else {
            length_ = 0;
        }
    }

    // Same as set(const char*, int) for callers holding an untyped buffer.
    // `len` must be non-negative.
    void set(const void* data, int len) {
        assert(len >= 0);
        ptr_ = static_cast<const char*>(data);
        length_ = len;
    }

    // Returns the character at index `i`; requires 0 <= i < length().
    char operator[](int i) const {
        assert(0 <= i);
        assert(i < length_);
        return ptr_[i];
    }

    // Drops the first `n` characters from the view; requires n <= length().
    void remove_prefix(int n) {
        assert(length_ >= n);
        ptr_ += n;
        length_ -= n;
    }

    // Drops the last `n` characters from the view; requires n <= length().
    void remove_suffix(int n) {
        assert(length_ >= n);
        length_ -= n;
    }

    // returns {-1, 0, 1}
    //
    // Bytewise comparison of the common prefix; on a tie the shorter piece
    // orders first.
    int compare(StringPiece x) const {
        const int min_size = length_ < x.length_ ? length_ : x.length_;
        // memcmp must not be handed a null pointer, even for a zero count, so
        // only call it when there is at least one byte to compare.
        if (min_size > 0) {
            const int r = memcmp(ptr_, x.ptr_, min_size);
            if (r < 0) return -1;
            if (r > 0) return 1;
        }
        if (length_ < x.length_) return -1;
        if (length_ > x.length_) return 1;
        return 0;
    }

    std::string as_string() const { return ToString(); }
    // We also define ToString() here, since many other string-like
    // interfaces name the routine that converts to a C++ string
    // "ToString", and it's confusing to have the method that does that
    // for a StringPiece be called "as_string()".  We also leave the
    // "as_string()" method defined here for existing code.
    //
    // Returns an owning copy of the viewed characters; a null piece yields
    // an empty string.
    std::string ToString() const {
        if (ptr_ == nullptr) return std::string();
        return std::string(data(), size());
    }

    void CopyToString(std::string* target) const;
    void AppendToString(std::string* target) const;

    // True when the first x.length() characters of this piece equal `x`.
    // An empty `x` is a prefix of every piece, including the null piece.
    bool starts_with(StringPiece x) const {
        if (length_ < x.length_) return false;
        // Skip memcmp for an empty prefix: either pointer may be null.
        return x.length_ == 0 || memcmp(ptr_, x.ptr_, x.length_) == 0;
    }

    // True when the last x.length() characters of this piece equal `x`.
    // An empty `x` is a suffix of every piece, including the null piece.
    bool ends_with(StringPiece x) const {
        if (length_ < x.length_) return false;
        // Skip memcmp for an empty suffix: either pointer may be null.
        return x.length_ == 0 || memcmp(ptr_ + (length_ - x.length_), x.ptr_, x.length_) == 0;
    }

    // standard STL container boilerplate
    using value_type = char;
    using pointer = const char*;
    using reference = const char&;
    using const_reference = const char&;
    using size_type = size_t;
    using difference_type = ptrdiff_t;
    static const size_type npos;
    using const_iterator = const char*;
    using iterator = const char*;
    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
    using reverse_iterator = std::reverse_iterator<iterator>;
    iterator begin() const { return ptr_; }
    iterator end() const { return ptr_ + length_; }
    const_reverse_iterator rbegin() const { return const_reverse_iterator(ptr_ + length_); }
    const_reverse_iterator rend() const { return const_reverse_iterator(ptr_); }
    // STLS says return size_type, but Google says return int
    int max_size() const { return length_; }
    int capacity() const { return length_; }

    // cpplint.py emits a false positive [build/include_what_you_use]
    int copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT

    bool contains(StringPiece s) const;

    // Search helpers.  They are only declared here, so their exact results
    // are defined by the out-of-line implementations.
    int find(StringPiece s, size_type pos = 0) const;
    int find(char c, size_type pos = 0) const;
    int rfind(StringPiece s, size_type pos = npos) const;
    int rfind(char c, size_type pos = npos) const;

    int find_first_of(StringPiece s, size_type pos = 0) const;
    // For a single character this is the same search as find(c, pos).
    int find_first_of(char c, size_type pos = 0) const { return find(c, pos); }
    int find_first_not_of(StringPiece s, size_type pos = 0) const;
    int find_first_not_of(char c, size_type pos = 0) const;
    int find_last_of(StringPiece s, size_type pos = npos) const;
    // For a single character this is the same search as rfind(c, pos).
    int find_last_of(char c, size_type pos = npos) const { return rfind(c, pos); }
    int find_last_not_of(StringPiece s, size_type pos = npos) const;
    int find_last_not_of(char c, size_type pos = npos) const;

    StringPiece substr(size_type pos, size_type n = npos) const;
};
289
290
// This large function is defined inline so that in a fairly common case where
291
// one of the arguments is a literal, the compiler can elide a lot of the
292
// following comparisons.
293
0
// Two pieces are equal when they have the same length and the same bytes.
// Pieces that share a data pointer, or that are both empty, are equal without
// inspecting any bytes.
inline bool operator==(StringPiece x, StringPiece y) {
    const int x_len = x.size();
    if (x_len != y.size()) {
        return false;
    }
    // Same start pointer and same length, or nothing to compare at all.
    if (x.data() == y.data() || x_len <= 0) {
        return true;
    }
    return strings::memeq(x.data(), y.data(), x_len);
}
301
302
0
// Logical negation of operator== for two pieces.
inline bool operator!=(StringPiece x, StringPiece y) {
    const bool same = (x == y);
    return !same;
}
305
306
0
// Strict ordering of two pieces: bytewise comparison of the common prefix,
// and on a tie the shorter piece orders first.
inline bool operator<(StringPiece x, StringPiece y) {
    const int min_size = x.size() < y.size() ? x.size() : y.size();
    // memcmp must not be handed a null pointer, even for a zero count, and an
    // empty piece may have data() == NULL.  With no common bytes the prefixes
    // tie, so r is 0 and the lengths decide.
    const int r = min_size > 0 ? memcmp(x.data(), y.data(), min_size) : 0;
    return (r < 0) || (r == 0 && x.size() < y.size());
}
311
312
0
// x > y holds exactly when y < x.
inline bool operator>(StringPiece x, StringPiece y) {
    const bool y_orders_first = (y < x);
    return y_orders_first;
}
315
316
0
// x <= y holds exactly when x is not greater than y, i.e. when y < x is false.
inline bool operator<=(StringPiece x, StringPiece y) {
    return !(y < x);
}
319
320
0
// x >= y holds exactly when x < y is false.
inline bool operator>=(StringPiece x, StringPiece y) {
    const bool x_orders_first = (x < y);
    return !x_orders_first;
}
323
template <class X>
324
struct GoodFastHash;
325
326
// ------------------------------------------------------------------
327
// Functions used to create STL containers that use StringPiece
328
//  Remember that a StringPiece's lifetime had better be less than
329
//  that of the underlying string or char*.  If it is not, then you
330
//  cannot safely store a StringPiece into an STL container
331
// ------------------------------------------------------------------
332
333
// SWIG doesn't know how to parse this stuff properly. Omit it.
334
#ifndef SWIG
335
336
// Explicit specialization of std::hash for StringPiece.  The call operator
// takes the piece by value and returns a size_t; it is only declared here, so
// the hash function itself is defined out of line.
// NOTE(review): presumably this is what lets StringPiece be used as a key in
// std::unordered_* containers -- confirm against callers.
template <>
337
struct std::hash<StringPiece> {
338
    size_t operator()(StringPiece s) const;
339
};
340
341
// An implementation of GoodFastHash for StringPiece.  See
342
// GoodFastHash values.
343
template <>
344
struct GoodFastHash<StringPiece> {
345
0
    size_t operator()(StringPiece s) const { return HashStringThoroughly(s.data(), s.size()); }
346
    // Less than operator, for MSVC.
347
0
    bool operator()(const StringPiece& s1, const StringPiece& s2) const { return s1 < s2; }
348
    static const size_t bucket_size = 4; // These are required by MSVC
349
    static const size_t min_buckets = 8; // 4 and 8 are defaults.
350
};
351
#endif
352
353
// allow StringPiece to be logged
354
extern ostream& operator<<(ostream& o, StringPiece piece);