Coverage Report

Created: 2024-11-21 23:45

/root/doris/be/src/util/slice.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <assert.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
#include <string.h>
24
25
#include <iostream>
26
#include <map>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "vec/common/allocator.h"
32
33
namespace doris {
34
35
class faststring;
36
37
/// @brief A wrapper around externally allocated data.
38
///
39
/// Slice is a simple structure containing a pointer into some external
40
/// storage and a size. The user of a Slice must ensure that the slice
41
/// is not used after the corresponding external storage has been
42
/// deallocated.
43
///
44
/// Multiple threads can invoke const methods on a Slice without
45
/// external synchronization, but if any of the threads may call a
46
/// non-const method, all threads accessing the same Slice must use
47
/// external synchronization.
48
struct Slice {
49
public:
50
    char* data = nullptr;
51
    size_t size;
52
    // Intentionally copyable
53
54
    /// Create an empty slice.
55
1.59M
    Slice() : data(const_cast<char*>("")), size(0) {}
56
57
    /// Create a slice that refers to a @c char byte array.
58
2.90M
    Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {}
59
60
    // Create a slice that refers to a @c uint8_t byte array.
61
    //
62
    // @param [in] d
63
    //   The input array.
64
    // @param [in] n
65
    //   Number of bytes in the array.
66
    Slice(const uint8_t* s, size_t n)
67
222k
            : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {}
68
69
    /// Create a slice that refers to the contents of the given string.
70
    Slice(const std::string& s)
71
            : // NOLINT(runtime/explicit)
72
              data(const_cast<char*>(s.data())),
73
387k
              size(s.size()) {}
74
75
    Slice(const faststring& s);
76
77
    /// Create a slice that refers to a C-string s[0,strlen(s)-1].
78
    Slice(const char* s)
79
            : // NOLINT(runtime/explicit)
80
              data(const_cast<char*>(s)),
81
1.16k
              size(strlen(s)) {}
82
83
    /// default copy/move constructor and assignment
84
    Slice(const Slice&) = default;
85
    Slice& operator=(const Slice&) = default;
86
    Slice(Slice&&) noexcept = default;
87
    Slice& operator=(Slice&&) noexcept = default;
88
89
    /// @return A pointer to the beginning of the referenced data.
90
1.65M
    const char* get_data() const { return data; }
91
92
    /// @return A mutable pointer to the beginning of the referenced data.
93
4
    char* mutable_data() { return const_cast<char*>(data); }
94
95
    /// @return The length (in bytes) of the referenced data.
96
2.10M
    size_t get_size() const { return size; }
97
98
    /// @return @c true iff the length of the referenced data is zero.
99
47.4k
    bool empty() const { return size == 0; }
100
101
    /// @return the n-th byte in the referenced data.
102
110k
    const char& operator[](size_t n) const {
103
110k
        assert(n < size);
104
0
        return data[n];
105
110k
    }
106
107
    /// Change this slice to refer to an empty array.
108
0
    void clear() {
109
0
        data = const_cast<char*>("");
110
0
        size = 0;
111
0
    }
112
113
    /// Drop the first "n" bytes from this slice.
114
    ///
115
    /// @pre n <= size
116
    ///
117
    /// @note Only the base and bounds of the slice are changed;
118
    ///   the data is not modified.
119
    ///
120
    /// @param [in] n
121
    ///   Number of bytes that should be dropped from the beginning.
122
34.5k
    void remove_prefix(size_t n) {
123
34.5k
        assert(n <= size);
124
0
        data += n;
125
34.5k
        size -= n;
126
34.5k
    }
127
128
    /// Drop the last "n" bytes from this slice.
129
    ///
130
    /// @pre n <= size
131
    ///
132
    /// @note Only the base and bounds of the slice are changed;
133
    ///   the data is not modified.
134
    ///
135
    /// @param [in] n
136
    ///   Number of bytes that should be dropped from the last.
137
544
    void remove_suffix(size_t n) {
138
544
        assert(n <= size);
139
0
        size -= n;
140
544
    }
141
142
    /// Remove leading spaces.
143
    ///
144
    /// @pre n <= size
145
    ///
146
    /// @note Only the base and bounds of the slice are changed;
147
    ///   the data is not modified.
148
    ///
149
    /// @param [in] n
150
    ///   Number of bytes of space that should be dropped from the beginning.
151
839
    void trim_prefix() {
152
839
        int32_t begin = 0;
153
1.01k
        while (begin < size && data[begin] == ' ') {
154
174
            data += 1;
155
174
            size -= 1;
156
174
        }
157
839
    }
158
159
    /// Remove quote char '"' or ''' which should exist as first and last char.
160
    ///
161
    /// @pre n <= size
162
    ///
163
    /// @note Only the base and bounds of the slice are changed;
164
    ///   the data is not modified.
165
    ///
166
    /// @param [in] n
167
    ///   Number of bytes of space that should be dropped from the beginning.
168
598
    bool trim_quote() {
169
598
        int32_t begin = 0;
170
598
        bool change = false;
171
598
        if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') ||
172
532
                          (data[begin] == '\'' && data[size - 1] == '\''))) {
173
141
            data += 1;
174
141
            size -= 2;
175
141
            change = true;
176
141
        }
177
598
        return change;
178
598
    }
179
180
    /// Remove quote char '"' which should exist as first and last char.
181
    ///
182
    /// @pre n <= size
183
    ///
184
    /// @note Only the base and bounds of the slice are changed;
185
    ///   the data is not modified.
186
    ///
187
    /// @param [in] n
188
    ///   Number of bytes of space that should be dropped from the beginning.
189
0
    bool trim_double_quotes() {
190
0
        int32_t begin = 0;
191
0
        if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) {
192
0
            data += 1;
193
0
            size -= 2;
194
0
            return true;
195
0
        }
196
0
        return false;
197
0
    }
198
199
    /// Truncate the slice to the given number of bytes.
200
    ///
201
    /// @pre n <= size
202
    ///
203
    /// @note Only the base and bounds of the slice are changed;
204
    ///   the data is not modified.
205
    ///
206
    /// @param [in] n
207
    ///   The new size of the slice.
208
0
    void truncate(size_t n) {
209
0
        assert(n <= size);
210
0
        size = n;
211
0
    }
212
213
    /// @return A string that contains a copy of the referenced data.
214
39.6k
    std::string to_string() const { return std::string(data, size); }
215
216
    /// Do a three-way comparison of the slice's data.
217
    int compare(const Slice& b) const;
218
219
    /// Check whether the slice starts with the given prefix.
220
0
    bool starts_with(const Slice& x) const {
221
0
        return ((size >= x.size) && (mem_equal(data, x.data, x.size)));
222
0
    }
223
224
11
    bool ends_with(const Slice& x) const {
225
11
        return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size));
226
11
    }
227
228
    /// @brief Comparator struct, useful for ordered collections (like STL maps).
229
    struct Comparator {
230
        /// Compare two slices using Slice::compare()
231
        ///
232
        /// @param [in] a
233
        ///   The slice to call Slice::compare() at.
234
        /// @param [in] b
235
        ///   The slice to use as a parameter for Slice::compare().
236
        /// @return @c true iff @c a is less than @c b by Slice::compare().
237
188k
        bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; }
238
    };
239
240
    /// Relocate/copy the slice's data into a new location.
241
    ///
242
    /// @param [in] d
243
    ///   The new location for the data. If it's the same location, then no
244
    ///   relocation is done. It is assumed that the new location is
245
    ///   large enough to fit the data.
246
462
    void relocate(char* d) {
247
462
        if (data != d) {
248
462
            memcpy(d, data, size);
249
462
            data = d;
250
462
        }
251
462
    }
252
253
    friend bool operator==(const Slice& x, const Slice& y);
254
255
    friend std::ostream& operator<<(std::ostream& os, const Slice& slice);
256
257
19.2k
    static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; }
258
259
1.30M
    static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); }
260
261
42.6k
    static size_t compute_total_size(const std::vector<Slice>& slices) {
262
42.6k
        size_t total_size = 0;
263
42.6k
        for (auto& slice : slices) {
264
42.6k
            total_size += slice.size;
265
42.6k
        }
266
42.6k
        return total_size;
267
42.6k
    }
268
269
0
    static std::string to_string(const std::vector<Slice>& slices) {
270
0
        std::string buf;
271
0
        for (auto& slice : slices) {
272
0
            buf.append(slice.data, slice.size);
273
0
        }
274
0
        return buf;
275
0
    }
276
};
277
278
0
inline std::ostream& operator<<(std::ostream& os, const Slice& slice) {
279
0
    os << slice.to_string();
280
0
    return os;
281
0
}
282
283
/// Check whether two slices are identical.
284
19.2k
inline bool operator==(const Slice& x, const Slice& y) {
285
19.2k
    return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size)));
286
19.2k
}
287
288
/// Check whether two slices are not identical.
289
0
inline bool operator!=(const Slice& x, const Slice& y) {
290
0
    return !(x == y);
291
0
}
292
293
1.30M
inline int Slice::compare(const Slice& b) const {
294
1.30M
    const int min_len = (size < b.size) ? size : b.size;
295
1.30M
    int r = mem_compare(data, b.data, min_len);
296
1.30M
    if (r == 0) {
297
394k
        if (size < b.size)
298
798
            r = -1;
299
393k
        else if (size > b.size)
300
967
            r = +1;
301
394k
    }
302
1.30M
    return r;
303
1.30M
}
304
305
/// @brief STL map whose keys are Slices.
306
///
307
/// An example of usage:
308
/// @code
309
///   typedef SliceMap<int>::type MySliceMap;
310
///
311
///   MySliceMap my_map;
312
///   my_map.insert(MySliceMap::value_type(a, 1));
313
///   my_map.insert(MySliceMap::value_type(b, 2));
314
///   my_map.insert(MySliceMap::value_type(c, 3));
315
///
316
///   for (const MySliceMap::value_type& pair : my_map) {
317
///     ...
318
///   }
319
/// @endcode
320
template <typename T>
321
struct SliceMap {
322
    /// A handy typedef for the slice map with appropriate comparison operator.
323
    typedef std::map<Slice, T, Slice::Comparator> type;
324
};
325
326
// A move-only type which manage the lifecycle of externally allocated data.
327
// Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access
328
// the underlying buffer as a Slice.
329
//
330
// Usage example:
331
//   OwnedSlice read_page(PagePointer pp);
332
//   {
333
//     OwnedSlice page_data(new uint8_t[pp.size], pp.size);
334
//     Status s = _file.read_at(pp.offset, owned.slice());
335
//     if (!s.ok()) {
336
//       return s; // `page_data` destructs, deallocate underlying buffer
337
//     }
338
//     return page_data; // transfer ownership of buffer into the caller
339
//   }
340
//
341
// only receive the memory allocated by Allocator and disables mmap,
342
// otherwise the memory may not be freed correctly, currently only be constructed by faststring.
343
class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> {
344
public:
345
70.2k
    OwnedSlice() : _slice((uint8_t*)nullptr, 0) {}
346
347
    OwnedSlice(size_t length)
348
            : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length),
349
98
              _capacity(length) {}
350
351
23.5k
    OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) {
352
23.5k
        src._slice.data = nullptr;
353
23.5k
        src._slice.size = 0;
354
23.5k
        src._capacity = 0;
355
23.5k
    }
356
357
42.8k
    OwnedSlice& operator=(OwnedSlice&& src) {
358
42.8k
        if (this != &src) {
359
42.8k
            std::swap(_slice, src._slice);
360
42.8k
            std::swap(_capacity, src._capacity);
361
42.8k
        }
362
42.8k
        return *this;
363
42.8k
    }
364
365
    // disable copy constructor and copy assignment
366
    OwnedSlice(const OwnedSlice&) = delete;
367
    void operator=(const OwnedSlice&) = delete;
368
369
120k
    ~OwnedSlice() {
370
120k
        if (_slice.data != nullptr) {
371
26.5k
            DCHECK(_capacity != 0);
372
26.5k
            Allocator::free(_slice.data, _capacity);
373
26.5k
        }
374
120k
    }
375
376
412
    char* data() const { return _slice.data; }
377
378
98.8k
    const Slice& slice() const { return _slice; }
379
380
private:
381
    // faststring also inherits Allocator and disables mmap.
382
    friend class faststring;
383
384
    OwnedSlice(uint8_t* _data, size_t size, size_t capacity)
385
26.4k
            : _slice(_data, size), _capacity(capacity) {}
386
387
    Slice _slice;
388
    size_t _capacity = 0;
389
};
390
391
} // namespace doris