Coverage Report

Created: 2025-03-13 18:54

/root/doris/be/src/util/slice.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <assert.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
#include <string.h>
24
25
#include <iostream>
26
#include <map>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "vec/common/allocator.h"
32
33
namespace doris {
34
35
class faststring;

/// @brief A wrapper around externally allocated data.
///
/// Slice is a simple structure containing a pointer into some external
/// storage and a size. The user of a Slice must ensure that the slice
/// is not used after the corresponding external storage has been
/// deallocated.
///
/// Multiple threads can invoke const methods on a Slice without
/// external synchronization, but if any of the threads may call a
/// non-const method, all threads accessing the same Slice must use
/// external synchronization.
struct Slice {
public:
    char* data = nullptr;
    size_t size = 0;
    // Intentionally copyable

    /// Create an empty slice. The slice points at a static empty string,
    /// so @c data is never null for a default-constructed Slice.
    Slice() : data(const_cast<char*>("")), size(0) {}

    /// Create a slice that refers to a @c char byte array.
    ///
    /// @param [in] d
    ///   The input array.
    /// @param [in] n
    ///   Number of bytes in the array.
    Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {}

    /// Create a slice that refers to a @c uint8_t byte array.
    ///
    /// @param [in] s
    ///   The input array.
    /// @param [in] n
    ///   Number of bytes in the array.
    Slice(const uint8_t* s, size_t n)
            : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {}

    /// Create a slice that refers to the contents of the given string.
    Slice(const std::string& s) // NOLINT(runtime/explicit)
            : data(const_cast<char*>(s.data())), size(s.size()) {}

    /// Create a slice from a faststring (defined out of line).
    Slice(const faststring& s);

    /// Create a slice that refers to a C-string s[0,strlen(s)-1].
    ///
    /// @pre s is a non-null, NUL-terminated string (strlen() is called on it).
    Slice(const char* s) // NOLINT(runtime/explicit)
            : data(const_cast<char*>(s)), size(strlen(s)) {}

    /// default copy/move constructor and assignment
    Slice(const Slice&) = default;
    Slice& operator=(const Slice&) = default;
    Slice(Slice&&) noexcept = default;
    Slice& operator=(Slice&&) noexcept = default;

    /// @return A pointer to the beginning of the referenced data.
    const char* get_data() const { return data; }

    /// @return A mutable pointer to the beginning of the referenced data.
    char* mutable_data() { return data; }

    /// @return The length (in bytes) of the referenced data.
    size_t get_size() const { return size; }

    /// @return @c true iff the length of the referenced data is zero.
    bool empty() const { return size == 0; }

    /// @pre n < size (checked with assert() only).
    ///
    /// @return the n-th byte in the referenced data.
    const char& operator[](size_t n) const {
        assert(n < size);
        return data[n];
    }

    /// Change this slice to refer to an empty array.
    void clear() {
        data = const_cast<char*>("");
        size = 0;
    }

    /// Drop the first "n" bytes from this slice.
    ///
    /// @pre n <= size
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    ///
    /// @param [in] n
    ///   Number of bytes that should be dropped from the beginning.
    void remove_prefix(size_t n) {
        assert(n <= size);
        data += n;
        size -= n;
    }

    /// Drop the last "n" bytes from this slice.
    ///
    /// @pre n <= size
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    ///
    /// @param [in] n
    ///   Number of bytes that should be dropped from the end.
    void remove_suffix(size_t n) {
        assert(n <= size);
        size -= n;
    }

    /// Remove leading space characters (' ' only; other whitespace is kept).
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    void trim_prefix() {
        while (size > 0 && data[0] == ' ') {
            ++data;
            --size;
        }
    }

    /// Remove one pair of enclosing quotes, either '"' or '\''.
    ///
    /// The slice is changed only when it holds at least two bytes and its
    /// first and last bytes are the same quote character.
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    ///
    /// @return @c true iff a pair of quotes was removed.
    bool trim_quote() {
        if (size < 2) {
            return false;
        }
        const char first = data[0];
        const char last = data[size - 1];
        const bool quoted = (first == '"' && last == '"') || (first == '\'' && last == '\'');
        if (!quoted) {
            return false;
        }
        data += 1;
        size -= 2;
        return true;
    }

    /// Remove one pair of enclosing double quotes ('"').
    ///
    /// The slice is changed only when it holds at least two bytes and both
    /// its first and last bytes are '"'.
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    ///
    /// @return @c true iff a pair of double quotes was removed.
    bool trim_double_quotes() {
        if (size >= 2 && data[0] == '"' && data[size - 1] == '"') {
            data += 1;
            size -= 2;
            return true;
        }
        return false;
    }

    /// Truncate the slice to the given number of bytes.
    ///
    /// @pre n <= size
    ///
    /// @note Only the base and bounds of the slice are changed;
    ///   the data is not modified.
    ///
    /// @param [in] n
    ///   The new size of the slice.
    void truncate(size_t n) {
        assert(n <= size);
        size = n;
    }

    /// @return A string that contains a copy of the referenced data.
    std::string to_string() const { return std::string(data, size); }

    /// Do a three-way comparison of the slice's data.
    int compare(const Slice& b) const;

    /// Check whether the slice starts with the given prefix.
    bool starts_with(const Slice& x) const {
        return ((size >= x.size) && (mem_equal(data, x.data, x.size)));
    }

    /// Check whether the slice ends with the given suffix.
    bool ends_with(const Slice& x) const {
        return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size));
    }

    /// @brief Comparator struct, useful for ordered collections (like STL maps).
    struct Comparator {
        /// Compare two slices using Slice::compare()
        ///
        /// @param [in] a
        ///   The slice to call Slice::compare() at.
        /// @param [in] b
        ///   The slice to use as a parameter for Slice::compare().
        /// @return @c true iff @c a is less than @c b by Slice::compare().
        bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; }
    };

    /// Relocate/copy the slice's data into a new location.
    ///
    /// @param [in] d
    ///   The new location for the data. If it's the same location, then no
    ///   relocation is done. It is assumed that the new location is
    ///   large enough to fit the data.
    void relocate(char* d) {
        if (data != d) {
            // memcpy() requires valid pointers even for a zero-length copy,
            // and an empty slice may carry a null data pointer.
            if (size > 0) {
                memcpy(d, data, size);
            }
            data = d;
        }
    }

    friend bool operator==(const Slice& x, const Slice& y);

    friend std::ostream& operator<<(std::ostream& os, const Slice& slice);

    /// @return @c true iff the first @c n bytes of @c a and @c b are equal.
    ///   Zero-length ranges always compare equal, whatever the pointers are.
    static bool mem_equal(const void* a, const void* b, size_t n) {
        // Short-circuit so memcmp() is never handed a null pointer for an empty range.
        return n == 0 || memcmp(a, b, n) == 0;
    }

    /// @return The memcmp() result for the first @c n bytes of @c a and @c b,
    ///   or 0 when @c n is zero.
    static int mem_compare(const void* a, const void* b, size_t n) {
        return n == 0 ? 0 : memcmp(a, b, n);
    }

    /// @return The sum of the sizes of all slices in @c slices.
    static size_t compute_total_size(const std::vector<Slice>& slices) {
        size_t total_size = 0;
        for (const auto& slice : slices) {
            total_size += slice.size;
        }
        return total_size;
    }

    /// @return A string holding the concatenated bytes of all slices in @c slices.
    static std::string to_string(const std::vector<Slice>& slices) {
        std::string buf;
        buf.reserve(compute_total_size(slices));
        for (const auto& slice : slices) {
            buf.append(slice.data, slice.size);
        }
        return buf;
    }
};
277
278
0
inline std::ostream& operator<<(std::ostream& os, const Slice& slice) {
279
0
    os << slice.to_string();
280
0
    return os;
281
0
}
282
283
/// Check whether two slices are identical.
284
227k
inline bool operator==(const Slice& x, const Slice& y) {
285
227k
    return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size)));
286
227k
}
287
288
/// Check whether two slices are not identical.
289
0
inline bool operator!=(const Slice& x, const Slice& y) {
290
0
    return !(x == y);
291
0
}
292
293
1.16M
inline int Slice::compare(const Slice& b) const {
294
1.16M
    const int min_len = (size < b.size) ? size : b.size;
295
1.16M
    int r = mem_compare(data, b.data, min_len);
296
1.16M
    if (r == 0) {
297
392k
        if (size < b.size)
298
111
            r = -1;
299
392k
        else if (size > b.size)
300
256
            r = +1;
301
392k
    }
302
1.16M
    return r;
303
1.16M
}
304
305
// A move-only type which manages the lifecycle of externally allocated data.
306
// Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access
307
// the underlying buffer as a Slice.
308
//
309
// Usage example:
310
//   OwnedSlice read_page(PagePointer pp);
311
//   {
312
//     OwnedSlice page_data(pp.size);
313
//     Status s = _file.read_at(pp.offset, page_data.slice());
314
//     if (!s.ok()) {
315
//       return s; // `page_data` destructs, deallocate underlying buffer
316
//     }
317
//     return page_data; // transfer ownership of buffer into the caller
318
//   }
319
//
320
// It only receives memory allocated by Allocator with mmap disabled,
321
// otherwise the memory may not be freed correctly. Currently an existing buffer can only be adopted by faststring.
322
class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> {
public:
    /// Create an empty OwnedSlice that owns no buffer (null data, size 0).
    OwnedSlice() : _slice(static_cast<uint8_t*>(nullptr), 0) {}

    /// Allocate a buffer of @c length bytes through Allocator::alloc() and
    /// take ownership of it. Size and capacity are both @c length.
    OwnedSlice(size_t length)
            : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length),
              _capacity(length) {}

    /// Take over the buffer of @c src; @c src is left owning nothing.
    ///
    /// Marked noexcept (only pointer and integer members are copied) so that
    /// standard containers can move elements when they reallocate.
    OwnedSlice(OwnedSlice&& src) noexcept : _slice(src._slice), _capacity(src._capacity) {
        src._slice.data = nullptr;
        src._slice.size = 0;
        src._capacity = 0;
    }

    /// Exchange buffers with @c src. The buffer previously held by this
    /// object is released when @c src is destroyed.
    OwnedSlice& operator=(OwnedSlice&& src) noexcept {
        if (this != &src) {
            std::swap(_slice, src._slice);
            std::swap(_capacity, src._capacity);
        }
        return *this;
    }

    // disable copy constructor and copy assignment
    OwnedSlice(const OwnedSlice&) = delete;
    OwnedSlice& operator=(const OwnedSlice&) = delete;

    /// Release the owned buffer, if any, through Allocator::free().
    ~OwnedSlice() {
        if (_slice.data != nullptr) {
            // The buffer is freed with its capacity, not with the slice size.
            DCHECK(_capacity != 0);
            Allocator::free(_slice.data, _capacity);
        }
    }

    /// @return A pointer to the beginning of the owned buffer (may be null).
    char* data() const { return _slice.data; }

    /// @return A non-owning view of the owned buffer; it must not be used
    ///   after this OwnedSlice has been destroyed or moved from.
    const Slice& slice() const { return _slice; }

private:
    // faststring also inherits Allocator and disables mmap.
    friend class faststring;

    /// Adopt an existing buffer of @c capacity bytes, of which the first
    /// @c size bytes form the slice. The destructor frees it with Allocator::free().
    OwnedSlice(uint8_t* _data, size_t size, size_t capacity)
            : _slice(_data, size), _capacity(capacity) {}

    Slice _slice;
    size_t _capacity = 0;
};
369
370
} // namespace doris