Coverage Report

Created: 2025-06-09 14:12

/root/doris/be/src/util/slice.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <assert.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
#include <string.h>
24
25
#include <iostream>
26
#include <map>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "vec/common/allocator.h"
32
33
namespace doris {
34
35
class faststring;
36
37
/// @brief A wrapper around externally allocated data.
38
///
39
/// Slice is a simple structure containing a pointer into some external
40
/// storage and a size. The user of a Slice must ensure that the slice
41
/// is not used after the corresponding external storage has been
42
/// deallocated.
43
///
44
/// Multiple threads can invoke const methods on a Slice without
45
/// external synchronization, but if any of the threads may call a
46
/// non-const method, all threads accessing the same Slice must use
47
/// external synchronization.
48
struct Slice {
49
public:
50
    char* data = nullptr;
51
    size_t size;
52
    // Intentionally copyable
53
54
    /// Create an empty slice.
55
54.5M
    Slice() : data(const_cast<char*>("")), size(0) {}
56
57
    /// Create a slice that refers to a @c char byte array.
58
1.05G
    Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {}
59
60
    // Create a slice that refers to a @c uint8_t byte array.
61
    //
62
    // @param [in] d
63
    //   The input array.
64
    // @param [in] n
65
    //   Number of bytes in the array.
66
    Slice(const uint8_t* s, size_t n)
67
68.2M
            : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {}
68
69
    /// Create a slice that refers to the contents of the given string.
70
    Slice(const std::string& s)
71
            : // NOLINT(runtime/explicit)
72
              data(const_cast<char*>(s.data())),
73
39.4M
              size(s.size()) {}
74
75
    Slice(const faststring& s);
76
77
    /// Create a slice that refers to a C-string s[0,strlen(s)-1].
78
    Slice(const char* s)
79
            : // NOLINT(runtime/explicit)
80
              data(const_cast<char*>(s)),
81
2.18k
              size(strlen(s)) {}
82
83
    /// default copy/move constructor and assignment
84
    Slice(const Slice&) = default;
85
    Slice& operator=(const Slice&) = default;
86
    Slice(Slice&&) noexcept = default;
87
    Slice& operator=(Slice&&) noexcept = default;
88
89
    /// @return A pointer to the beginning of the referenced data.
90
117M
    const char* get_data() const { return data; }
91
92
    /// @return A mutable pointer to the beginning of the referenced data.
93
2.39M
    char* mutable_data() { return const_cast<char*>(data); }
94
95
    /// @return The length (in bytes) of the referenced data.
96
119M
    size_t get_size() const { return size; }
97
98
    /// @return @c true iff the length of the referenced data is zero.
99
210M
    bool empty() const { return size == 0; }
100
101
    /// @return the n-th byte in the referenced data.
102
575M
    const char& operator[](size_t n) const {
103
575M
        assert(n < size);
104
0
        return data[n];
105
575M
    }
106
107
    /// Change this slice to refer to an empty array.
108
0
    void clear() {
109
0
        data = const_cast<char*>("");
110
0
        size = 0;
111
0
    }
112
113
    /// Drop the first "n" bytes from this slice.
114
    ///
115
    /// @pre n <= size
116
    ///
117
    /// @note Only the base and bounds of the slice are changed;
118
    ///   the data is not modified.
119
    ///
120
    /// @param [in] n
121
    ///   Number of bytes that should be dropped from the beginning.
122
2.95M
    void remove_prefix(size_t n) {
123
2.95M
        assert(n <= size);
124
0
        data += n;
125
2.95M
        size -= n;
126
2.95M
    }
127
128
    /// Drop the last "n" bytes from this slice.
129
    ///
130
    /// @pre n <= size
131
    ///
132
    /// @note Only the base and bounds of the slice are changed;
133
    ///   the data is not modified.
134
    ///
135
    /// @param [in] n
136
    ///   Number of bytes that should be dropped from the last.
137
4.40M
    void remove_suffix(size_t n) {
138
4.40M
        assert(n <= size);
139
0
        size -= n;
140
4.40M
    }
141
142
    /// Remove leading spaces.
143
    ///
144
    /// @pre n <= size
145
    ///
146
    /// @note Only the base and bounds of the slice are changed;
147
    ///   the data is not modified.
148
    ///
149
    /// @param [in] n
150
    ///   Number of bytes of space that should be dropped from the beginning.
151
6.26M
    void trim_prefix() {
152
6.26M
        int32_t begin = 0;
153
7.63M
        while (begin < size && data[begin] == ' ') {
154
1.37M
            data += 1;
155
1.37M
            size -= 1;
156
1.37M
        }
157
6.26M
    }
158
159
    /// Remove quote char '"' or ''' which should exist as first and last char.
160
    ///
161
    /// @pre n <= size
162
    ///
163
    /// @note Only the base and bounds of the slice are changed;
164
    ///   the data is not modified.
165
    ///
166
    /// @param [in] n
167
    ///   Number of bytes of space that should be dropped from the beginning.
168
7.07M
    bool trim_quote() {
169
7.07M
        int32_t begin = 0;
170
7.07M
        bool change = false;
171
7.07M
        if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') ||
172
6.76M
                          (data[begin] == '\'' && data[size - 1] == '\''))) {
173
2.45M
            data += 1;
174
2.45M
            size -= 2;
175
2.45M
            change = true;
176
2.45M
        }
177
7.07M
        return change;
178
7.07M
    }
179
180
    /// Remove quote char '"' which should exist as first and last char.
181
    ///
182
    /// @pre n <= size
183
    ///
184
    /// @note Only the base and bounds of the slice are changed;
185
    ///   the data is not modified.
186
    ///
187
    /// @param [in] n
188
    ///   Number of bytes of space that should be dropped from the beginning.
189
6.72k
    bool trim_double_quotes() {
190
6.72k
        int32_t begin = 0;
191
6.72k
        if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) {
192
754
            data += 1;
193
754
            size -= 2;
194
754
            return true;
195
754
        }
196
5.97k
        return false;
197
6.72k
    }
198
199
    /// Truncate the slice to the given number of bytes.
200
    ///
201
    /// @pre n <= size
202
    ///
203
    /// @note Only the base and bounds of the slice are changed;
204
    ///   the data is not modified.
205
    ///
206
    /// @param [in] n
207
    ///   The new size of the slice.
208
0
    void truncate(size_t n) {
209
0
        assert(n <= size);
210
0
        size = n;
211
0
    }
212
213
    /// @return A string that contains a copy of the referenced data.
214
1.02M
    std::string to_string() const { return std::string(data, size); }
215
216
    /// Do a three-way comparison of the slice's data.
217
    int compare(const Slice& b) const;
218
219
    /// Check whether the slice starts with the given prefix.
220
0
    bool starts_with(const Slice& x) const {
221
0
        return ((size >= x.size) && (mem_equal(data, x.data, x.size)));
222
0
    }
223
224
444
    bool ends_with(const Slice& x) const {
225
444
        return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size));
226
444
    }
227
228
    /// @brief Comparator struct, useful for ordered collections (like STL maps).
229
    struct Comparator {
230
        /// Compare two slices using Slice::compare()
231
        ///
232
        /// @param [in] a
233
        ///   The slice to call Slice::compare() at.
234
        /// @param [in] b
235
        ///   The slice to use as a parameter for Slice::compare().
236
        /// @return @c true iff @c a is less than @c b by Slice::compare().
237
0
        bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; }
238
    };
239
240
    /// Relocate/copy the slice's data into a new location.
241
    ///
242
    /// @param [in] d
243
    ///   The new location for the data. If it's the same location, then no
244
    ///   relocation is done. It is assumed that the new location is
245
    ///   large enough to fit the data.
246
23.7M
    void relocate(char* d) {
247
23.8M
        if (data != d) {
248
23.8M
            memcpy(d, data, size);
249
23.8M
            data = d;
250
23.8M
        }
251
23.7M
    }
252
253
    friend bool operator==(const Slice& x, const Slice& y);
254
255
    friend std::ostream& operator<<(std::ostream& os, const Slice& slice);
256
257
173M
    static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; }
258
259
543M
    static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); }
260
261
4.41M
    static size_t compute_total_size(const std::vector<Slice>& slices) {
262
4.41M
        size_t total_size = 0;
263
4.61M
        for (auto& slice : slices) {
264
4.61M
            total_size += slice.size;
265
4.61M
        }
266
4.41M
        return total_size;
267
4.41M
    }
268
269
0
    static std::string to_string(const std::vector<Slice>& slices) {
270
0
        std::string buf;
271
0
        for (auto& slice : slices) {
272
0
            buf.append(slice.data, slice.size);
273
0
        }
274
0
        return buf;
275
0
    }
276
277
    // X is (maybe) a truncated prefix of string X'
278
0
    // Y is (maybe) a truncated prefix of string Y'
279
0
    // return true only if we can determine that X' is strictly less than Y'
280
0
    // based on these maybe truncated prefixes
281
0
    static bool lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated, Slice Y,
282
                                              bool Y_is_truncated);
283
};
284
176M
285
176M
inline std::ostream& operator<<(std::ostream& os, const Slice& slice) {
286
176M
    os << slice.to_string();
287
    return os;
288
}
289
30.9k
290
30.9k
/// Check whether two slices are identical.
291
30.9k
inline bool operator==(const Slice& x, const Slice& y) {
292
    return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size)));
293
543M
}
294
543M
295
543M
/// Check whether two slices are not identical.
296
543M
inline bool operator!=(const Slice& x, const Slice& y) {
297
31.0M
    return !(x == y);
298
24.9M
}
299
6.10M
300
179k
inline int Slice::compare(const Slice& b) const {
301
31.0M
    const int min_len = (size < b.size) ? size : b.size;
302
543M
    int r = mem_compare(data, b.data, min_len);
303
543M
    if (r == 0) {
304
        if (size < b.size)
305
            r = -1;
306
        else if (size > b.size)
307
            r = +1;
308
    }
309
    return r;
310
}
311
312
// A move-only type which manage the lifecycle of externally allocated data.
313
// Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access
314
// the underlying buffer as a Slice.
315
//
316
// Usage example:
317
//   OwnedSlice read_page(PagePointer pp);
318
//   {
319
//     OwnedSlice page_data(new uint8_t[pp.size], pp.size);
320
//     Status s = _file.read_at(pp.offset, owned.slice());
321
//     if (!s.ok()) {
322
//       return s; // `page_data` destructs, deallocate underlying buffer
323
//     }
324
7.95M
//     return page_data; // transfer ownership of buffer into the caller
325
//   }
326
//
327
// only receive the memory allocated by Allocator and disables mmap,
328
445
// otherwise the memory may not be freed correctly, currently only be constructed by faststring.
329
class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> {
330
3.11M
public:
331
3.11M
    OwnedSlice() : _slice((uint8_t*)nullptr, 0) {}
332
3.11M
333
3.11M
    OwnedSlice(size_t length)
334
3.11M
            : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length),
335
              _capacity(length) {}
336
4.87M
337
4.87M
    OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) {
338
4.87M
        src._slice.data = nullptr;
339
4.87M
        src._slice.size = 0;
340
4.87M
        src._capacity = 0;
341
4.87M
    }
342
4.87M
343
    OwnedSlice& operator=(OwnedSlice&& src) {
344
        if (this != &src) {
345
            std::swap(_slice, src._slice);
346
            std::swap(_capacity, src._capacity);
347
        }
348
13.8M
        return *this;
349
13.8M
    }
350
2.75M
351
2.75M
    // disable copy constructor and copy assignment
352
2.75M
    OwnedSlice(const OwnedSlice&) = delete;
353
13.8M
    void operator=(const OwnedSlice&) = delete;
354
355
11.0k
    ~OwnedSlice() {
356
        if (_slice.data != nullptr) {
357
11.6M
            DCHECK(_capacity != 0);
358
            Allocator::free(_slice.data, _capacity);
359
        }
360
    }
361
362
    char* data() const { return _slice.data; }
363
364
2.75M
    const Slice& slice() const { return _slice; }
365
366
private:
367
    // faststring also inherits Allocator and disables mmap.
368
    friend class faststring;
369
370
    OwnedSlice(uint8_t* _data, size_t size, size_t capacity)
371
            : _slice(_data, size), _capacity(capacity) {}
372
373
    Slice _slice;
374
    size_t _capacity = 0;
375
};
376
377
} // namespace doris