Coverage Report

Created: 2025-07-24 03:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/slice.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <assert.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
#include <string.h>
24
25
#include <iostream>
26
#include <map>
27
#include <string>
28
#include <utility>
29
#include <vector>
30
31
#include "vec/common/allocator.h"
32
#include "vec/common/allocator_fwd.h"
33
34
namespace doris {
35
36
class faststring;
37
38
/// @brief A wrapper around externally allocated data.
39
///
40
/// Slice is a simple structure containing a pointer into some external
41
/// storage and a size. The user of a Slice must ensure that the slice
42
/// is not used after the corresponding external storage has been
43
/// deallocated.
44
///
45
/// Multiple threads can invoke const methods on a Slice without
46
/// external synchronization, but if any of the threads may call a
47
/// non-const method, all threads accessing the same Slice must use
48
/// external synchronization.
49
struct Slice {
50
public:
51
    char* data = nullptr;
52
    size_t size;
53
    // Intentionally copyable
54
55
    /// Create an empty slice.
56
1.70M
    Slice() : data(const_cast<char*>("")), size(0) {}
57
58
    /// Create a slice that refers to a @c char byte array.
59
23.0M
    Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {}
60
61
    // Create a slice that refers to a @c uint8_t byte array.
62
    //
63
    // @param [in] d
64
    //   The input array.
65
    // @param [in] n
66
    //   Number of bytes in the array.
67
    Slice(const uint8_t* s, size_t n)
68
724k
            : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {}
69
70
    /// Create a slice that refers to the contents of the given string.
71
    Slice(const std::string& s)
72
            : // NOLINT(runtime/explicit)
73
1.02M
              data(const_cast<char*>(s.data())),
74
1.02M
              size(s.size()) {}
75
76
    Slice(const faststring& s);
77
78
    /// Create a slice that refers to a C-string s[0,strlen(s)-1].
79
    Slice(const char* s)
80
            : // NOLINT(runtime/explicit)
81
2.28k
              data(const_cast<char*>(s)),
82
2.28k
              size(strlen(s)) {}
83
84
    /// default copy/move constructor and assignment
85
    Slice(const Slice&) = default;
86
    Slice& operator=(const Slice&) = default;
87
    Slice(Slice&&) noexcept = default;
88
    Slice& operator=(Slice&&) noexcept = default;
89
90
    /// @return A pointer to the beginning of the referenced data.
91
20.6M
    const char* get_data() const { return data; }
92
93
    /// @return A mutable pointer to the beginning of the referenced data.
94
8.51k
    char* mutable_data() { return const_cast<char*>(data); }
95
96
    /// @return The length (in bytes) of the referenced data.
97
60.8M
    size_t get_size() const { return size; }
98
99
    /// @return @c true iff the length of the referenced data is zero.
100
1.58M
    bool empty() const { return size == 0; }
101
102
    /// @return the n-th byte in the referenced data.
103
435M
    const char& operator[](size_t n) const {
104
435M
        assert(n < size);
105
435M
        return data[n];
106
435M
    }
107
108
    /// Change this slice to refer to an empty array.
109
0
    void clear() {
110
0
        data = const_cast<char*>("");
111
0
        size = 0;
112
0
    }
113
114
    /// Drop the first "n" bytes from this slice.
115
    ///
116
    /// @pre n <= size
117
    ///
118
    /// @note Only the base and bounds of the slice are changed;
119
    ///   the data is not modified.
120
    ///
121
    /// @param [in] n
122
    ///   Number of bytes that should be dropped from the beginning.
123
211k
    void remove_prefix(size_t n) {
124
211k
        assert(n <= size);
125
211k
        data += n;
126
211k
        size -= n;
127
211k
    }
128
129
    /// Drop the last "n" bytes from this slice.
130
    ///
131
    /// @pre n <= size
132
    ///
133
    /// @note Only the base and bounds of the slice are changed;
134
    ///   the data is not modified.
135
    ///
136
    /// @param [in] n
137
    ///   Number of bytes that should be dropped from the last.
138
1.12M
    void remove_suffix(size_t n) {
139
1.12M
        assert(n <= size);
140
1.12M
        size -= n;
141
1.12M
    }
142
143
    /// Remove leading spaces.
144
    ///
145
    /// @pre n <= size
146
    ///
147
    /// @note Only the base and bounds of the slice are changed;
148
    ///   the data is not modified.
149
    ///
150
    /// @param [in] n
151
    ///   Number of bytes of space that should be dropped from the beginning.
152
1.16M
    void trim_prefix() {
153
1.16M
        int32_t begin = 0;
154
2.14M
        while (begin < size && data[begin] == ' ') {
155
980k
            data += 1;
156
980k
            size -= 1;
157
980k
        }
158
1.16M
    }
159
160
    /// Remove quote char '"' or ''' which should exist as first and last char.
161
    ///
162
    /// @pre n <= size
163
    ///
164
    /// @note Only the base and bounds of the slice are changed;
165
    ///   the data is not modified.
166
    ///
167
    /// @param [in] n
168
    ///   Number of bytes of space that should be dropped from the beginning.
169
362k
    bool trim_quote() {
170
362k
        int32_t begin = 0;
171
362k
        bool change = false;
172
362k
        if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') ||
173
358k
                          (data[begin] == '\'' && data[size - 1] == '\''))) {
174
348k
            data += 1;
175
348k
            size -= 2;
176
348k
            change = true;
177
348k
        }
178
362k
        return change;
179
362k
    }
180
181
    /// Remove quote char '"' which should exist as first and last char.
182
    ///
183
    /// @pre n <= size
184
    ///
185
    /// @note Only the base and bounds of the slice are changed;
186
    ///   the data is not modified.
187
    ///
188
    /// @param [in] n
189
    ///   Number of bytes of space that should be dropped from the beginning.
190
0
    bool trim_double_quotes() {
191
0
        int32_t begin = 0;
192
0
        if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) {
193
0
            data += 1;
194
0
            size -= 2;
195
0
            return true;
196
0
        }
197
0
        return false;
198
0
    }
199
200
    /// Truncate the slice to the given number of bytes.
201
    ///
202
    /// @pre n <= size
203
    ///
204
    /// @note Only the base and bounds of the slice are changed;
205
    ///   the data is not modified.
206
    ///
207
    /// @param [in] n
208
    ///   The new size of the slice.
209
    void truncate(size_t n) {
210
        assert(n <= size);
211
        size = n;
212
    }
213
214
    /// @return A string that contains a copy of the referenced data.
215
77.6k
    std::string to_string() const { return std::string(data, size); }
216
217
    /// Do a three-way comparison of the slice's data.
218
    int compare(const Slice& b) const;
219
220
    /// Check whether the slice starts with the given prefix.
221
0
    bool starts_with(const Slice& x) const {
222
0
        return ((size >= x.size) && (mem_equal(data, x.data, x.size)));
223
0
    }
224
225
31
    bool ends_with(const Slice& x) const {
226
31
        return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size));
227
31
    }
228
229
    /// @brief Comparator struct, useful for ordered collections (like STL maps).
230
    struct Comparator {
231
        /// Compare two slices using Slice::compare()
232
        ///
233
        /// @param [in] a
234
        ///   The slice to call Slice::compare() at.
235
        /// @param [in] b
236
        ///   The slice to use as a parameter for Slice::compare().
237
        /// @return @c true iff @c a is less than @c b by Slice::compare().
238
0
        bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; }
239
    };
240
241
    /// Relocate/copy the slice's data into a new location.
242
    ///
243
    /// @param [in] d
244
    ///   The new location for the data. If it's the same location, then no
245
    ///   relocation is done. It is assumed that the new location is
246
    ///   large enough to fit the data.
247
50.0k
    void relocate(char* d) {
248
50.0k
        if (data != d) {
249
50.0k
            memcpy(d, data, size);
250
50.0k
            data = d;
251
50.0k
        }
252
50.0k
    }
253
254
    friend bool operator==(const Slice& x, const Slice& y);
255
256
    friend std::ostream& operator<<(std::ostream& os, const Slice& slice);
257
258
227k
    static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; }
259
260
43.3M
    static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); }
261
262
86.7k
    static size_t compute_total_size(const std::vector<Slice>& slices) {
263
86.7k
        size_t total_size = 0;
264
87.7k
        for (auto& slice : slices) {
265
87.7k
            total_size += slice.size;
266
87.7k
        }
267
86.7k
        return total_size;
268
86.7k
    }
269
270
0
    static std::string to_string(const std::vector<Slice>& slices) {
271
0
        std::string buf;
272
0
        for (auto& slice : slices) {
273
0
            buf.append(slice.data, slice.size);
274
0
        }
275
0
        return buf;
276
0
    }
277
278
    // X is (maybe) a truncated prefix of string X'
279
    // Y is (maybe) a truncated prefix of string Y'
280
    // return true only if we can determine that X' is strictly less than Y'
281
    // based on these maybe truncated prefixes
282
    static bool lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated, Slice Y,
283
                                              bool Y_is_truncated);
284
};
285
286
0
inline std::ostream& operator<<(std::ostream& os, const Slice& slice) {
287
0
    os << slice.to_string();
288
0
    return os;
289
0
}
290
291
/// Check whether two slices are identical.
292
230k
inline bool operator==(const Slice& x, const Slice& y) {
293
230k
    return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size)));
294
230k
}
295
296
/// Check whether two slices are not identical.
297
0
inline bool operator!=(const Slice& x, const Slice& y) {
298
0
    return !(x == y);
299
0
}
300
301
43.3M
inline int Slice::compare(const Slice& b) const {
302
43.3M
    const int min_len = (size < b.size) ? size : b.size;
303
43.3M
    int r = mem_compare(data, b.data, min_len);
304
43.3M
    if (r == 0) {
305
4.99M
        if (size < b.size)
306
269k
            r = -1;
307
4.72M
        else if (size > b.size)
308
2.13M
            r = +1;
309
4.99M
    }
310
43.3M
    return r;
311
43.3M
}
312
313
// A move-only type which manage the lifecycle of externally allocated data.
314
// Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access
315
// the underlying buffer as a Slice.
316
//
317
// Usage example:
318
//   OwnedSlice read_page(PagePointer pp);
319
//   {
320
//     OwnedSlice page_data(new uint8_t[pp.size], pp.size);
321
//     Status s = _file.read_at(pp.offset, owned.slice());
322
//     if (!s.ok()) {
323
//       return s; // `page_data` destructs, deallocate underlying buffer
324
//     }
325
//     return page_data; // transfer ownership of buffer into the caller
326
//   }
327
//
328
// only receive the memory allocated by Allocator and disables mmap,
329
// otherwise the memory may not be freed correctly, currently only be constructed by faststring.
330
class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> {
331
public:
332
150k
    OwnedSlice() : _slice((uint8_t*)nullptr, 0) {}
333
334
    OwnedSlice(size_t length)
335
136
            : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length),
336
136
              _capacity(length) {}
337
338
48.0k
    OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) {
339
48.0k
        src._slice.data = nullptr;
340
48.0k
        src._slice.size = 0;
341
48.0k
        src._capacity = 0;
342
48.0k
    }
343
344
96.6k
    OwnedSlice& operator=(OwnedSlice&& src) {
345
96.6k
        if (this != &src) {
346
96.6k
            std::swap(_slice, src._slice);
347
96.6k
            std::swap(_capacity, src._capacity);
348
96.6k
        }
349
96.6k
        return *this;
350
96.6k
    }
351
352
    // disable copy constructor and copy assignment
353
    OwnedSlice(const OwnedSlice&) = delete;
354
    void operator=(const OwnedSlice&) = delete;
355
356
262k
    ~OwnedSlice() {
357
262k
        if (_slice.data != nullptr) {
358
64.0k
            DCHECK(_capacity != 0);
359
64.0k
            Allocator::free(_slice.data, _capacity);
360
64.0k
        }
361
262k
    }
362
363
650
    char* data() const { return _slice.data; }
364
365
221k
    const Slice& slice() const { return _slice; }
366
367
private:
368
    // faststring also inherits Allocator and disables mmap.
369
    friend class faststring;
370
371
    OwnedSlice(uint8_t* _data, size_t size, size_t capacity)
372
63.9k
            : _slice(_data, size), _capacity(capacity) {}
373
374
    Slice _slice;
375
    size_t _capacity = 0;
376
};
377
378
} // namespace doris