/root/doris/be/src/util/slice.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <assert.h> |
21 | | #include <stddef.h> |
22 | | #include <stdint.h> |
23 | | #include <string.h> |
24 | | |
25 | | #include <iostream> |
26 | | #include <map> |
27 | | #include <string> |
28 | | #include <utility> |
29 | | #include <vector> |
30 | | |
31 | | #include "vec/common/allocator.h" |
32 | | #include "vec/common/allocator_fwd.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | class faststring; |
37 | | |
38 | | /// @brief A wrapper around externally allocated data. |
39 | | /// |
40 | | /// Slice is a simple structure containing a pointer into some external |
41 | | /// storage and a size. The user of a Slice must ensure that the slice |
42 | | /// is not used after the corresponding external storage has been |
43 | | /// deallocated. |
44 | | /// |
45 | | /// Multiple threads can invoke const methods on a Slice without |
46 | | /// external synchronization, but if any of the threads may call a |
47 | | /// non-const method, all threads accessing the same Slice must use |
48 | | /// external synchronization. |
49 | | struct Slice { |
50 | | public: |
51 | | char* data = nullptr; |
52 | | size_t size; |
53 | | // Intentionally copyable |
54 | | |
55 | | /// Create an empty slice. |
56 | 1.70M | Slice() : data(const_cast<char*>("")), size(0) {} |
57 | | |
58 | | /// Create a slice that refers to a @c char byte array. |
59 | 23.0M | Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {} |
60 | | |
61 | | // Create a slice that refers to a @c uint8_t byte array. |
62 | | // |
63 | | // @param [in] d |
64 | | // The input array. |
65 | | // @param [in] n |
66 | | // Number of bytes in the array. |
67 | | Slice(const uint8_t* s, size_t n) |
68 | 724k | : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {} |
69 | | |
70 | | /// Create a slice that refers to the contents of the given string. |
71 | | Slice(const std::string& s) |
72 | | : // NOLINT(runtime/explicit) |
73 | 1.02M | data(const_cast<char*>(s.data())), |
74 | 1.02M | size(s.size()) {} |
75 | | |
76 | | Slice(const faststring& s); |
77 | | |
78 | | /// Create a slice that refers to a C-string s[0,strlen(s)-1]. |
79 | | Slice(const char* s) |
80 | | : // NOLINT(runtime/explicit) |
81 | 2.28k | data(const_cast<char*>(s)), |
82 | 2.28k | size(strlen(s)) {} |
83 | | |
84 | | /// default copy/move constructor and assignment |
85 | | Slice(const Slice&) = default; |
86 | | Slice& operator=(const Slice&) = default; |
87 | | Slice(Slice&&) noexcept = default; |
88 | | Slice& operator=(Slice&&) noexcept = default; |
89 | | |
90 | | /// @return A pointer to the beginning of the referenced data. |
91 | 20.6M | const char* get_data() const { return data; } |
92 | | |
93 | | /// @return A mutable pointer to the beginning of the referenced data. |
94 | 8.51k | char* mutable_data() { return const_cast<char*>(data); } |
95 | | |
96 | | /// @return The length (in bytes) of the referenced data. |
97 | 60.8M | size_t get_size() const { return size; } |
98 | | |
99 | | /// @return @c true iff the length of the referenced data is zero. |
100 | 1.58M | bool empty() const { return size == 0; } |
101 | | |
102 | | /// @return the n-th byte in the referenced data. |
103 | 435M | const char& operator[](size_t n) const { |
104 | 435M | assert(n < size); |
105 | 435M | return data[n]; |
106 | 435M | } |
107 | | |
108 | | /// Change this slice to refer to an empty array. |
109 | 0 | void clear() { |
110 | 0 | data = const_cast<char*>(""); |
111 | 0 | size = 0; |
112 | 0 | } |
113 | | |
114 | | /// Drop the first "n" bytes from this slice. |
115 | | /// |
116 | | /// @pre n <= size |
117 | | /// |
118 | | /// @note Only the base and bounds of the slice are changed; |
119 | | /// the data is not modified. |
120 | | /// |
121 | | /// @param [in] n |
122 | | /// Number of bytes that should be dropped from the beginning. |
123 | 211k | void remove_prefix(size_t n) { |
124 | 211k | assert(n <= size); |
125 | 211k | data += n; |
126 | 211k | size -= n; |
127 | 211k | } |
128 | | |
129 | | /// Drop the last "n" bytes from this slice. |
130 | | /// |
131 | | /// @pre n <= size |
132 | | /// |
133 | | /// @note Only the base and bounds of the slice are changed; |
134 | | /// the data is not modified. |
135 | | /// |
136 | | /// @param [in] n |
137 | | /// Number of bytes that should be dropped from the last. |
138 | 1.12M | void remove_suffix(size_t n) { |
139 | 1.12M | assert(n <= size); |
140 | 1.12M | size -= n; |
141 | 1.12M | } |
142 | | |
143 | | /// Remove leading spaces. |
144 | | /// |
145 | | /// @pre n <= size |
146 | | /// |
147 | | /// @note Only the base and bounds of the slice are changed; |
148 | | /// the data is not modified. |
149 | | /// |
150 | | /// @param [in] n |
151 | | /// Number of bytes of space that should be dropped from the beginning. |
152 | 1.16M | void trim_prefix() { |
153 | 1.16M | int32_t begin = 0; |
154 | 2.14M | while (begin < size && data[begin] == ' ') { |
155 | 980k | data += 1; |
156 | 980k | size -= 1; |
157 | 980k | } |
158 | 1.16M | } |
159 | | |
160 | | /// Remove quote char '"' or ''' which should exist as first and last char. |
161 | | /// |
162 | | /// @pre n <= size |
163 | | /// |
164 | | /// @note Only the base and bounds of the slice are changed; |
165 | | /// the data is not modified. |
166 | | /// |
167 | | /// @param [in] n |
168 | | /// Number of bytes of space that should be dropped from the beginning. |
169 | 362k | bool trim_quote() { |
170 | 362k | int32_t begin = 0; |
171 | 362k | bool change = false; |
172 | 362k | if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') || |
173 | 358k | (data[begin] == '\'' && data[size - 1] == '\''))) { |
174 | 348k | data += 1; |
175 | 348k | size -= 2; |
176 | 348k | change = true; |
177 | 348k | } |
178 | 362k | return change; |
179 | 362k | } |
180 | | |
181 | | /// Remove quote char '"' which should exist as first and last char. |
182 | | /// |
183 | | /// @pre n <= size |
184 | | /// |
185 | | /// @note Only the base and bounds of the slice are changed; |
186 | | /// the data is not modified. |
187 | | /// |
188 | | /// @param [in] n |
189 | | /// Number of bytes of space that should be dropped from the beginning. |
190 | 0 | bool trim_double_quotes() { |
191 | 0 | int32_t begin = 0; |
192 | 0 | if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) { |
193 | 0 | data += 1; |
194 | 0 | size -= 2; |
195 | 0 | return true; |
196 | 0 | } |
197 | 0 | return false; |
198 | 0 | } |
199 | | |
200 | | /// Truncate the slice to the given number of bytes. |
201 | | /// |
202 | | /// @pre n <= size |
203 | | /// |
204 | | /// @note Only the base and bounds of the slice are changed; |
205 | | /// the data is not modified. |
206 | | /// |
207 | | /// @param [in] n |
208 | | /// The new size of the slice. |
209 | | void truncate(size_t n) { |
210 | | assert(n <= size); |
211 | | size = n; |
212 | | } |
213 | | |
214 | | /// @return A string that contains a copy of the referenced data. |
215 | 77.6k | std::string to_string() const { return std::string(data, size); } |
216 | | |
217 | | /// Do a three-way comparison of the slice's data. |
218 | | int compare(const Slice& b) const; |
219 | | |
220 | | /// Check whether the slice starts with the given prefix. |
221 | 0 | bool starts_with(const Slice& x) const { |
222 | 0 | return ((size >= x.size) && (mem_equal(data, x.data, x.size))); |
223 | 0 | } |
224 | | |
225 | 31 | bool ends_with(const Slice& x) const { |
226 | 31 | return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size)); |
227 | 31 | } |
228 | | |
229 | | /// @brief Comparator struct, useful for ordered collections (like STL maps). |
230 | | struct Comparator { |
231 | | /// Compare two slices using Slice::compare() |
232 | | /// |
233 | | /// @param [in] a |
234 | | /// The slice to call Slice::compare() at. |
235 | | /// @param [in] b |
236 | | /// The slice to use as a parameter for Slice::compare(). |
237 | | /// @return @c true iff @c a is less than @c b by Slice::compare(). |
238 | 0 | bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; } |
239 | | }; |
240 | | |
241 | | /// Relocate/copy the slice's data into a new location. |
242 | | /// |
243 | | /// @param [in] d |
244 | | /// The new location for the data. If it's the same location, then no |
245 | | /// relocation is done. It is assumed that the new location is |
246 | | /// large enough to fit the data. |
247 | 50.0k | void relocate(char* d) { |
248 | 50.0k | if (data != d) { |
249 | 50.0k | memcpy(d, data, size); |
250 | 50.0k | data = d; |
251 | 50.0k | } |
252 | 50.0k | } |
253 | | |
254 | | friend bool operator==(const Slice& x, const Slice& y); |
255 | | |
256 | | friend std::ostream& operator<<(std::ostream& os, const Slice& slice); |
257 | | |
258 | 227k | static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; } |
259 | | |
260 | 43.3M | static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); } |
261 | | |
262 | 86.7k | static size_t compute_total_size(const std::vector<Slice>& slices) { |
263 | 86.7k | size_t total_size = 0; |
264 | 87.7k | for (auto& slice : slices) { |
265 | 87.7k | total_size += slice.size; |
266 | 87.7k | } |
267 | 86.7k | return total_size; |
268 | 86.7k | } |
269 | | |
270 | 0 | static std::string to_string(const std::vector<Slice>& slices) { |
271 | 0 | std::string buf; |
272 | 0 | for (auto& slice : slices) { |
273 | 0 | buf.append(slice.data, slice.size); |
274 | 0 | } |
275 | 0 | return buf; |
276 | 0 | } |
277 | | |
278 | | // X is (maybe) a truncated prefix of string X' |
279 | | // Y is (maybe) a truncated prefix of string Y' |
280 | | // return true only if we can determine that X' is strictly less than Y' |
281 | | // based on these maybe truncated prefixes |
282 | | static bool lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated, Slice Y, |
283 | | bool Y_is_truncated); |
284 | | }; |
285 | | |
286 | 0 | inline std::ostream& operator<<(std::ostream& os, const Slice& slice) { |
287 | 0 | os << slice.to_string(); |
288 | 0 | return os; |
289 | 0 | } |
290 | | |
291 | | /// Check whether two slices are identical. |
292 | 230k | inline bool operator==(const Slice& x, const Slice& y) { |
293 | 230k | return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size))); |
294 | 230k | } |
295 | | |
296 | | /// Check whether two slices are not identical. |
297 | 0 | inline bool operator!=(const Slice& x, const Slice& y) { |
298 | 0 | return !(x == y); |
299 | 0 | } |
300 | | |
301 | 43.3M | inline int Slice::compare(const Slice& b) const { |
302 | 43.3M | const int min_len = (size < b.size) ? size : b.size; |
303 | 43.3M | int r = mem_compare(data, b.data, min_len); |
304 | 43.3M | if (r == 0) { |
305 | 4.99M | if (size < b.size) |
306 | 269k | r = -1; |
307 | 4.72M | else if (size > b.size) |
308 | 2.13M | r = +1; |
309 | 4.99M | } |
310 | 43.3M | return r; |
311 | 43.3M | } |
312 | | |
313 | | // A move-only type which manage the lifecycle of externally allocated data. |
314 | | // Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access |
315 | | // the underlying buffer as a Slice. |
316 | | // |
317 | | // Usage example: |
318 | | // OwnedSlice read_page(PagePointer pp); |
319 | | // { |
320 | | // OwnedSlice page_data(new uint8_t[pp.size], pp.size); |
321 | | // Status s = _file.read_at(pp.offset, owned.slice()); |
322 | | // if (!s.ok()) { |
323 | | // return s; // `page_data` destructs, deallocate underlying buffer |
324 | | // } |
325 | | // return page_data; // transfer ownership of buffer into the caller |
326 | | // } |
327 | | // |
328 | | // only receive the memory allocated by Allocator and disables mmap, |
329 | | // otherwise the memory may not be freed correctly, currently only be constructed by faststring. |
330 | | class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> { |
331 | | public: |
332 | 150k | OwnedSlice() : _slice((uint8_t*)nullptr, 0) {} |
333 | | |
334 | | OwnedSlice(size_t length) |
335 | 136 | : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length), |
336 | 136 | _capacity(length) {} |
337 | | |
338 | 48.0k | OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) { |
339 | 48.0k | src._slice.data = nullptr; |
340 | 48.0k | src._slice.size = 0; |
341 | 48.0k | src._capacity = 0; |
342 | 48.0k | } |
343 | | |
344 | 96.6k | OwnedSlice& operator=(OwnedSlice&& src) { |
345 | 96.6k | if (this != &src) { |
346 | 96.6k | std::swap(_slice, src._slice); |
347 | 96.6k | std::swap(_capacity, src._capacity); |
348 | 96.6k | } |
349 | 96.6k | return *this; |
350 | 96.6k | } |
351 | | |
352 | | // disable copy constructor and copy assignment |
353 | | OwnedSlice(const OwnedSlice&) = delete; |
354 | | void operator=(const OwnedSlice&) = delete; |
355 | | |
356 | 262k | ~OwnedSlice() { |
357 | 262k | if (_slice.data != nullptr) { |
358 | 64.0k | DCHECK(_capacity != 0); |
359 | 64.0k | Allocator::free(_slice.data, _capacity); |
360 | 64.0k | } |
361 | 262k | } |
362 | | |
363 | 650 | char* data() const { return _slice.data; } |
364 | | |
365 | 221k | const Slice& slice() const { return _slice; } |
366 | | |
367 | | private: |
368 | | // faststring also inherits Allocator and disables mmap. |
369 | | friend class faststring; |
370 | | |
371 | | OwnedSlice(uint8_t* _data, size_t size, size_t capacity) |
372 | 63.9k | : _slice(_data, size), _capacity(capacity) {} |
373 | | |
374 | | Slice _slice; |
375 | | size_t _capacity = 0; |
376 | | }; |
377 | | |
378 | | } // namespace doris |